1 //
   2 // Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, Red Hat Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
   31 // architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
   71 //   r27-r31 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
  74 // as regards Java usage. we don't use any callee save registers
  75 // because this makes it difficult to de-optimise a frame (see comment
  76 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers
  80 
81 reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
   82 reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
   83 reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
   84 reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
   85 reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
   86 reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
   87 reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
   88 reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
   89 reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
   90 reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
   91 reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
   92 reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
   93 reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
   94 reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
   95 reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
   96 reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
   // r8 and r9 are deliberately not defined here: they are kept invisible
   // to the register allocator so they can be used as scratch registers
   // (see the note at the top of this section).
   97 reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
   98 reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
   99 reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
  100 reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
  101 reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        ); // rmethod
  102 reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
  103 reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
  104 reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
  105 reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
  106 reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
  107 reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
  108 reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
  109 reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
  110 reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
  111 reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
  112 reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
  113 reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
  114 reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
  // r19-r26 are callee-saved (SOE) under the C calling convention but are
  // treated as volatile (SOC) for Java code, since no callee-save
  // registers are used for Java (see the note at the top of this section).
  115 reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
  116 reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
  117 reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
  118 reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
  119 reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
  120 reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
  121 reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
  122 reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
  123 reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
  124 reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
  125 reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
  126 reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
  127 reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
  128 reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
  129 reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
  130 reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
  // r27-r31 are not allocatable for Java (NS as register save type).
  131 reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
  132 reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
  133 reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
  134 reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
  135 reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
  136 reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
  137 reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
  138 reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
  139 reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
  140 reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values up to 8 * 32
 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
  160 // For Java use, float registers v0-v15 are always save-on-call, even
  161 // though the platform ABI treats v8-v15 as callee save. Float registers
  162 // v16-v31 are SOC as per the platform spec.
 163 
164   reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
  165   reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
  166   reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
  167   reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );
  168 
  169   reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
  170   reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
  171   reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
  172   reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );
  173 
  174   reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
  175   reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
  176   reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
  177   reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );
  178 
  179   reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
  180   reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
  181   reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
  182   reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );
  183 
  184   reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
  185   reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
  186   reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
  187   reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );
  188 
  189   reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
  190   reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
  191   reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
  192   reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );
  193 
  194   reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
  195   reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
  196   reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
  197   reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );
  198 
  199   reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
  200   reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
  201   reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
  202   reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );
  203 
  // v8-v15 are callee-save in the platform ABI, but for Java use they are
  // defined SOC like the rest (see the note at the top of this section).
  204   reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
  205   reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
  206   reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
  207   reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );
  208 
  209   reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
  210   reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
  211   reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
  212   reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );
  213 
  214   reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
  215   reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
  216   reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
  217   reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
  218 
  219   reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
  220   reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
  221   reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
  222   reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
  223 
  224   reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
  225   reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
  226   reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
  227   reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
  228 
  229   reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
  230   reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
  231   reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
  232   reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
  233 
  234   reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
  235   reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
  236   reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
  237   reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
  238 
  239   reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
  240   reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
  241   reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
  242   reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
  243 
  // v16-v31 are volatile (SOC) in the platform ABI as well.
  244   reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
  245   reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
  246   reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
  247   reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
  248 
  249   reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
  250   reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
  251   reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
  252   reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
  253 
  254   reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
  255   reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
  256   reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
  257   reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
  258 
  259   reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
  260   reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
  261   reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
  262   reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
  263 
  264   reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
  265   reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
  266   reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
  267   reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
  268 
  269   reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
  270   reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
  271   reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
  272   reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
  273 
  274   reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
  275   reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
  276   reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
  277   reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
  278 
  279   reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
  280   reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
  281   reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
  282   reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
  283 
  284   reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
  285   reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
  286   reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
  287   reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
  288 
  289   reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
  290   reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
  291   reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
  292   reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
  293 
  294   reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
  295   reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
  296   reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
  297   reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
  298 
  299   reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
  300   reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
  301   reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
  302   reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
  303 
  304   reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
  305   reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
  306   reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
  307   reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
  308 
  309   reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
  310   reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
  311   reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
  312   reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
  313 
  314   reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
  315   reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
  316   reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
  317   reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
  318 
  319   reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
  320   reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
  321   reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
  322   reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
  328 // the AArch64 CPSR status flag register is not directly accessible as
  329 // instruction operand. the FPSR status flag register is a system
  330 // register which can be written/read using MSR/MRS but again does not
  331 // appear as an operand (a code identifying the FPSR occurs as an
  332 // immediate value in the instruction).
 333 
 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
 345 alloc_class chunk0(
 346     // volatiles
 347     R10, R10_H,
 348     R11, R11_H,
 349     R12, R12_H,
 350     R13, R13_H,
 351     R14, R14_H,
 352     R15, R15_H,
 353     R16, R16_H,
 354     R17, R17_H,
 355     R18, R18_H,
 356 
 357     // arg registers
 358     R0, R0_H,
 359     R1, R1_H,
 360     R2, R2_H,
 361     R3, R3_H,
 362     R4, R4_H,
 363     R5, R5_H,
 364     R6, R6_H,
 365     R7, R7_H,
 366 
 367     // non-volatiles
 368     R19, R19_H,
 369     R20, R20_H,
 370     R21, R21_H,
 371     R22, R22_H,
 372     R23, R23_H,
 373     R24, R24_H,
 374     R25, R25_H,
 375     R26, R26_H,
 376 
 377     // non-allocatable registers
 378 
 379     R27, R27_H, // heapbase
 380     R28, R28_H, // thread
 381     R29, R29_H, // fp
 382     R30, R30_H, // lr
 383     R31, R31_H, // sp
 384 );
 385 
 386 alloc_class chunk1(
 387 
 388     // no save
 389     V16, V16_H, V16_J, V16_K,
 390     V17, V17_H, V17_J, V17_K,
 391     V18, V18_H, V18_J, V18_K,
 392     V19, V19_H, V19_J, V19_K,
 393     V20, V20_H, V20_J, V20_K,
 394     V21, V21_H, V21_J, V21_K,
 395     V22, V22_H, V22_J, V22_K,
 396     V23, V23_H, V23_J, V23_K,
 397     V24, V24_H, V24_J, V24_K,
 398     V25, V25_H, V25_J, V25_K,
 399     V26, V26_H, V26_J, V26_K,
 400     V27, V27_H, V27_J, V27_K,
 401     V28, V28_H, V28_J, V28_K,
 402     V29, V29_H, V29_J, V29_K,
 403     V30, V30_H, V30_J, V30_K,
 404     V31, V31_H, V31_J, V31_K,
 405 
 406     // arg registers
 407     V0, V0_H, V0_J, V0_K,
 408     V1, V1_H, V1_J, V1_K,
 409     V2, V2_H, V2_J, V2_K,
 410     V3, V3_H, V3_J, V3_K,
 411     V4, V4_H, V4_J, V4_K,
 412     V5, V5_H, V5_J, V5_K,
 413     V6, V6_H, V6_J, V6_K,
 414     V7, V7_H, V7_J, V7_K,
 415 
 416     // non-volatiles
 417     V8, V8_H, V8_J, V8_K,
 418     V9, V9_H, V9_J, V9_K,
 419     V10, V10_H, V10_J, V10_K,
 420     V11, V11_H, V11_J, V11_K,
 421     V12, V12_H, V12_J, V12_K,
 422     V13, V13_H, V13_J, V13_K,
 423     V14, V14_H, V14_J, V14_K,
 424     V15, V15_H, V15_J, V15_K,
 425 );
 426 
 427 alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
 432 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 433 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 434 // 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 435 // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
 438 // Class for all 32 bit integer registers -- excludes SP which will
 439 // never be used as an integer register
 440 reg_class any_reg32(
 441     R0,
 442     R1,
 443     R2,
 444     R3,
 445     R4,
 446     R5,
 447     R6,
 448     R7,
 449     R10,
 450     R11,
 451     R12,
 452     R13,
 453     R14,
 454     R15,
 455     R16,
 456     R17,
 457     R18,
 458     R19,
 459     R20,
 460     R21,
 461     R22,
 462     R23,
 463     R24,
 464     R25,
 465     R26,
 466     R27,
 467     R28,
 468     R29,
 469     R30
 470 );
 471 
 472 // Singleton class for R0 int register
 473 reg_class int_r0_reg(R0);
 474 
 475 // Singleton class for R2 int register
 476 reg_class int_r2_reg(R2);
 477 
 478 // Singleton class for R3 int register
 479 reg_class int_r3_reg(R3);
 480 
 481 // Singleton class for R4 int register
 482 reg_class int_r4_reg(R4);
 483 
 484 // Class for all long integer registers (including RSP)
 485 reg_class any_reg(
 486     R0, R0_H,
 487     R1, R1_H,
 488     R2, R2_H,
 489     R3, R3_H,
 490     R4, R4_H,
 491     R5, R5_H,
 492     R6, R6_H,
 493     R7, R7_H,
 494     R10, R10_H,
 495     R11, R11_H,
 496     R12, R12_H,
 497     R13, R13_H,
 498     R14, R14_H,
 499     R15, R15_H,
 500     R16, R16_H,
 501     R17, R17_H,
 502     R18, R18_H,
 503     R19, R19_H,
 504     R20, R20_H,
 505     R21, R21_H,
 506     R22, R22_H,
 507     R23, R23_H,
 508     R24, R24_H,
 509     R25, R25_H,
 510     R26, R26_H,
 511     R27, R27_H,
 512     R28, R28_H,
 513     R29, R29_H,
 514     R30, R30_H,
 515     R31, R31_H
 516 );
 517 
 518 // Class for all non-special integer registers
 519 reg_class no_special_reg32_no_fp(
 520     R0,
 521     R1,
 522     R2,
 523     R3,
 524     R4,
 525     R5,
 526     R6,
 527     R7,
 528     R10,
 529     R11,
 530     R12,                        // rmethod
 531     R13,
 532     R14,
 533     R15,
 534     R16,
 535     R17,
 536     R18,
 537     R19,
 538     R20,
 539     R21,
 540     R22,
 541     R23,
 542     R24,
 543     R25,
 544     R26
 545  /* R27, */                     // heapbase
 546  /* R28, */                     // thread
 547  /* R29, */                     // fp
 548  /* R30, */                     // lr
 549  /* R31 */                      // sp
 550 );
 551 
 552 reg_class no_special_reg32_with_fp(
 553     R0,
 554     R1,
 555     R2,
 556     R3,
 557     R4,
 558     R5,
 559     R6,
 560     R7,
 561     R10,
 562     R11,
 563     R12,                        // rmethod
 564     R13,
 565     R14,
 566     R15,
 567     R16,
 568     R17,
 569     R18,
 570     R19,
 571     R20,
 572     R21,
 573     R22,
 574     R23,
 575     R24,
 576     R25,
 577     R26
 578  /* R27, */                     // heapbase
 579  /* R28, */                     // thread
 580  /* R29, */                     // fp
 581  /* R30, */                     // lr
 582  /* R31 */                      // sp
 583 );
 584 
 585 reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
 587 // Class for all non-special long integer registers
 588 reg_class no_special_reg_no_fp(
 589     R0, R0_H,
 590     R1, R1_H,
 591     R2, R2_H,
 592     R3, R3_H,
 593     R4, R4_H,
 594     R5, R5_H,
 595     R6, R6_H,
 596     R7, R7_H,
 597     R10, R10_H,
 598     R11, R11_H,
 599     R12, R12_H,                 // rmethod
 600     R13, R13_H,
 601     R14, R14_H,
 602     R15, R15_H,
 603     R16, R16_H,
 604     R17, R17_H,
 605     R18, R18_H,
 606     R19, R19_H,
 607     R20, R20_H,
 608     R21, R21_H,
 609     R22, R22_H,
 610     R23, R23_H,
 611     R24, R24_H,
 612     R25, R25_H,
 613     R26, R26_H,
 614  /* R27, R27_H, */              // heapbase
 615  /* R28, R28_H, */              // thread
 616  /* R29, R29_H, */              // fp
 617  /* R30, R30_H, */              // lr
 618  /* R31, R31_H */               // sp
 619 );
 620 
 621 reg_class no_special_reg_with_fp(
 622     R0, R0_H,
 623     R1, R1_H,
 624     R2, R2_H,
 625     R3, R3_H,
 626     R4, R4_H,
 627     R5, R5_H,
 628     R6, R6_H,
 629     R7, R7_H,
 630     R10, R10_H,
 631     R11, R11_H,
 632     R12, R12_H,                 // rmethod
 633     R13, R13_H,
 634     R14, R14_H,
 635     R15, R15_H,
 636     R16, R16_H,
 637     R17, R17_H,
 638     R18, R18_H,
 639     R19, R19_H,
 640     R20, R20_H,
 641     R21, R21_H,
 642     R22, R22_H,
 643     R23, R23_H,
 644     R24, R24_H,
 645     R25, R25_H,
 646     R26, R26_H,
 647  /* R27, R27_H, */              // heapbase
 648  /* R28, R28_H, */              // thread
 649  /* R29, R29_H, */              // fp
 650  /* R30, R30_H, */              // lr
 651  /* R31, R31_H */               // sp
 652 );
 653 
 654 reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
 656 // Class for 64 bit register r0
 657 reg_class r0_reg(
 658     R0, R0_H
 659 );
 660 
 661 // Class for 64 bit register r1
 662 reg_class r1_reg(
 663     R1, R1_H
 664 );
 665 
 666 // Class for 64 bit register r2
 667 reg_class r2_reg(
 668     R2, R2_H
 669 );
 670 
 671 // Class for 64 bit register r3
 672 reg_class r3_reg(
 673     R3, R3_H
 674 );
 675 
 676 // Class for 64 bit register r4
 677 reg_class r4_reg(
 678     R4, R4_H
 679 );
 680 
 681 // Class for 64 bit register r5
 682 reg_class r5_reg(
 683     R5, R5_H
 684 );
 685 
 686 // Class for 64 bit register r10
 687 reg_class r10_reg(
 688     R10, R10_H
 689 );
 690 
 691 // Class for 64 bit register r11
 692 reg_class r11_reg(
 693     R11, R11_H
 694 );
 695 
 696 // Class for method register
 697 reg_class method_reg(
 698     R12, R12_H
 699 );
 700 
 701 // Class for heapbase register
 702 reg_class heapbase_reg(
 703     R27, R27_H
 704 );
 705 
 706 // Class for thread register
 707 reg_class thread_reg(
 708     R28, R28_H
 709 );
 710 
 711 // Class for frame pointer register
 712 reg_class fp_reg(
 713     R29, R29_H
 714 );
 715 
 716 // Class for link register
 717 reg_class lr_reg(
 718     R30, R30_H
 719 );
 720 
 721 // Class for long sp register
 722 reg_class sp_reg(
 723   R31, R31_H
 724 );
 725 
 726 // Class for all pointer registers
 727 reg_class ptr_reg(
 728     R0, R0_H,
 729     R1, R1_H,
 730     R2, R2_H,
 731     R3, R3_H,
 732     R4, R4_H,
 733     R5, R5_H,
 734     R6, R6_H,
 735     R7, R7_H,
 736     R10, R10_H,
 737     R11, R11_H,
 738     R12, R12_H,
 739     R13, R13_H,
 740     R14, R14_H,
 741     R15, R15_H,
 742     R16, R16_H,
 743     R17, R17_H,
 744     R18, R18_H,
 745     R19, R19_H,
 746     R20, R20_H,
 747     R21, R21_H,
 748     R22, R22_H,
 749     R23, R23_H,
 750     R24, R24_H,
 751     R25, R25_H,
 752     R26, R26_H,
 753     R27, R27_H,
 754     R28, R28_H,
 755     R29, R29_H,
 756     R30, R30_H,
 757     R31, R31_H
 758 );
 759 
 760 // Class for all non_special pointer registers
 761 reg_class no_special_ptr_reg(
 762     R0, R0_H,
 763     R1, R1_H,
 764     R2, R2_H,
 765     R3, R3_H,
 766     R4, R4_H,
 767     R5, R5_H,
 768     R6, R6_H,
 769     R7, R7_H,
 770     R10, R10_H,
 771     R11, R11_H,
 772     R12, R12_H,
 773     R13, R13_H,
 774     R14, R14_H,
 775     R15, R15_H,
 776     R16, R16_H,
 777     R17, R17_H,
 778     R18, R18_H,
 779     R19, R19_H,
 780     R20, R20_H,
 781     R21, R21_H,
 782     R22, R22_H,
 783     R23, R23_H,
 784     R24, R24_H,
 785     R25, R25_H,
 786     R26, R26_H,
 787  /* R27, R27_H, */              // heapbase
 788  /* R28, R28_H, */              // thread
 789  /* R29, R29_H, */              // fp
 790  /* R30, R30_H, */              // lr
 791  /* R31, R31_H */               // sp
 792 );
 793 
 794 // Class for all float registers
 795 reg_class float_reg(
 796     V0,
 797     V1,
 798     V2,
 799     V3,
 800     V4,
 801     V5,
 802     V6,
 803     V7,
 804     V8,
 805     V9,
 806     V10,
 807     V11,
 808     V12,
 809     V13,
 810     V14,
 811     V15,
 812     V16,
 813     V17,
 814     V18,
 815     V19,
 816     V20,
 817     V21,
 818     V22,
 819     V23,
 820     V24,
 821     V25,
 822     V26,
 823     V27,
 824     V28,
 825     V29,
 826     V30,
 827     V31
 828 );
 829 
 830 // Double precision float registers have virtual `high halves' that
 831 // are needed by the allocator.
 832 // Class for all double registers
 833 reg_class double_reg(
 834     V0, V0_H,
 835     V1, V1_H,
 836     V2, V2_H,
 837     V3, V3_H,
 838     V4, V4_H,
 839     V5, V5_H,
 840     V6, V6_H,
 841     V7, V7_H,
 842     V8, V8_H,
 843     V9, V9_H,
 844     V10, V10_H,
 845     V11, V11_H,
 846     V12, V12_H,
 847     V13, V13_H,
 848     V14, V14_H,
 849     V15, V15_H,
 850     V16, V16_H,
 851     V17, V17_H,
 852     V18, V18_H,
 853     V19, V19_H,
 854     V20, V20_H,
 855     V21, V21_H,
 856     V22, V22_H,
 857     V23, V23_H,
 858     V24, V24_H,
 859     V25, V25_H,
 860     V26, V26_H,
 861     V27, V27_H,
 862     V28, V28_H,
 863     V29, V29_H,
 864     V30, V30_H,
 865     V31, V31_H
 866 );
 867 
// Class for all 64bit vector registers
// (a 64-bit vector occupies two 32-bit allocator slots, Vn plus Vn_H,
// exactly like double_reg above)
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 903 
// Class for all 128bit vector registers
// (a 128-bit vector occupies four 32-bit allocator slots:
// Vn, Vn_H, Vn_J and Vn_K)
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
// Class for 128 bit register v0
// NOTE(review): only the low-half slots V0/V0_H are listed here,
// whereas vectorx_reg also lists V0_J/V0_K for a full 128-bit
// register -- confirm two slots suffice for this class's uses.
reg_class v0_reg(
    V0, V0_H
);
 944 
// Class for 128 bit register v1
// NOTE(review): low-half slots only (cf. vectorx_reg, which also
// lists V1_J/V1_K) -- confirm this is intended.
reg_class v1_reg(
    V1, V1_H
);
 949 
// Class for 128 bit register v2
// NOTE(review): low-half slots only (cf. vectorx_reg, which also
// lists V2_J/V2_K) -- confirm this is intended.
reg_class v2_reg(
    V2, V2_H
);
 954 
// Class for 128 bit register v3
// NOTE(review): low-half slots only (cf. vectorx_reg, which also
// lists V3_J/V3_K) -- confirm this is intended.
reg_class v3_reg(
    V3, V3_H
);
 959 
// Singleton class for condition codes (the sole RFLAGS register)
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls are ranked twice as expensive as a plain insn.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile references are the most expensive register-level op.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "gc/shared/cardTableModRefBS.hpp"
 999 #include "opto/addnode.hpp"
1000 
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  // AArch64 emits no call trampolines, so no stub space is needed.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  // (zero, for the same reason as above)
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1018 
class HandlerImpl {

 public:

  // Emitters for the exception and deopt handler stubs; both return
  // the offset of the emitted handler in the code buffer.
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // Worst-case size of the exception handler: one far branch.
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  // Worst-case size of the deopt handler.
  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // NOTE(review): 4 words presumably covers adr (1) plus the
    // worst-case far branch sequence -- confirm against
    // MacroAssembler::far_branch_size.
    return 4 * NativeInstruction::instruction_size;
  }
};
1035 
  // graph traversal helpers
  // (definitions appear in the source block below)

  // find the membar feeding / fed by n via Ctl+Mem projections
  MemBarNode *parent_membar(const Node *n);
  MemBarNode *child_membar(const MemBarNode *n);
  // can barrier head a release sequence (MemBarRelease, possibly
  // via an intervening MemBarCPUOrder)?
  bool leading_membar(const MemBarNode *barrier);

  // is barrier the MemBarVolatile of a GC card mark subgraph?
  bool is_card_mark_membar(const MemBarNode *barrier);
  // presumably tests whether opcode is a CompareAndSwapX ideal
  // opcode -- body not visible here, confirm in the source block
  bool is_CAS(int opcode);

  MemBarNode *leading_to_trailing(MemBarNode *leading);
  MemBarNode *card_mark_to_leading(const MemBarNode *barrier);
  MemBarNode *trailing_to_leading(const MemBarNode *trailing);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);

  // predicate controlling addressing modes
  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1068 %}
1069 
1070 source %{
1071 
  // Optimization of volatile gets and puts
1073   // -------------------------------------
1074   //
1075   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1076   // use to implement volatile reads and writes. For a volatile read
1077   // we simply need
1078   //
1079   //   ldar<x>
1080   //
1081   // and for a volatile write we need
1082   //
1083   //   stlr<x>
1084   //
1085   // Alternatively, we can implement them by pairing a normal
1086   // load/store with a memory barrier. For a volatile read we need
1087   //
1088   //   ldr<x>
1089   //   dmb ishld
1090   //
1091   // for a volatile write
1092   //
1093   //   dmb ish
1094   //   str<x>
1095   //   dmb ish
1096   //
1097   // We can also use ldaxr and stlxr to implement compare and swap CAS
1098   // sequences. These are normally translated to an instruction
1099   // sequence like the following
1100   //
1101   //   dmb      ish
1102   // retry:
1103   //   ldxr<x>   rval raddr
1104   //   cmp       rval rold
1105   //   b.ne done
1106   //   stlxr<x>  rval, rnew, rold
1107   //   cbnz      rval retry
1108   // done:
1109   //   cset      r0, eq
1110   //   dmb ishld
1111   //
1112   // Note that the exclusive store is already using an stlxr
1113   // instruction. That is required to ensure visibility to other
1114   // threads of the exclusive write (assuming it succeeds) before that
1115   // of any subsequent writes.
1116   //
1117   // The following instruction sequence is an improvement on the above
1118   //
1119   // retry:
1120   //   ldaxr<x>  rval raddr
1121   //   cmp       rval rold
1122   //   b.ne done
1123   //   stlxr<x>  rval, rnew, rold
1124   //   cbnz      rval retry
1125   // done:
1126   //   cset      r0, eq
1127   //
1128   // We don't need the leading dmb ish since the stlxr guarantees
1129   // visibility of prior writes in the case that the swap is
1130   // successful. Crucially we don't have to worry about the case where
1131   // the swap is not successful since no valid program should be
1132   // relying on visibility of prior changes by the attempting thread
1133   // in the case where the CAS fails.
1134   //
1135   // Similarly, we don't need the trailing dmb ishld if we substitute
1136   // an ldaxr instruction since that will provide all the guarantees we
1137   // require regarding observation of changes made by other threads
1138   // before any change to the CAS address observed by the load.
1139   //
1140   // In order to generate the desired instruction sequence we need to
1141   // be able to identify specific 'signature' ideal graph node
  // sequences which i) occur as a translation of volatile reads or
1143   // writes or CAS operations and ii) do not occur through any other
1144   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1146   // sequences to the desired machine code sequences. Selection of the
1147   // alternative rules can be implemented by predicates which identify
1148   // the relevant node sequences.
1149   //
1150   // The ideal graph generator translates a volatile read to the node
1151   // sequence
1152   //
1153   //   LoadX[mo_acquire]
1154   //   MemBarAcquire
1155   //
1156   // As a special case when using the compressed oops optimization we
1157   // may also see this variant
1158   //
1159   //   LoadN[mo_acquire]
1160   //   DecodeN
1161   //   MemBarAcquire
1162   //
1163   // A volatile write is translated to the node sequence
1164   //
1165   //   MemBarRelease
1166   //   StoreX[mo_release] {CardMark}-optional
1167   //   MemBarVolatile
1168   //
1169   // n.b. the above node patterns are generated with a strict
1170   // 'signature' configuration of input and output dependencies (see
1171   // the predicates below for exact details). The card mark may be as
1172   // simple as a few extra nodes or, in a few GC configurations, may
1173   // include more complex control flow between the leading and
1174   // trailing memory barriers. However, whatever the card mark
1175   // configuration these signatures are unique to translated volatile
1176   // reads/stores -- they will not appear as a result of any other
1177   // bytecode translation or inlining nor as a consequence of
1178   // optimizing transforms.
1179   //
1180   // We also want to catch inlined unsafe volatile gets and puts and
1181   // be able to implement them using either ldar<x>/stlr<x> or some
1182   // combination of ldr<x>/stlr<x> and dmb instructions.
1183   //
1184   // Inlined unsafe volatiles puts manifest as a minor variant of the
1185   // normal volatile put node sequence containing an extra cpuorder
1186   // membar
1187   //
1188   //   MemBarRelease
1189   //   MemBarCPUOrder
1190   //   StoreX[mo_release] {CardMark}-optional
1191   //   MemBarVolatile
1192   //
1193   // n.b. as an aside, the cpuorder membar is not itself subject to
1194   // matching and translation by adlc rules.  However, the rule
1195   // predicates need to detect its presence in order to correctly
1196   // select the desired adlc rules.
1197   //
1198   // Inlined unsafe volatile gets manifest as a somewhat different
1199   // node sequence to a normal volatile get
1200   //
1201   //   MemBarCPUOrder
1202   //        ||       \\
1203   //   MemBarAcquire LoadX[mo_acquire]
1204   //        ||
1205   //   MemBarCPUOrder
1206   //
1207   // In this case the acquire membar does not directly depend on the
1208   // load. However, we can be sure that the load is generated from an
1209   // inlined unsafe volatile get if we see it dependent on this unique
1210   // sequence of membar nodes. Similarly, given an acquire membar we
1211   // can know that it was added because of an inlined unsafe volatile
1212   // get if it is fed and feeds a cpuorder membar and if its feed
1213   // membar also feeds an acquiring load.
1214   //
1215   // Finally an inlined (Unsafe) CAS operation is translated to the
1216   // following ideal graph
1217   //
1218   //   MemBarRelease
1219   //   MemBarCPUOrder
1220   //   CompareAndSwapX {CardMark}-optional
1221   //   MemBarCPUOrder
1222   //   MemBarAcquire
1223   //
1224   // So, where we can identify these volatile read and write
1225   // signatures we can choose to plant either of the above two code
1226   // sequences. For a volatile read we can simply plant a normal
1227   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1228   // also choose to inhibit translation of the MemBarAcquire and
1229   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1230   //
1231   // When we recognise a volatile store signature we can choose to
1232   // plant at a dmb ish as a translation for the MemBarRelease, a
1233   // normal str<x> and then a dmb ish for the MemBarVolatile.
1234   // Alternatively, we can inhibit translation of the MemBarRelease
1235   // and MemBarVolatile and instead plant a simple stlr<x>
1236   // instruction.
1237   //
1238   // when we recognise a CAS signature we can choose to plant a dmb
1239   // ish as a translation for the MemBarRelease, the conventional
1240   // macro-instruction sequence for the CompareAndSwap node (which
1241   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1242   // Alternatively, we can elide generation of the dmb instructions
1243   // and plant the alternative CompareAndSwap macro-instruction
1244   // sequence (which uses ldaxr<x>).
1245   //
1246   // Of course, the above only applies when we see these signature
1247   // configurations. We still want to plant dmb instructions in any
1248   // other cases where we may see a MemBarAcquire, MemBarRelease or
1249   // MemBarVolatile. For example, at the end of a constructor which
1250   // writes final/volatile fields we will see a MemBarRelease
1251   // instruction and this needs a 'dmb ish' lest we risk the
1252   // constructed object being visible without making the
1253   // final/volatile field writes visible.
1254   //
1255   // n.b. the translation rules below which rely on detection of the
1256   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1257   // If we see anything other than the signature configurations we
1258   // always just translate the loads and stores to ldr<x> and str<x>
1259   // and translate acquire, release and volatile membars to the
1260   // relevant dmb instructions.
1261   //
1262 
1263   // graph traversal helpers used for volatile put/get and CAS
1264   // optimization
1265 
1266   // 1) general purpose helpers
1267 
1268   // if node n is linked to a parent MemBarNode by an intervening
1269   // Control and Memory ProjNode return the MemBarNode otherwise return
1270   // NULL.
1271   //
1272   // n may only be a Load or a MemBar.
1273 
1274   MemBarNode *parent_membar(const Node *n)
1275   {
1276     Node *ctl = NULL;
1277     Node *mem = NULL;
1278     Node *membar = NULL;
1279 
1280     if (n->is_Load()) {
1281       ctl = n->lookup(LoadNode::Control);
1282       mem = n->lookup(LoadNode::Memory);
1283     } else if (n->is_MemBar()) {
1284       ctl = n->lookup(TypeFunc::Control);
1285       mem = n->lookup(TypeFunc::Memory);
1286     } else {
1287         return NULL;
1288     }
1289 
1290     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
1291       return NULL;
1292     }
1293 
1294     membar = ctl->lookup(0);
1295 
1296     if (!membar || !membar->is_MemBar()) {
1297       return NULL;
1298     }
1299 
1300     if (mem->lookup(0) != membar) {
1301       return NULL;
1302     }
1303 
1304     return membar->as_MemBar();
1305   }
1306 
1307   // if n is linked to a child MemBarNode by intervening Control and
1308   // Memory ProjNodes return the MemBarNode otherwise return NULL.
1309 
1310   MemBarNode *child_membar(const MemBarNode *n)
1311   {
1312     ProjNode *ctl = n->proj_out(TypeFunc::Control);
1313     ProjNode *mem = n->proj_out(TypeFunc::Memory);
1314 
1315     // MemBar needs to have both a Ctl and Mem projection
1316     if (! ctl || ! mem)
1317       return NULL;
1318 
1319     MemBarNode *child = NULL;
1320     Node *x;
1321 
1322     for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1323       x = ctl->fast_out(i);
1324       // if we see a membar we keep hold of it. we may also see a new
1325       // arena copy of the original but it will appear later
1326       if (x->is_MemBar()) {
1327           child = x->as_MemBar();
1328           break;
1329       }
1330     }
1331 
1332     if (child == NULL) {
1333       return NULL;
1334     }
1335 
1336     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1337       x = mem->fast_out(i);
1338       // if we see a membar we keep hold of it. we may also see a new
1339       // arena copy of the original but it will appear later
1340       if (x == child) {
1341         return child;
1342       }
1343     }
1344     return NULL;
1345   }
1346 
  // helper predicate used to filter candidates for a leading memory
1348   // barrier
1349   //
1350   // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1351   // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1352 
1353   bool leading_membar(const MemBarNode *barrier)
1354   {
1355     int opcode = barrier->Opcode();
1356     // if this is a release membar we are ok
1357     if (opcode == Op_MemBarRelease) {
1358       return true;
1359     }
1360     // if its a cpuorder membar . . .
1361     if (opcode != Op_MemBarCPUOrder) {
1362       return false;
1363     }
1364     // then the parent has to be a release membar
1365     MemBarNode *parent = parent_membar(barrier);
1366     if (!parent) {
1367       return false;
1368     }
1369     opcode = parent->Opcode();
1370     return opcode == Op_MemBarRelease;
1371   }
1372 
1373   // 2) card mark detection helper
1374 
1375   // helper predicate which can be used to detect a volatile membar
1376   // introduced as part of a conditional card mark sequence either by
1377   // G1 or by CMS when UseCondCardMark is true.
1378   //
1379   // membar can be definitively determined to be part of a card mark
1380   // sequence if and only if all the following hold
1381   //
1382   // i) it is a MemBarVolatile
1383   //
1384   // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1385   // true
1386   //
1387   // iii) the node's Mem projection feeds a StoreCM node.
1388 
1389   bool is_card_mark_membar(const MemBarNode *barrier)
1390   {
1391     if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
1392       return false;
1393     }
1394 
1395     if (barrier->Opcode() != Op_MemBarVolatile) {
1396       return false;
1397     }
1398 
1399     ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1400 
1401     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1402       Node *y = mem->fast_out(i);
1403       if (y->Opcode() == Op_StoreCM) {
1404         return true;
1405       }
1406     }
1407 
1408     return false;
1409   }
1410 
1411 
1412   // 3) helper predicates to traverse volatile put or CAS graphs which
1413   // may contain GC barrier subgraphs
1414 
1415   // Preamble
1416   // --------
1417   //
1418   // for volatile writes we can omit generating barriers and employ a
  // releasing store when we see a node sequence with a
1420   // leading MemBarRelease and a trailing MemBarVolatile as follows
1421   //
1422   //   MemBarRelease
1423   //  {    ||        } -- optional
1424   //  {MemBarCPUOrder}
1425   //       ||       \\
1426   //       ||     StoreX[mo_release]
1427   //       | \ Bot    / ???
1428   //       | MergeMem
1429   //       | /
1430   //   MemBarVolatile
1431   //
1432   // where
1433   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1434   //  | \ and / indicate further routing of the Ctl and Mem feeds
1435   //
1436   // Note that the memory feed from the CPUOrder membar to the
1437   // MergeMem node is an AliasIdxBot slice while the feed from the
1438   // StoreX is for a slice determined by the type of value being
1439   // written.
1440   //
1441   // the diagram above shows the graph we see for non-object stores.
1442   // for a volatile Object store (StoreN/P) we may see other nodes
1443   // below the leading membar because of the need for a GC pre- or
1444   // post-write barrier.
1445   //
  // with most GC configurations we will see this simple variant which
1447   // includes a post-write barrier card mark.
1448   //
1449   //   MemBarRelease______________________________
1450   //         ||    \\               Ctl \        \\
1451   //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
1452   //         | \ Bot  / oop                 . . .  /
1453   //         | MergeMem
1454   //         | /
1455   //         ||      /
1456   //   MemBarVolatile
1457   //
1458   // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1459   // the object address to an int used to compute the card offset) and
1460   // Ctl+Mem to a StoreB node (which does the actual card mark).
1461   //
1462   // n.b. a StoreCM node is only ever used when CMS (with or without
1463   // CondCardMark) or G1 is configured. This abstract instruction
1464   // differs from a normal card mark write (StoreB) because it implies
1465   // a requirement to order visibility of the card mark (StoreCM)
1466   // after that of the object put (StoreP/N) using a StoreStore memory
1467   // barrier. Note that this is /not/ a requirement to order the
1468   // instructions in the generated code (that is already guaranteed by
1469   // the order of memory dependencies). Rather it is a requirement to
1470   // ensure visibility order which only applies on architectures like
1471   // AArch64 which do not implement TSO. This ordering is required for
1472   // both non-volatile and volatile puts.
1473   //
1474   // That implies that we need to translate a StoreCM using the
1475   // sequence
1476   //
1477   //   dmb ishst
1478   //   stlrb
1479   //
1480   // This dmb cannot be omitted even when the associated StoreX or
1481   // CompareAndSwapX is implemented using stlr. However, as described
1482   // below there are circumstances where a specific GC configuration
1483   // requires a stronger barrier in which case it can be omitted.
1484   // 
1485   // With the Serial or Parallel GC using +CondCardMark the card mark
1486   // is performed conditionally on it currently being unmarked in
1487   // which case the volatile put graph looks slightly different
1488   //
1489   //   MemBarRelease____________________________________________
1490   //         ||    \\               Ctl \     Ctl \     \\  Mem \
1491   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
1492   //         | \ Bot / oop                          \            |
1493   //         | MergeMem                            . . .      StoreB
1494   //         | /                                                /
1495   //         ||     /
1496   //   MemBarVolatile
1497   //
1498   // It is worth noting at this stage that all the above
1499   // configurations can be uniquely identified by checking that the
1500   // memory flow includes the following subgraph:
1501   //
1502   //   MemBarRelease
1503   //  {MemBarCPUOrder}
1504   //      |  \      . . .
1505   //      |  StoreX[mo_release]  . . .
1506   //  Bot |   / oop
1507   //     MergeMem
1508   //      |
1509   //   MemBarVolatile
1510   //
1511   // This is referred to as a *normal* volatile store subgraph. It can
1512   // easily be detected starting from any candidate MemBarRelease,
1513   // StoreX[mo_release] or MemBarVolatile node.
1514   //
1515   // A small variation on this normal case occurs for an unsafe CAS
1516   // operation. The basic memory flow subgraph for a non-object CAS is
1517   // as follows
1518   //
1519   //   MemBarRelease
1520   //         ||
1521   //   MemBarCPUOrder
1522   //          |     \\   . . .
1523   //          |     CompareAndSwapX
1524   //          |       |
1525   //      Bot |     SCMemProj
1526   //           \     / Bot
1527   //           MergeMem
1528   //           /
1529   //   MemBarCPUOrder
1530   //         ||
1531   //   MemBarAcquire
1532   //
1533   // The same basic variations on this arrangement (mutatis mutandis)
1534   // occur when a card mark is introduced. i.e. the CPUOrder MemBar
1535   // feeds the extra CastP2X, LoadB etc nodes but the above memory
1536   // flow subgraph is still present.
1537   // 
1538   // This is referred to as a *normal* CAS subgraph. It can easily be
1539   // detected starting from any candidate MemBarRelease,
1540   // StoreX[mo_release] or MemBarAcquire node.
1541   //
1542   // The code below uses two helper predicates, leading_to_trailing
1543   // and trailing_to_leading to identify these normal graphs, one
1544   // validating the layout starting from the top membar and searching
1545   // down and the other validating the layout starting from the lower
1546   // membar and searching up.
1547   //
1548   // There are two special case GC configurations when the simple
1549   // normal graphs above may not be generated: when using G1 (which
1550   // always employs a conditional card mark); and when using CMS with
1551   // conditional card marking (+CondCardMark) configured. These GCs
1552   // are both concurrent rather than stop-the world GCs. So they
1553   // introduce extra Ctl+Mem flow into the graph between the leading
1554   // and trailing membar nodes, in particular enforcing stronger
  // memory serialisation between the object put and the corresponding
1556   // conditional card mark. CMS employs a post-write GC barrier while
1557   // G1 employs both a pre- and post-write GC barrier.
1558   //
1559   // The post-write barrier subgraph for these configurations includes
1560   // a MemBarVolatile node -- referred to as a card mark membar --
1561   // which is needed to order the card write (StoreCM) operation in
1562   // the barrier, the preceding StoreX (or CompareAndSwapX) and Store
1563   // operations performed by GC threads i.e. a card mark membar
1564   // constitutes a StoreLoad barrier hence must be translated to a dmb
1565   // ish (whether or not it sits inside a volatile store sequence).
1566   //
1567   // Of course, the use of the dmb ish for the card mark membar also
  // implies that the StoreCM which follows can omit the dmb ishst
1569   // instruction. The necessary visibility ordering will already be
1570   // guaranteed by the dmb ish. In sum, the dmb ishst instruction only
  // needs to be generated as part of the StoreCM sequence with GC
1572   // configuration +CMS -CondCardMark.
1573   // 
1574   // Of course all these extra barrier nodes may well be absent --
1575   // they are only inserted for object puts. Their potential presence
1576   // significantly complicates the task of identifying whether a
1577   // MemBarRelease, StoreX[mo_release], MemBarVolatile or
1578   // MemBarAcquire forms part of a volatile put or CAS when using
1579   // these GC configurations (see below) and also complicates the
1580   // decision as to how to translate a MemBarVolatile and StoreCM.
1581   //
  // So, this means that a card mark MemBarVolatile occurring in the
  // post-barrier graph needs to be distinguished from a normal
1584   // trailing MemBarVolatile. Resolving this is straightforward: a
1585   // card mark MemBarVolatile always projects a Mem feed to a StoreCM
1586   // node and that is a unique marker
1587   //
1588   //      MemBarVolatile (card mark)
1589   //       C |    \     . . .
1590   //         |   StoreCM   . . .
1591   //       . . .
1592   //
1593   // Returning to the task of translating the object put and the
1594   // leading/trailing membar nodes: what do the node graphs look like
1595   // for these 2 special cases? and how can we determine the status of
1596   // a MemBarRelease, StoreX[mo_release] or MemBarVolatile in both
1597   // normal and non-normal cases?
1598   //
1599   // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
  // which selects conditional execution based on the value loaded
1601   // (LoadB) from the card. Ctl and Mem are fed to the If via an
1602   // intervening StoreLoad barrier (MemBarVolatile).
1603   //
1604   // So, with CMS we may see a node graph for a volatile object store
1605   // which looks like this
1606   //
1607   //   MemBarRelease
1608   //   MemBarCPUOrder_(leading)____________________
1609   //     C |  | M \       \\               M |   C \
1610   //       |  |    \    StoreN/P[mo_release] |  CastP2X
1611   //       |  | Bot \    / oop      \        |
1612   //       |  |    MergeMem          \      / 
1613   //       |  |      /                |    /
1614   //     MemBarVolatile (card mark)   |   /
1615   //     C |  ||    M |               |  /
1616   //       | LoadB    | Bot       oop | / Bot
1617   //       |   |      |              / /
1618   //       | Cmp      |\            / /
1619   //       | /        | \          / /
1620   //       If         |  \        / /
1621   //       | \        |   \      / /
1622   // IfFalse  IfTrue  |    \    / /
1623   //       \     / \  |    |   / /
1624   //        \   / StoreCM  |  / /
1625   //         \ /      \   /  / /
1626   //        Region     Phi  / /
1627   //          | \   Raw |  / /
1628   //          |  . . .  | / /
1629   //          |       MergeMem
1630   //          |           |
1631   //        MemBarVolatile (trailing)
1632   //
1633   // Notice that there are two MergeMem nodes below the leading
1634   // membar. The first MergeMem merges the AliasIdxBot Mem slice from
1635   // the leading membar and the oopptr Mem slice from the Store into
1636   // the card mark membar. The trailing MergeMem merges the
1637   // AliasIdxBot Mem slice from the leading membar, the AliasIdxRaw
1638   // slice from the StoreCM and an oop slice from the StoreN/P node
1639   // into the trailing membar (n.b. the raw slice proceeds via a Phi
1640   // associated with the If region).
1641   //
1642   // So, in the case of CMS + CondCardMark the volatile object store
1643   // graph still includes a normal volatile store subgraph from the
1644   // leading membar to the trailing membar. However, it also contains
1645   // the same shape memory flow to the card mark membar. The two flows
1646   // can be distinguished by testing whether or not the downstream
1647   // membar is a card mark membar.
1648   //
1649   // The graph for a CAS also varies with CMS + CondCardMark, in
1650   // particular employing a control feed from the CompareAndSwapX node
1651   // through a CmpI and If to the card mark membar and StoreCM which
1652   // updates the associated card. This avoids executing the card mark
1653   // if the CAS fails. However, it can be seen from the diagram below
1654   // that the presence of the barrier does not alter the normal CAS
1655   // memory subgraph where the leading membar feeds a CompareAndSwapX,
1656   // an SCMemProj, a MergeMem then a final trailing MemBarCPUOrder and
1657   // MemBarAcquire pair.
1658   //
1659   //   MemBarRelease
1660   //   MemBarCPUOrder__(leading)_______________________
1661   //   C /  M |                        \\            C \
1662   //  . . .   | Bot                CompareAndSwapN/P   CastP2X
1663   //          |                  C /  M |
1664   //          |                 CmpI    |
1665   //          |                  /      |
1666   //          |               . . .     |
1667   //          |              IfTrue     |
1668   //          |              /          |
1669   //       MemBarVolatile (card mark)   |
1670   //        C |  ||    M |              |
1671   //          | LoadB    | Bot   ______/|
1672   //          |   |      |      /       |
1673   //          | Cmp      |     /      SCMemProj
1674   //          | /        |    /         |
1675   //          If         |   /         /
1676   //          | \        |  /         / Bot
1677   //     IfFalse  IfTrue | /         /
1678   //          |   / \   / / prec    /
1679   //   . . .  |  /  StoreCM        /
1680   //        \ | /      | raw      /
1681   //        Region    . . .      /
1682   //           | \              /
1683   //           |   . . .   \    / Bot
1684   //           |        MergeMem
1685   //           |          /
1686   //         MemBarCPUOrder
1687   //         MemBarAcquire (trailing)
1688   //
1689   // This has a slightly different memory subgraph to the one seen
1690   // previously but the core of it has a similar memory flow to the
1691   // CAS normal subgraph:
1692   //
1693   //   MemBarRelease
1694   //   MemBarCPUOrder____
1695   //         |          \      . . .
1696   //         |       CompareAndSwapX  . . .
1697   //         |       C /  M |
1698   //         |      CmpI    |
1699   //         |       /      |
1700   //         |      . .    /
1701   //     Bot |   IfTrue   /
1702   //         |   /       /
1703   //    MemBarVolatile  /
1704   //         | ...     /
1705   //      StoreCM ... /
1706   //         |       / 
1707   //       . . .  SCMemProj
1708   //      Raw \    / Bot
1709   //        MergeMem
1710   //           |
1711   //   MemBarCPUOrder
1712   //   MemBarAcquire
1713   //
1714   // The G1 graph for a volatile object put is a lot more complicated.
1715   // Nodes inserted on behalf of G1 may comprise: a pre-write graph
1716   // which adds the old value to the SATB queue; the releasing store
1717   // itself; and, finally, a post-write graph which performs a card
1718   // mark.
1719   //
1720   // The pre-write graph may be omitted, but only when the put is
1721   // writing to a newly allocated (young gen) object and then only if
1722   // there is a direct memory chain to the Initialize node for the
1723   // object allocation. This will not happen for a volatile put since
1724   // any memory chain passes through the leading membar.
1725   //
1726   // The pre-write graph includes a series of 3 If tests. The outermost
1727   // If tests whether SATB is enabled (no else case). The next If tests
1728   // whether the old value is non-NULL (no else case). The third tests
1729   // whether the SATB queue index is > 0, if so updating the queue. The
1730   // else case for this third If calls out to the runtime to allocate a
1731   // new queue buffer.
1732   //
1733   // So with G1 the pre-write and releasing store subgraph looks like
1734   // this (the nested Ifs are omitted).
1735   //
1736   //  MemBarRelease (leading)____________
1737   //     C |  ||  M \   M \    M \  M \ . . .
1738   //       | LoadB   \  LoadL  LoadN   \
1739   //       | /        \                 \
1740   //       If         |\                 \
1741   //       | \        | \                 \
1742   //  IfFalse  IfTrue |  \                 \
1743   //       |     |    |   \                 |
1744   //       |     If   |   /\                |
1745   //       |     |          \               |
1746   //       |                 \              |
1747   //       |    . . .         \             |
1748   //       | /       | /       |            |
1749   //      Region  Phi[M]       |            |
1750   //       | \       |         |            |
1751   //       |  \_____ | ___     |            |
1752   //     C | C \     |   C \ M |            |
1753   //       | CastP2X | StoreN/P[mo_release] |
1754   //       |         |         |            |
1755   //     C |       M |       M |          M |
1756   //        \        | Raw     | oop       / Bot
1757   //                  . . .
1758   //          (post write subtree elided)
1759   //                    . . .
1760   //             C \         M /
1761   //         MemBarVolatile (trailing)
1762   //
  // Note that the three memory feeds into the post-write tree are an
  // AliasIdxRaw slice associated with the writes in the pre-write
  // tree, an oop type slice from the StoreX specific to the type of
  // the volatile field and the AliasIdxBot slice emanating from the
  // leading membar.
1768   //
1769   // n.b. the LoadB in this subgraph is not the card read -- it's a
1770   // read of the SATB queue active flag.
1771   //
  // The CAS graph is once again a variant of the above with a
  // CompareAndSwapX node and SCMemProj in place of the StoreX.  The
  // value from the CompareAndSwapX node is fed into the post-write
  // graph along with the AliasIdxRaw feed from the pre-barrier and
  // the AliasIdxBot feeds from the leading membar and the SCMemProj.
1777   //
1778   //  MemBarRelease (leading)____________
1779   //     C |  ||  M \   M \    M \  M \ . . .
1780   //       | LoadB   \  LoadL  LoadN   \
1781   //       | /        \                 \
1782   //       If         |\                 \
1783   //       | \        | \                 \
1784   //  IfFalse  IfTrue |  \                 \
1785   //       |     |    |   \                 \
1786   //       |     If   |    \                 |
1787   //       |     |          \                |
1788   //       |                 \               |
1789   //       |    . . .         \              |
1790   //       | /       | /       \             |
1791   //      Region  Phi[M]        \            |
1792   //       | \       |           \           |
1793   //       |  \_____ |            |          |
1794   //     C | C \     |            |          |
1795   //       | CastP2X |     CompareAndSwapX   |
1796   //       |         |   res |     |         |
1797   //     C |       M |       |  SCMemProj  M |
1798   //        \        | Raw   |     | Bot    / Bot
1799   //                  . . .
1800   //          (post write subtree elided)
1801   //                    . . .
1802   //             C \         M /
1803   //         MemBarVolatile (trailing)
1804   //
1805   // The G1 post-write subtree is also optional, this time when the
1806   // new value being written is either null or can be identified as a
1807   // newly allocated (young gen) object with no intervening control
1808   // flow. The latter cannot happen but the former may, in which case
1809   // the card mark membar is omitted and the memory feeds from the
  // leading membar and the StoreN/P are merged direct into the
1811   // trailing membar as per the normal subgraph. So, the only special
1812   // case which arises is when the post-write subgraph is generated.
1813   //
1814   // The kernel of the post-write G1 subgraph is the card mark itself
1815   // which includes a card mark memory barrier (MemBarVolatile), a
1816   // card test (LoadB), and a conditional update (If feeding a
1817   // StoreCM). These nodes are surrounded by a series of nested Ifs
1818   // which try to avoid doing the card mark. The top level If skips if
1819   // the object reference does not cross regions (i.e. it tests if
1820   // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1821   // need not be recorded. The next If, which skips on a NULL value,
1822   // may be absent (it is not generated if the type of value is >=
1823   // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1824   // checking if card_val != young).  n.b. although this test requires
1825   // a pre-read of the card it can safely be done before the StoreLoad
1826   // barrier. However that does not bypass the need to reread the card
1827   // after the barrier.
1828   //
1829   //                (pre-write subtree elided)
1830   //        . . .                  . . .    . . .  . . .
1831   //        C |               M |    M |    M |
1832   //       Region            Phi[M] StoreN    |
1833   //          |            Raw  |  oop |  Bot |
1834   //         / \_______         |\     |\     |\
1835   //      C / C \      . . .    | \    | \    | \
1836   //       If   CastP2X . . .   |  \   |  \   |  \
1837   //       / \                  |   \  |   \  |   \
1838   //      /   \                 |    \ |    \ |    \
1839   // IfFalse IfTrue             |      |      |     \
1840   //   |       |                 \     |     /       |
1841   //   |       If                 \    | \  /   \    |
1842   //   |      / \                  \   |   /     \   |
1843   //   |     /   \                  \  |  / \     |  |
1844   //   | IfFalse IfTrue           MergeMem   \    |  |
1845   //   |  . . .    / \                 |      \   |  |
1846   //   |          /   \                |       |  |  |
1847   //   |     IfFalse IfTrue            |       |  |  |
1848   //   |      . . .    |               |       |  |  |
1849   //   |               If             /        |  |  |
1850   //   |               / \           /         |  |  |
1851   //   |              /   \         /          |  |  |
1852   //   |         IfFalse IfTrue    /           |  |  |
1853   //   |           . . .   |      /            |  |  |
1854   //   |                    \    /             |  |  |
1855   //   |                     \  /              |  |  |
1856   //   |         MemBarVolatile__(card mark  ) |  |  |
1857   //   |              ||   C |     \           |  |  |
1858   //   |             LoadB   If     |         /   |  |
1859   //   |                    / \ Raw |        /   /  /
1860   //   |                   . . .    |       /   /  /
1861   //   |                        \   |      /   /  /
1862   //   |                        StoreCM   /   /  /
1863   //   |                           |     /   /  /
1864   //   |                            . . .   /  /
1865   //   |                                   /  /
1866   //   |   . . .                          /  /
1867   //   |    |             | /            /  /
1868   //   |    |           Phi[M] /        /  /
1869   //   |    |             |   /        /  /
1870   //   |    |             |  /        /  /
1871   //   |  Region  . . .  Phi[M]      /  /
1872   //   |    |             |         /  /
1873   //    \   |             |        /  /
1874   //     \  | . . .       |       /  /
1875   //      \ |             |      /  /
1876   //      Region         Phi[M] /  /
1877   //        |               \  /  /
1878   //         \             MergeMem
1879   //          \            /
1880   //          MemBarVolatile
1881   //
1882   // As with CMS + CondCardMark the first MergeMem merges the
1883   // AliasIdxBot Mem slice from the leading membar and the oopptr Mem
1884   // slice from the Store into the card mark membar. However, in this
  // case it may also merge an AliasIdxRaw mem slice from the pre
1886   // barrier write.
1887   //
1888   // The trailing MergeMem merges an AliasIdxBot Mem slice from the
1889   // leading membar with an oop slice from the StoreN and an
  // AliasIdxRaw slice from the post barrier writes. In this case the
1891   // AliasIdxRaw Mem slice is merged through a series of Phi nodes
1892   // which combine feeds from the If regions in the post barrier
1893   // subgraph.
1894   //
1895   // So, for G1 the same characteristic subgraph arises as for CMS +
1896   // CondCardMark. There is a normal subgraph feeding the card mark
1897   // membar and a normal subgraph feeding the trailing membar.
1898   //
1899   // The CAS graph when using G1GC also includes an optional
1900   // post-write subgraph. It is very similar to the above graph except
1901   // for a few details.
1902   // 
  // - The control flow is gated by an additional If which tests the
1904   // result from the CompareAndSwapX node
1905   // 
1906   //  - The MergeMem which feeds the card mark membar only merges the
1907   // AliasIdxBot slice from the leading membar and the AliasIdxRaw
1908   // slice from the pre-barrier. It does not merge the SCMemProj
1909   // AliasIdxBot slice. So, this subgraph does not look like the
1910   // normal CAS subgraph.
1911   //
1912   // - The MergeMem which feeds the trailing membar merges the
1913   // AliasIdxBot slice from the leading membar, the AliasIdxRaw slice
1914   // from the post-barrier and the SCMemProj AliasIdxBot slice i.e. it
1915   // has two AliasIdxBot input slices. However, this subgraph does
1916   // still look like the normal CAS subgraph.
1917   //
1918   // So, the upshot is:
1919   //
  // In all cases a volatile put graph will include a *normal*
  // volatile store subgraph between the leading membar and the
  // trailing membar. It may also include a normal volatile store
  // subgraph between the leading membar and the card mark membar.
1924   //
1925   // In all cases a CAS graph will contain a unique normal CAS graph
1926   // feeding the trailing membar.
1927   //
1928   // In all cases where there is a card mark membar (either as part of
1929   // a volatile object put or CAS) it will be fed by a MergeMem whose
1930   // AliasIdxBot slice feed will be a leading membar.
1931   //
1932   // The predicates controlling generation of instructions for store
1933   // and barrier nodes employ a few simple helper functions (described
1934   // below) which identify the presence or absence of all these
1935   // subgraph configurations and provide a means of traversing from
1936   // one node in the subgraph to another.
1937 
1938   // is_CAS(int opcode)
1939   //
1940   // return true if opcode is one of the possible CompareAndSwapX
1941   // values otherwise false.
1942 
1943   bool is_CAS(int opcode)
1944   {
1945     switch(opcode) {
1946       // We handle these
1947     case Op_CompareAndSwapI:
1948     case Op_CompareAndSwapL:
1949     case Op_CompareAndSwapP:
1950     case Op_CompareAndSwapN:
1951  // case Op_CompareAndSwapB:
1952  // case Op_CompareAndSwapS:
1953       return true;
1954       // These are TBD
1955     case Op_WeakCompareAndSwapB:
1956     case Op_WeakCompareAndSwapS:
1957     case Op_WeakCompareAndSwapI:
1958     case Op_WeakCompareAndSwapL:
1959     case Op_WeakCompareAndSwapP:
1960     case Op_WeakCompareAndSwapN:
1961     case Op_CompareAndExchangeB:
1962     case Op_CompareAndExchangeS:
1963     case Op_CompareAndExchangeI:
1964     case Op_CompareAndExchangeL:
1965     case Op_CompareAndExchangeP:
1966     case Op_CompareAndExchangeN:
1967       return false;
1968     default:
1969       return false;
1970     }
1971   }
1972 
1973 
1974   // leading_to_trailing
1975   //
  // graph traversal helper which detects the normal case Mem feed from
1977   // a release membar (or, optionally, its cpuorder child) to a
1978   // dependent volatile membar i.e. it ensures that one or other of
1979   // the following Mem flow subgraph is present.
1980   //
1981   //   MemBarRelease {leading}
1982   //   {MemBarCPUOrder} {optional}
1983   //     Bot |  \      . . .
1984   //         |  StoreN/P[mo_release]  . . .
1985   //         |   /
1986   //        MergeMem
1987   //         |
1988   //   MemBarVolatile {not card mark}
1989   //
1990   //   MemBarRelease {leading}
1991   //   {MemBarCPUOrder} {optional}
1992   //      |       \      . . .
1993   //      |     CompareAndSwapX  . . .
1994   //               |
1995   //     . . .    SCMemProj
1996   //           \   |
1997   //      |    MergeMem
1998   //      |       /
1999   //    MemBarCPUOrder
2000   //    MemBarAcquire {trailing}
2001   //
  // the predicate needs to be capable of distinguishing the following
  // volatile put graph which may arise when a GC post barrier
  // inserts a card mark membar
2005   //
2006   //   MemBarRelease {leading}
2007   //   {MemBarCPUOrder}__
2008   //     Bot |   \       \
2009   //         |   StoreN/P \
2010   //         |    / \     |
2011   //        MergeMem \    |
2012   //         |        \   |
2013   //   MemBarVolatile  \  |
2014   //    {card mark}     \ |
2015   //                  MergeMem
2016   //                      |
2017   // {not card mark} MemBarVolatile
2018   //
2019   // if the correct configuration is present returns the trailing
2020   // membar otherwise NULL.
2021   //
2022   // the input membar is expected to be either a cpuorder membar or a
2023   // release membar. in the latter case it should not have a cpu membar
2024   // child.
2025   //
2026   // the returned value may be a card mark or trailing membar
2027   //
2028 
2029   MemBarNode *leading_to_trailing(MemBarNode *leading)
2030   {
2031     assert((leading->Opcode() == Op_MemBarRelease ||
2032             leading->Opcode() == Op_MemBarCPUOrder),
2033            "expecting a volatile or cpuroder membar!");
2034 
2035     // check the mem flow
2036     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2037 
2038     if (!mem) {
2039       return NULL;
2040     }
2041 
2042     Node *x = NULL;
2043     StoreNode * st = NULL;
2044     LoadStoreNode *cas = NULL;
2045     MergeMemNode *mm = NULL;
2046     MergeMemNode *mm2 = NULL;
2047 
2048     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2049       x = mem->fast_out(i);
2050       if (x->is_MergeMem()) {
2051         if (mm != NULL) {
2052           if (mm2 != NULL) {
2053           // should not see more than 2 merge mems
2054             return NULL;
2055           } else {
2056             mm2 = x->as_MergeMem();
2057           }
2058         } else {
2059           mm = x->as_MergeMem();
2060         }
2061       } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2062         // two releasing stores/CAS nodes is one too many
2063         if (st != NULL || cas != NULL) {
2064           return NULL;
2065         }
2066         st = x->as_Store();
2067       } else if (is_CAS(x->Opcode())) {
2068         if (st != NULL || cas != NULL) {
2069           return NULL;
2070         }
2071         cas = x->as_LoadStore();
2072       }
2073     }
2074 
2075     // must have a store or a cas
2076     if (!st && !cas) {
2077       return NULL;
2078     }
2079 
2080     // must have at least one merge if we also have st
2081     if (st && !mm) {
2082       return NULL;
2083     }
2084 
2085     if (cas) {
2086       Node *y = NULL;
2087       // look for an SCMemProj
2088       for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
2089         x = cas->fast_out(i);
2090         if (x->is_Proj()) {
2091           y = x;
2092           break;
2093         }
2094       }
2095       if (y == NULL) {
2096         return NULL;
2097       }
2098       // the proj must feed a MergeMem
2099       for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
2100         x = y->fast_out(i);
2101         if (x->is_MergeMem()) {
2102           mm = x->as_MergeMem();
2103           break;
2104         }
2105       }
2106       if (mm == NULL) {
2107         return NULL;
2108       }
2109       MemBarNode *mbar = NULL;
2110       // ensure the merge feeds a trailing membar cpuorder + acquire pair
2111       for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2112         x = mm->fast_out(i);
2113         if (x->is_MemBar()) {
2114           int opcode = x->Opcode();
2115           if (opcode == Op_MemBarCPUOrder) {
2116             MemBarNode *z =  x->as_MemBar();
2117             z = child_membar(z);
2118             if (z != NULL && z->Opcode() == Op_MemBarAcquire) {
2119               mbar = z;
2120             }
2121           }
2122           break;
2123         }
2124       }
2125       return mbar;
2126     } else {
2127       Node *y = NULL;
2128       // ensure the store feeds the first mergemem;
2129       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2130         if (st->fast_out(i) == mm) {
2131           y = st;
2132           break;
2133         }
2134       }
2135       if (y == NULL) {
2136         return NULL;
2137       }
2138       if (mm2 != NULL) {
2139         // ensure the store feeds the second mergemem;
2140         y = NULL;
2141         for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2142           if (st->fast_out(i) == mm2) {
2143             y = st;
2144           }
2145         }
2146         if (y == NULL) {
2147           return NULL;
2148         }
2149       }
2150 
2151       MemBarNode *mbar = NULL;
2152       // ensure the first mergemem feeds a volatile membar
2153       for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2154         x = mm->fast_out(i);
2155         if (x->is_MemBar()) {
2156           int opcode = x->Opcode();
2157           if (opcode == Op_MemBarVolatile) {
2158             mbar = x->as_MemBar();
2159           }
2160           break;
2161         }
2162       }
2163       if (mm2 == NULL) {
2164         // this is our only option for a trailing membar
2165         return mbar;
2166       }
2167       // ensure the second mergemem feeds a volatile membar
2168       MemBarNode *mbar2 = NULL;
2169       for (DUIterator_Fast imax, i = mm2->fast_outs(imax); i < imax; i++) {
2170         x = mm2->fast_out(i);
2171         if (x->is_MemBar()) {
2172           int opcode = x->Opcode();
2173           if (opcode == Op_MemBarVolatile) {
2174             mbar2 = x->as_MemBar();
2175           }
2176           break;
2177         }
2178       }
2179       // if we have two merge mems we must have two volatile membars
2180       if (mbar == NULL || mbar2 == NULL) {
2181         return NULL;
2182       }
2183       // return the trailing membar
2184       if (is_card_mark_membar(mbar2)) {
2185         return mbar;
2186       } else {
2187         if (is_card_mark_membar(mbar)) {
2188           return mbar2;
2189         } else {
2190           return NULL;
2191         }
2192       }
2193     }
2194   }
2195 
2196   // trailing_to_leading
2197   //
2198   // graph traversal helper which detects the normal case Mem feed
2199   // from a trailing membar to a preceding release membar (optionally
2200   // its cpuorder child) i.e. it ensures that one or other of the
2201   // following Mem flow subgraphs is present.
2202   //
2203   //   MemBarRelease {leading}
2204   //   MemBarCPUOrder {optional}
2205   //    | Bot |  \      . . .
2206   //    |     |  StoreN/P[mo_release]  . . .
2207   //    |     |   /
2208   //    |    MergeMem
2209   //    |     |
2210   //   MemBarVolatile {not card mark}
2211   //
2212   //   MemBarRelease {leading}
2213   //   MemBarCPUOrder {optional}
2214   //      |       \      . . .
2215   //      |     CompareAndSwapX  . . .
2216   //               |
2217   //     . . .    SCMemProj
2218   //           \   |
2219   //      |    MergeMem
2220   //      |       |
2221   //    MemBarCPUOrder
2222   //    MemBarAcquire {trailing}
2223   //
2224   // this predicate checks for the same flow as the previous predicate
2225   // but starting from the bottom rather than the top.
2226   //
  // if the configuration is present returns the cpuorder membar for
2228   // preference or when absent the release membar otherwise NULL.
2229   //
2230   // n.b. the input membar is expected to be a MemBarVolatile or
2231   // MemBarAcquire. if it is a MemBarVolatile it must *not* be a card
2232   // mark membar.
2233 
2234   MemBarNode *trailing_to_leading(const MemBarNode *barrier)
2235   {
2236     // input must be a volatile membar
2237     assert((barrier->Opcode() == Op_MemBarVolatile ||
2238             barrier->Opcode() == Op_MemBarAcquire),
2239            "expecting a volatile or an acquire membar");
2240 
2241     assert((barrier->Opcode() != Op_MemBarVolatile) ||
2242            !is_card_mark_membar(barrier),
2243            "not expecting a card mark membar");
2244     Node *x;
2245     bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
2246 
2247     // if we have an acquire membar then it must be fed via a CPUOrder
2248     // membar
2249 
2250     if (is_cas) {
2251       // skip to parent barrier which must be a cpuorder
2252       x = parent_membar(barrier);
2253       if (x->Opcode() != Op_MemBarCPUOrder)
2254         return NULL;
2255     } else {
2256       // start from the supplied barrier
2257       x = (Node *)barrier;
2258     }
2259 
2260     // the Mem feed to the membar should be a merge
2261     x = x ->in(TypeFunc::Memory);
2262     if (!x->is_MergeMem())
2263       return NULL;
2264 
2265     MergeMemNode *mm = x->as_MergeMem();
2266 
2267     if (is_cas) {
2268       // the merge should be fed from the CAS via an SCMemProj node
2269       x = NULL;
2270       for (uint idx = 1; idx < mm->req(); idx++) {
2271         if (mm->in(idx)->Opcode() == Op_SCMemProj) {
2272           x = mm->in(idx);
2273           break;
2274         }
2275       }
2276       if (x == NULL) {
2277         return NULL;
2278       }
2279       // check for a CAS feeding this proj
2280       x = x->in(0);
2281       int opcode = x->Opcode();
2282       if (!is_CAS(opcode)) {
2283         return NULL;
2284       }
2285       // the CAS should get its mem feed from the leading membar
2286       x = x->in(MemNode::Memory);
2287     } else {
2288       // the merge should get its Bottom mem feed from the leading membar
2289       x = mm->in(Compile::AliasIdxBot);
2290     }
2291 
2292     // ensure this is a non control projection
2293     if (!x->is_Proj() || x->is_CFG()) {
2294       return NULL;
2295     }
2296     // if it is fed by a membar that's the one we want
2297     x = x->in(0);
2298 
2299     if (!x->is_MemBar()) {
2300       return NULL;
2301     }
2302 
2303     MemBarNode *leading = x->as_MemBar();
2304     // reject invalid candidates
2305     if (!leading_membar(leading)) {
2306       return NULL;
2307     }
2308 
2309     // ok, we have a leading membar, now for the sanity clauses
2310 
2311     // the leading membar must feed Mem to a releasing store or CAS
2312     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2313     StoreNode *st = NULL;
2314     LoadStoreNode *cas = NULL;
2315     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2316       x = mem->fast_out(i);
2317       if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2318         // two stores or CASes is one too many
2319         if (st != NULL || cas != NULL) {
2320           return NULL;
2321         }
2322         st = x->as_Store();
2323       } else if (is_CAS(x->Opcode())) {
2324         if (st != NULL || cas != NULL) {
2325           return NULL;
2326         }
2327         cas = x->as_LoadStore();
2328       }
2329     }
2330 
2331     // we should not have both a store and a cas
2332     if (st == NULL & cas == NULL) {
2333       return NULL;
2334     }
2335 
2336     if (st == NULL) {
2337       // nothing more to check
2338       return leading;
2339     } else {
2340       // we should not have a store if we started from an acquire
2341       if (is_cas) {
2342         return NULL;
2343       }
2344 
2345       // the store should feed the merge we used to get here
2346       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2347         if (st->fast_out(i) == mm) {
2348           return leading;
2349         }
2350       }
2351     }
2352 
2353     return NULL;
2354   }
2355 
2356   // card_mark_to_leading
2357   //
2358   // graph traversal helper which traverses from a card mark volatile
2359   // membar to a leading membar i.e. it ensures that the following Mem
2360   // flow subgraph is present.
2361   //
2362   //    MemBarRelease {leading}
2363   //   {MemBarCPUOrder} {optional}
2364   //         |   . . .
2365   //     Bot |   /
2366   //      MergeMem
2367   //         |
2368   //     MemBarVolatile (card mark)
2369   //        |     \
2370   //      . . .   StoreCM
2371   //
  // if the configuration is present returns the cpuorder membar for
2373   // preference or when absent the release membar otherwise NULL.
2374   //
  // n.b. the input membar is expected to be a MemBarVolatile and must
2376   // be a card mark membar.
2377 
2378   MemBarNode *card_mark_to_leading(const MemBarNode *barrier)
2379   {
2380     // input must be a card mark volatile membar
2381     assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2382 
2383     // the Mem feed to the membar should be a merge
2384     Node *x = barrier->in(TypeFunc::Memory);
2385     if (!x->is_MergeMem()) {
2386       return NULL;
2387     }
2388 
2389     MergeMemNode *mm = x->as_MergeMem();
2390 
2391     x = mm->in(Compile::AliasIdxBot);
2392 
2393     if (!x->is_MemBar()) {
2394       return NULL;
2395     }
2396 
2397     MemBarNode *leading = x->as_MemBar();
2398 
2399     if (leading_membar(leading)) {
2400       return leading;
2401     }
2402 
2403     return NULL;
2404   }
2405 
2406 bool unnecessary_acquire(const Node *barrier)
2407 {
2408   assert(barrier->is_MemBar(), "expecting a membar");
2409 
2410   if (UseBarriersForVolatile) {
2411     // we need to plant a dmb
2412     return false;
2413   }
2414 
2415   // a volatile read derived from bytecode (or also from an inlined
2416   // SHA field read via LibraryCallKit::load_field_from_object)
2417   // manifests as a LoadX[mo_acquire] followed by an acquire membar
2418   // with a bogus read dependency on it's preceding load. so in those
2419   // cases we will find the load node at the PARMS offset of the
2420   // acquire membar.  n.b. there may be an intervening DecodeN node.
2421   //
2422   // a volatile load derived from an inlined unsafe field access
2423   // manifests as a cpuorder membar with Ctl and Mem projections
2424   // feeding both an acquire membar and a LoadX[mo_acquire]. The
2425   // acquire then feeds another cpuorder membar via Ctl and Mem
2426   // projections. The load has no output dependency on these trailing
2427   // membars because subsequent nodes inserted into the graph take
2428   // their control feed from the final membar cpuorder meaning they
2429   // are all ordered after the load.
2430 
2431   Node *x = barrier->lookup(TypeFunc::Parms);
2432   if (x) {
2433     // we are starting from an acquire and it has a fake dependency
2434     //
2435     // need to check for
2436     //
2437     //   LoadX[mo_acquire]
2438     //   {  |1   }
2439     //   {DecodeN}
2440     //      |Parms
2441     //   MemBarAcquire*
2442     //
2443     // where * tags node we were passed
2444     // and |k means input k
2445     if (x->is_DecodeNarrowPtr()) {
2446       x = x->in(1);
2447     }
2448 
2449     return (x->is_Load() && x->as_Load()->is_acquire());
2450   }
2451 
2452   // now check for an unsafe volatile get
2453 
2454   // need to check for
2455   //
2456   //   MemBarCPUOrder
2457   //        ||       \\
2458   //   MemBarAcquire* LoadX[mo_acquire]
2459   //        ||
2460   //   MemBarCPUOrder
2461   //
2462   // where * tags node we were passed
2463   // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes
2464 
2465   // check for a parent MemBarCPUOrder
2466   ProjNode *ctl;
2467   ProjNode *mem;
2468   MemBarNode *parent = parent_membar(barrier);
2469   if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
2470     return false;
2471   ctl = parent->proj_out(TypeFunc::Control);
2472   mem = parent->proj_out(TypeFunc::Memory);
2473   if (!ctl || !mem) {
2474     return false;
2475   }
2476   // ensure the proj nodes both feed a LoadX[mo_acquire]
2477   LoadNode *ld = NULL;
2478   for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
2479     x = ctl->fast_out(i);
2480     // if we see a load we keep hold of it and stop searching
2481     if (x->is_Load()) {
2482       ld = x->as_Load();
2483       break;
2484     }
2485   }
2486   // it must be an acquiring load
2487   if (ld && ld->is_acquire()) {
2488 
2489     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2490       x = mem->fast_out(i);
2491       // if we see the same load we drop it and stop searching
2492       if (x == ld) {
2493         ld = NULL;
2494         break;
2495       }
2496     }
2497     // we must have dropped the load
2498     if (ld == NULL) {
2499       // check for a child cpuorder membar
2500       MemBarNode *child  = child_membar(barrier->as_MemBar());
2501       if (child && child->Opcode() == Op_MemBarCPUOrder)
2502         return true;
2503     }
2504   }
2505 
  // final option for unnecessary membar is that it is a trailing node
2507   // belonging to a CAS
2508 
2509   MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());
2510 
2511   return leading != NULL;
2512 }
2513 
2514 bool needs_acquiring_load(const Node *n)
2515 {
2516   assert(n->is_Load(), "expecting a load");
2517   if (UseBarriersForVolatile) {
2518     // we use a normal load and a dmb
2519     return false;
2520   }
2521 
2522   LoadNode *ld = n->as_Load();
2523 
2524   if (!ld->is_acquire()) {
2525     return false;
2526   }
2527 
2528   // check if this load is feeding an acquire membar
2529   //
2530   //   LoadX[mo_acquire]
2531   //   {  |1   }
2532   //   {DecodeN}
2533   //      |Parms
2534   //   MemBarAcquire*
2535   //
2536   // where * tags node we were passed
2537   // and |k means input k
2538 
2539   Node *start = ld;
2540   Node *mbacq = NULL;
2541 
2542   // if we hit a DecodeNarrowPtr we reset the start node and restart
2543   // the search through the outputs
2544  restart:
2545 
2546   for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
2547     Node *x = start->fast_out(i);
2548     if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
2549       mbacq = x;
2550     } else if (!mbacq &&
2551                (x->is_DecodeNarrowPtr() ||
2552                 (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
2553       start = x;
2554       goto restart;
2555     }
2556   }
2557 
2558   if (mbacq) {
2559     return true;
2560   }
2561 
2562   // now check for an unsafe volatile get
2563 
2564   // check if Ctl and Proj feed comes from a MemBarCPUOrder
2565   //
2566   //     MemBarCPUOrder
2567   //        ||       \\
2568   //   MemBarAcquire* LoadX[mo_acquire]
2569   //        ||
2570   //   MemBarCPUOrder
2571 
2572   MemBarNode *membar;
2573 
2574   membar = parent_membar(ld);
2575 
2576   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2577     return false;
2578   }
2579 
2580   // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
2581 
2582   membar = child_membar(membar);
2583 
2584   if (!membar || !membar->Opcode() == Op_MemBarAcquire) {
2585     return false;
2586   }
2587 
2588   membar = child_membar(membar);
2589 
2590   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2591     return false;
2592   }
2593 
2594   return true;
2595 }
2596 
2597 bool unnecessary_release(const Node *n)
2598 {
2599   assert((n->is_MemBar() &&
2600           n->Opcode() == Op_MemBarRelease),
2601          "expecting a release membar");
2602 
2603   if (UseBarriersForVolatile) {
2604     // we need to plant a dmb
2605     return false;
2606   }
2607 
2608   // if there is a dependent CPUOrder barrier then use that as the
2609   // leading
2610 
2611   MemBarNode *barrier = n->as_MemBar();
2612   // check for an intervening cpuorder membar
2613   MemBarNode *b = child_membar(barrier);
2614   if (b && b->Opcode() == Op_MemBarCPUOrder) {
2615     // ok, so start the check from the dependent cpuorder barrier
2616     barrier = b;
2617   }
2618 
2619   // must start with a normal feed
2620   MemBarNode *trailing = leading_to_trailing(barrier);
2621 
2622   return (trailing != NULL);
2623 }
2624 
2625 bool unnecessary_volatile(const Node *n)
2626 {
2627   // assert n->is_MemBar();
2628   if (UseBarriersForVolatile) {
2629     // we need to plant a dmb
2630     return false;
2631   }
2632 
2633   MemBarNode *mbvol = n->as_MemBar();
2634 
2635   // first we check if this is part of a card mark. if so then we have
2636   // to generate a StoreLoad barrier
2637 
2638   if (is_card_mark_membar(mbvol)) {
2639       return false;
2640   }
2641 
2642   // ok, if it's not a card mark then we still need to check if it is
2643   // a trailing membar of a volatile put graph.
2644 
2645   return (trailing_to_leading(mbvol) != NULL);
2646 }
2647 
2648 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2649 
2650 bool needs_releasing_store(const Node *n)
2651 {
2652   // assert n->is_Store();
2653   if (UseBarriersForVolatile) {
2654     // we use a normal store and dmb combination
2655     return false;
2656   }
2657 
2658   StoreNode *st = n->as_Store();
2659 
2660   // the store must be marked as releasing
2661   if (!st->is_release()) {
2662     return false;
2663   }
2664 
2665   // the store must be fed by a membar
2666 
2667   Node *x = st->lookup(StoreNode::Memory);
2668 
2669   if (! x || !x->is_Proj()) {
2670     return false;
2671   }
2672 
2673   ProjNode *proj = x->as_Proj();
2674 
2675   x = proj->lookup(0);
2676 
2677   if (!x || !x->is_MemBar()) {
2678     return false;
2679   }
2680 
2681   MemBarNode *barrier = x->as_MemBar();
2682 
2683   // if the barrier is a release membar or a cpuorder mmebar fed by a
2684   // release membar then we need to check whether that forms part of a
2685   // volatile put graph.
2686 
2687   // reject invalid candidates
2688   if (!leading_membar(barrier)) {
2689     return false;
2690   }
2691 
2692   // does this lead a normal subgraph?
2693   MemBarNode *trailing = leading_to_trailing(barrier);
2694 
2695   return (trailing != NULL);
2696 }
2697 
2698 // predicate controlling translation of CAS
2699 //
2700 // returns true if CAS needs to use an acquiring load otherwise false
2701 
bool needs_acquiring_load_exclusive(const Node *n)
{
  assert(is_CAS(n->Opcode()), "expecting a compare and swap");
  if (UseBarriersForVolatile) {
    // explicit dmb barriers are planted instead, so a plain
    // ldxr/stxr pair suffices
    return false;
  }

  // CAS nodes only ought to turn up in inlined unsafe CAS operations
#ifdef ASSERT
  // in debug builds verify the expected graph shape around the CAS:
  // MemBarRelease -> MemBarCPUOrder -> CAS ... trailing MemBarAcquire
  LoadStoreNode *st = n->as_LoadStore();

  // the store must be fed by a membar

  Node *x = st->lookup(StoreNode::Memory);

  assert (x && x->is_Proj(), "CAS not fed by memory proj!");

  ProjNode *proj = x->as_Proj();

  x = proj->lookup(0);

  assert (x && x->is_MemBar(), "CAS not fed by membar!");

  MemBarNode *barrier = x->as_MemBar();

  // the barrier must be a cpuorder membar fed by a release membar

  assert(barrier->Opcode() == Op_MemBarCPUOrder,
         "CAS not fed by cpuorder membar!");

  MemBarNode *b = parent_membar(barrier);
  assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
          "CAS not fed by cpuorder+release membar pair!");

  // does this lead a normal subgraph?
  MemBarNode *mbar = leading_to_trailing(barrier);

  assert(mbar != NULL, "CAS not embedded in normal graph!");

  assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
#endif // ASSERT
  // so we can just return true here
  return true;
}
2746 
2747 // predicate controlling translation of StoreCM
2748 //
// returns true if the card write does not need to be preceded by a
// StoreStore barrier, otherwise false
2751 
2752 bool unnecessary_storestore(const Node *storecm)
2753 {
2754   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
2755 
2756   // we only ever need to generate a dmb ishst between an object put
2757   // and the associated card mark when we are using CMS without
2758   // conditional card marking. Any other occurence will happen when
2759   // performing a card mark using CMS with conditional card marking or
2760   // G1. In those cases the preceding MamBarVolatile will be
2761   // translated to a dmb ish which guarantes visibility of the
2762   // preceding StoreN/P before this StoreCM
2763 
2764   if (!UseConcMarkSweepGC || UseCondCardMark) {
2765     return true;
2766   }
2767 
2768   // if we are implementing volatile puts using barriers then we must
2769   // insert the dmb ishst
2770 
2771   if (UseBarriersForVolatile) {
2772     return false;
2773   }
2774 
2775   // we must be using CMS with conditional card marking so we ahve to
2776   // generate the StoreStore
2777 
2778   return false;
2779 }
2780 
2781 
2782 #define __ _masm.
2783 
2784 // advance declarations for helper functions to convert register
2785 // indices to register objects
2786 
2787 // the ad file has to provide implementations of certain methods
2788 // expected by the generic code
2789 //
2790 // REQUIRED FUNCTIONALITY
2791 
2792 //=============================================================================
2793 
2794 // !!!!! Special hack to get all types of calls to specify the byte offset
2795 //       from the start of the call to the point where the return address
2796 //       will point.
2797 
2798 int MachCallStaticJavaNode::ret_addr_offset()
2799 {
2800   // call should be a simple bl
2801   int off = 4;
2802   return off;
2803 }
2804 
2805 int MachCallDynamicJavaNode::ret_addr_offset()
2806 {
2807   return 16; // movz, movk, movk, bl
2808 }
2809 
2810 int MachCallRuntimeNode::ret_addr_offset() {
2811   // for generated stubs the call will be
2812   //   far_call(addr)
2813   // for real runtime callouts it will be six instructions
2814   // see aarch64_enc_java_to_runtime
2815   //   adr(rscratch2, retaddr)
2816   //   lea(rscratch1, RuntimeAddress(addr)
2817   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
2818   //   blrt rscratch1
2819   CodeBlob *cb = CodeCache::find_blob(_entry_point);
2820   if (cb) {
2821     return MacroAssembler::far_branch_size();
2822   } else {
2823     return 6 * NativeInstruction::instruction_size;
2824   }
2825 }
2826 
2827 // Indicate if the safepoint node needs the polling page as an input
2828 
2829 // the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
2831 // instruction itself. so we cannot plant a mov of the safepoint poll
2832 // address followed by a load. setting this to true means the mov is
2833 // scheduled as a prior instruction. that's better for scheduling
2834 // anyway.
2835 
2836 bool SafePointNode::needs_polling_address_input()
2837 {
2838   return true;
2839 }
2840 
2841 //=============================================================================
2842 
2843 #ifndef PRODUCT
2844 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2845   st->print("BREAKPOINT");
2846 }
2847 #endif
2848 
2849 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2850   MacroAssembler _masm(&cbuf);
2851   __ brk(0);
2852 }
2853 
2854 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
2855   return MachNode::size(ra_);
2856 }
2857 
2858 //=============================================================================
2859 
2860 #ifndef PRODUCT
2861   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
2862     st->print("nop \t# %d bytes pad for loops and calls", _count);
2863   }
2864 #endif
2865 
2866   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2867     MacroAssembler _masm(&cbuf);
2868     for (int i = 0; i < _count; i++) {
2869       __ nop();
2870     }
2871   }
2872 
2873   uint MachNopNode::size(PhaseRegAlloc*) const {
2874     return _count * NativeInstruction::instruction_size;
2875   }
2876 
2877 //=============================================================================
2878 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
2879 
2880 int Compile::ConstantTable::calculate_table_base_offset() const {
2881   return 0;  // absolute addressing, no offset
2882 }
2883 
2884 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
2885 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
2886   ShouldNotReachHere();
2887 }
2888 
2889 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
2890   // Empty encoding
2891 }
2892 
2893 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
2894   return 0;
2895 }
2896 
2897 #ifndef PRODUCT
2898 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
2899   st->print("-- \t// MachConstantBaseNode (empty encoding)");
2900 }
2901 #endif
2902 
#ifndef PRODUCT
// Print a human readable form of the prolog mirroring what emit()
// generates below.
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  // frame size in bytes
  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  if (framesize < ((1 << 9) + 2 * wordSize)) {
    // small frame: drop sp with an immediate sub, then save rfp/lr at
    // the top of the new frame
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    // large frame: push lr/rfp first, then drop sp via a scratch
    // register because the offset does not fit an immediate
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
#endif
2924 
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  // bang the stack if the frame is large enough to require it
  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  // save lr/rfp and allocate the frame
  __ build_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  // record the offset at which the frame becomes complete
  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
2960 
2961 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
2962 {
2963   return MachNode::size(ra_); // too many variables; just compute it
2964                               // the hard way
2965 }
2966 
2967 int MachPrologNode::reloc() const
2968 {
2969   return 0;
2970 }
2971 
2972 //=============================================================================
2973 
#ifndef PRODUCT
// Print a human readable form of the epilog mirroring what emit()
// generates below.
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  if (framesize == 0) {
    // nothing below the saved lr/rfp pair: a single post-indexed pop
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    // small frame: reload the pair then release the frame with an
    // immediate add
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    // large frame: the offset does not fit an immediate so go via a
    // scratch register
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    // return polling page touch, see emit() below
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
#endif
2999 
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  // frame size in bytes
  int framesize = C->frame_slots() << LogBytesPerInt;

  // restore rfp/lr and release the frame
  __ remove_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_reentry);
  }

  // touch the polling page on method return so a safepoint can stop
  // this thread
  if (do_polling() && C->is_method_compilation()) {
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
3015 
3016 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
3017   // Variable size. Determine dynamically.
3018   return MachNode::size(ra_);
3019 }
3020 
3021 int MachEpilogNode::reloc() const {
3022   // Return number of relocatable values contained in this instruction.
3023   return 1; // 1 for polling page.
3024 }
3025 
3026 const Pipeline * MachEpilogNode::pipeline() const {
3027   return MachNode::pipeline_class();
3028 }
3029 
3030 // This method seems to be obsolete. It is declared in machnode.hpp
3031 // and defined in all *.ad files, but it is never called. Should we
3032 // get rid of it?
3033 int MachEpilogNode::safepoint_offset() const {
3034   assert(do_polling(), "no return for this epilog node");
3035   return 4;
3036 }
3037 
3038 //=============================================================================
3039 
3040 // Figure out which register class each belongs in: rc_int, rc_float or
3041 // rc_stack.
3042 enum RC { rc_bad, rc_int, rc_float, rc_stack };
3043 
3044 static enum RC rc_class(OptoReg::Name reg) {
3045 
3046   if (reg == OptoReg::Bad) {
3047     return rc_bad;
3048   }
3049 
3050   // we have 30 int registers * 2 halves
3051   // (rscratch1 and rscratch2 are omitted)
3052 
3053   if (reg < 60) {
3054     return rc_int;
3055   }
3056 
3057   // we have 32 float register * 2 halves
3058   if (reg < 60 + 128) {
3059     return rc_float;
3060   }
3061 
3062   // Between float regs & stack is the flags regs.
3063   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
3064 
3065   return rc_stack;
3066 }
3067 
3068 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
3069   Compile* C = ra_->C;
3070 
3071   // Get registers to move.
3072   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
3073   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
3074   OptoReg::Name dst_hi = ra_->get_reg_second(this);
3075   OptoReg::Name dst_lo = ra_->get_reg_first(this);
3076 
3077   enum RC src_hi_rc = rc_class(src_hi);
3078   enum RC src_lo_rc = rc_class(src_lo);
3079   enum RC dst_hi_rc = rc_class(dst_hi);
3080   enum RC dst_lo_rc = rc_class(dst_lo);
3081 
3082   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
3083 
3084   if (src_hi != OptoReg::Bad) {
3085     assert((src_lo&1)==0 && src_lo+1==src_hi &&
3086            (dst_lo&1)==0 && dst_lo+1==dst_hi,
3087            "expected aligned-adjacent pairs");
3088   }
3089 
3090   if (src_lo == dst_lo && src_hi == dst_hi) {
3091     return 0;            // Self copy, no move.
3092   }
3093 
3094   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
3095               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
3096   int src_offset = ra_->reg2offset(src_lo);
3097   int dst_offset = ra_->reg2offset(dst_lo);
3098 
3099   if (bottom_type()->isa_vect() != NULL) {
3100     uint ireg = ideal_reg();
3101     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
3102     if (cbuf) {
3103       MacroAssembler _masm(cbuf);
3104       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
3105       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
3106         // stack->stack
3107         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
3108         if (ireg == Op_VecD) {
3109           __ unspill(rscratch1, true, src_offset);
3110           __ spill(rscratch1, true, dst_offset);
3111         } else {
3112           __ spill_copy128(src_offset, dst_offset);
3113         }
3114       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
3115         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3116                ireg == Op_VecD ? __ T8B : __ T16B,
3117                as_FloatRegister(Matcher::_regEncode[src_lo]));
3118       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
3119         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3120                        ireg == Op_VecD ? __ D : __ Q,
3121                        ra_->reg2offset(dst_lo));
3122       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
3123         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3124                        ireg == Op_VecD ? __ D : __ Q,
3125                        ra_->reg2offset(src_lo));
3126       } else {
3127         ShouldNotReachHere();
3128       }
3129     }
3130   } else if (cbuf) {
3131     MacroAssembler _masm(cbuf);
3132     switch (src_lo_rc) {
3133     case rc_int:
3134       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
3135         if (is64) {
3136             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
3137                    as_Register(Matcher::_regEncode[src_lo]));
3138         } else {
3139             MacroAssembler _masm(cbuf);
3140             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
3141                     as_Register(Matcher::_regEncode[src_lo]));
3142         }
3143       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
3144         if (is64) {
3145             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3146                      as_Register(Matcher::_regEncode[src_lo]));
3147         } else {
3148             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3149                      as_Register(Matcher::_regEncode[src_lo]));
3150         }
3151       } else {                    // gpr --> stack spill
3152         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3153         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
3154       }
3155       break;
3156     case rc_float:
3157       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
3158         if (is64) {
3159             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
3160                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3161         } else {
3162             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
3163                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3164         }
3165       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
3166           if (cbuf) {
3167             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3168                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3169         } else {
3170             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3171                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3172         }
3173       } else {                    // fpr --> stack spill
3174         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3175         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3176                  is64 ? __ D : __ S, dst_offset);
3177       }
3178       break;
3179     case rc_stack:
3180       if (dst_lo_rc == rc_int) {  // stack --> gpr load
3181         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
3182       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
3183         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3184                    is64 ? __ D : __ S, src_offset);
3185       } else {                    // stack --> stack copy
3186         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3187         __ unspill(rscratch1, is64, src_offset);
3188         __ spill(rscratch1, is64, dst_offset);
3189       }
3190       break;
3191     default:
3192       assert(false, "bad rc_class for spill");
3193       ShouldNotReachHere();
3194     }
3195   }
3196 
3197   if (st) {
3198     st->print("spill ");
3199     if (src_lo_rc == rc_stack) {
3200       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
3201     } else {
3202       st->print("%s -> ", Matcher::regName[src_lo]);
3203     }
3204     if (dst_lo_rc == rc_stack) {
3205       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
3206     } else {
3207       st->print("%s", Matcher::regName[dst_lo]);
3208     }
3209     if (bottom_type()->isa_vect() != NULL) {
3210       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
3211     } else {
3212       st->print("\t# spill size = %d", is64 ? 64:32);
3213     }
3214   }
3215 
3216   return 0;
3217 
3218 }
3219 
3220 #ifndef PRODUCT
3221 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3222   if (!ra_)
3223     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
3224   else
3225     implementation(NULL, ra_, false, st);
3226 }
3227 #endif
3228 
3229 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3230   implementation(&cbuf, ra_, false, NULL);
3231 }
3232 
3233 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
3234   return MachNode::size(ra_);
3235 }
3236 
3237 //=============================================================================
3238 
3239 #ifndef PRODUCT
3240 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3241   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3242   int reg = ra_->get_reg_first(this);
3243   st->print("add %s, rsp, #%d]\t# box lock",
3244             Matcher::regName[reg], offset);
3245 }
3246 #endif
3247 
3248 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3249   MacroAssembler _masm(&cbuf);
3250 
3251   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3252   int reg    = ra_->get_encode(this);
3253 
3254   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
3255     __ add(as_Register(reg), sp, offset);
3256   } else {
3257     ShouldNotReachHere();
3258   }
3259 }
3260 
3261 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
3262   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
3263   return 4;
3264 }
3265 
3266 //=============================================================================
3267 
3268 #ifndef PRODUCT
3269 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
3270 {
3271   st->print_cr("# MachUEPNode");
3272   if (UseCompressedClassPointers) {
3273     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3274     if (Universe::narrow_klass_shift() != 0) {
3275       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
3276     }
3277   } else {
3278    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3279   }
3280   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
3281   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
3282 }
3283 #endif
3284 
3285 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
3286 {
3287   // This is the unverified entry point.
3288   MacroAssembler _masm(&cbuf);
3289 
3290   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
3291   Label skip;
3292   // TODO
3293   // can we avoid this skip and still use a reloc?
3294   __ br(Assembler::EQ, skip);
3295   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
3296   __ bind(skip);
3297 }
3298 
3299 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
3300 {
3301   return MachNode::size(ra_);
3302 }
3303 
3304 // REQUIRED EMIT CODE
3305 
3306 //=============================================================================
3307 
3308 // Emit exception handler code.
// Generates the stub reached on an exception: a far jump to the opto
// runtime exception blob. Returns the stub's offset in the code
// buffer, or 0 when the code cache is full.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // mov rscratch1 #exception_blob_entry_point
  // br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
3327 
3328 // Emit deopt handler code.
// Generates the stub reached on deoptimization: capture a pc in lr
// then far-jump to the deopt blob's unpack entry. Returns the stub's
// offset in the code buffer, or 0 when the code cache is full.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // materialise the current pc in lr -- presumably the deopt blob
  // uses it as the return address; confirm against the unpack blob
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
3348 
3349 // REQUIRED MATCHER CODE
3350 
3351 //=============================================================================
3352 
3353 const bool Matcher::match_rule_supported(int opcode) {
3354 
3355   switch (opcode) {
3356   default:
3357     break;
3358   }
3359 
3360   if (!has_match_rule(opcode)) {
3361     return false;
3362   }
3363 
3364   return true;  // Per default match rules are supported.
3365 }
3366 
3367 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
3368 
3369   // TODO
3370   // identify extra cases that we might want to provide match rules for
3371   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
3372   bool ret_value = match_rule_supported(opcode);
3373   // Add rules here.
3374 
3375   return ret_value;  // Per default match rules are supported.
3376 }
3377 
3378 const bool Matcher::has_predicated_vectors(void) {
3379   return false;
3380 }
3381 
3382 const int Matcher::float_pressure(int default_pressure_threshold) {
3383   return default_pressure_threshold;
3384 }
3385 
3386 int Matcher::regnum_to_fpu_offset(int regnum)
3387 {
3388   Unimplemented();
3389   return 0;
3390 }
3391 
3392 // Is this branch offset short enough that a short branch can be used?
3393 //
3394 // NOTE: If the platform does not provide any short branch variants, then
3395 //       this method should return false for offset 0.
3396 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
3397   // The passed offset is relative to address of the branch.
3398 
3399   return (-32768 <= offset && offset < 32768);
3400 }
3401 
3402 const bool Matcher::isSimpleConstant64(jlong value) {
3403   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
3404   // Probably always true, even if a temp register is required.
3405   return true;
3406 }
3407 
3408 // true just means we have fast l2f conversion
3409 const bool Matcher::convL2FSupported(void) {
3410   return true;
3411 }
3412 
3413 // Vector width in bytes.
3414 const int Matcher::vector_width_in_bytes(BasicType bt) {
3415   int size = MIN2(16,(int)MaxVectorSize);
3416   // Minimum 2 values in vector
3417   if (size < 2*type2aelembytes(bt)) size = 0;
3418   // But never < 4
3419   if (size < 4) size = 0;
3420   return size;
3421 }
3422 
3423 // Limits on vector size (number of elements) loaded into vector.
3424 const int Matcher::max_vector_size(const BasicType bt) {
3425   return vector_width_in_bytes(bt)/type2aelembytes(bt);
3426 }
3427 const int Matcher::min_vector_size(const BasicType bt) {
3428 //  For the moment limit the vector size to 8 bytes
3429     int size = 8 / type2aelembytes(bt);
3430     if (size < 2) size = 2;
3431     return size;
3432 }
3433 
// Vector ideal reg.
// Map a vector length in bytes to the ideal register class holding
// it: VecD for 64-bit, VecX for 128-bit vectors.  Any other length
// is a matcher bug.
const int Matcher::vector_ideal_reg(int len) {
  switch(len) {
    case  8: return Op_VecD;
    case 16: return Op_VecX;
  }
  ShouldNotReachHere();
  return 0;
}

// Vector shift counts are always held in a full 128-bit register,
// regardless of the operand vector's size.
const int Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecX;
}
3447 
// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// AArch64 supports misaligned vector store/load (the old comment
// said "x86" -- a copy-paste left over from the x86 port).
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;
3460 
// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
// false: the hardware shift behavior makes explicit masking unnecessary.
const bool Matcher::need_masked_shift_count = false;
3478 
// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Only profitable when no shift is required, i.e. unscaled
  // compressed-oops mode.
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}

bool Matcher::const_oop_prefer_decode() {
  // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
  return Universe::narrow_oop_base() == NULL;
}

bool Matcher::const_klass_prefer_decode() {
  // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
  return Universe::narrow_klass_base() == NULL;
}
3508 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Must never be called on AArch64 -- the body aborts via
// Unimplemented().  (The previous "No-op on amd64" comment was a
// copy-paste from another port and did not describe this code.)
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return true; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
3540 
// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers not be
// available to the callee.
bool Matcher::can_be_java_arg(int reg)
{
  // r0-r7 (integer) and v0-v7 (FP/SIMD), including their high
  // halves, are the Java argument registers on AArch64.
  return
    reg ==  R0_num || reg == R0_H_num ||
    reg ==  R1_num || reg == R1_H_num ||
    reg ==  R2_num || reg == R2_H_num ||
    reg ==  R3_num || reg == R3_H_num ||
    reg ==  R4_num || reg == R4_H_num ||
    reg ==  R5_num || reg == R5_H_num ||
    reg ==  R6_num || reg == R6_H_num ||
    reg ==  R7_num || reg == R7_H_num ||
    reg ==  V0_num || reg == V0_H_num ||
    reg ==  V1_num || reg == V1_H_num ||
    reg ==  V2_num || reg == V2_H_num ||
    reg ==  V3_num || reg == V3_H_num ||
    reg ==  V4_num || reg == V4_H_num ||
    reg ==  V5_num || reg == V5_H_num ||
    reg ==  V6_num || reg == V6_H_num ||
    reg ==  V7_num || reg == V7_H_num;
}

// Any Java argument register may be used to spill.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}
3571 
// Never use a hand-written asm stub for long division by a constant.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}

// Register for DIVI projection of divmodI.
// The four divmod projection masks below are never requested on this
// port (enforced by ShouldNotReachHere).
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// SP is saved in FP across method-handle invokes.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
3602 
3603 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
3604   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
3605     Node* u = addp->fast_out(i);
3606     if (u->is_Mem()) {
3607       int opsize = u->as_Mem()->memory_size();
3608       assert(opsize > 0, "unexpected memory operand size");
3609       if (u->as_Mem()->memory_size() != (1<<shift)) {
3610         return false;
3611       }
3612     }
3613   }
3614   return true;
3615 }
3616 
// The matcher does not require ConvI2L nodes to carry an explicit type.
const bool Matcher::convi2l_type_required = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  // Simple base+offset addresses are handled by the shared helper.
  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
    return true;
  }

  Node *off = m->in(AddPNode::Offset);
  // Case 1: offset is (LShiftL x con) where the shift amount matches
  // every memory use -- fold the whole shifted index into the address.
  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
      // Are there other uses besides address expressions?
      !is_visited(off)) {
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(2), Visit);
    Node *conv = off->in(1);
    // If the shifted value is itself (ConvI2L x), the conversion can
    // be absorbed into the sign-extending addressing mode as well.
    if (conv->Opcode() == Op_ConvI2L &&
        // Are there other uses besides address expressions?
        !is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Pre_Visit);
    } else {
      mstack.push(conv, Pre_Visit);
    }
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  // Case 2: offset is a bare (ConvI2L x) -- subsume the conversion.
  } else if (off->Opcode() == Op_ConvI2L &&
             // Are there other uses besides address expressions?
             !is_visited(off)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(1), Pre_Visit);
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
3659 
// Transform:
// (AddP base (AddP base address (LShiftL index con)) offset)
// into:
// (AddP base (AddP base offset) (LShiftL index con))
// to take full advantage of ARM's addressing modes
void Compile::reshape_address(AddPNode* addp) {
  Node *addr = addp->in(AddPNode::Address);
  if (addr->is_AddP() && addr->in(AddPNode::Base) == addp->in(AddPNode::Base)) {
    const AddPNode *addp2 = addr->as_AddP();
    // Only reshape when the inner offset is either a constant-scaled
    // index whose scale fits every memory use, or an i2l conversion.
    if ((addp2->in(AddPNode::Offset)->Opcode() == Op_LShiftL &&
         addp2->in(AddPNode::Offset)->in(2)->is_Con() &&
         size_fits_all_mem_uses(addp, addp2->in(AddPNode::Offset)->in(2)->get_int())) ||
        addp2->in(AddPNode::Offset)->Opcode() == Op_ConvI2L) {

      // Any use that can't embed the address computation?
      for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
        Node* u = addp->fast_out(i);
        if (!u->is_Mem() || u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) {
          return;
        }
      }

      Node* off = addp->in(AddPNode::Offset);
      Node* addr2 = addp2->in(AddPNode::Address);
      Node* base = addp->in(AddPNode::Base);

      Node* new_addr = NULL;
      // Check whether the graph already has the new AddP we need
      // before we create one (no GVN available here).
      for (DUIterator_Fast imax, i = addr2->fast_outs(imax); i < imax; i++) {
        Node* u = addr2->fast_out(i);
        if (u->is_AddP() &&
            u->in(AddPNode::Base) == base &&
            u->in(AddPNode::Address) == addr2 &&
            u->in(AddPNode::Offset) == off) {
          new_addr = u;
          break;
        }
      }

      if (new_addr == NULL) {
        new_addr = new AddPNode(base, addr2, off);
      }
      // Swap the outer offset with the inner shifted index, then drop
      // any node left without uses.
      Node* new_off = addp2->in(AddPNode::Offset);
      addp->set_req(AddPNode::Address, new_addr);
      if (addr->outcnt() == 0) {
        addr->disconnect_inputs(NULL, this);
      }
      addp->set_req(AddPNode::Offset, new_off);
      if (off->outcnt() == 0) {
        off->disconnect_inputs(NULL, this);
      }
    }
  }
}
3715 
3716 // helper for encoding java_to_runtime calls on sim
3717 //
3718 // this is needed to compute the extra arguments required when
3719 // planting a call to the simulator blrt instruction. the TypeFunc
3720 // can be queried to identify the counts for integral, and floating
3721 // arguments and the return type
3722 
3723 static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
3724 {
3725   int gps = 0;
3726   int fps = 0;
3727   const TypeTuple *domain = tf->domain();
3728   int max = domain->cnt();
3729   for (int i = TypeFunc::Parms; i < max; i++) {
3730     const Type *t = domain->field_at(i);
3731     switch(t->basic_type()) {
3732     case T_FLOAT:
3733     case T_DOUBLE:
3734       fps++;
3735     default:
3736       gps++;
3737     }
3738   }
3739   gpcnt = gps;
3740   fpcnt = fps;
3741   BasicType rt = tf->return_type();
3742   switch (rt) {
3743   case T_VOID:
3744     rtype = MacroAssembler::ret_type_void;
3745     break;
3746   default:
3747     rtype = MacroAssembler::ret_type_integral;
3748     break;
3749   case T_FLOAT:
3750     rtype = MacroAssembler::ret_type_float;
3751     break;
3752   case T_DOUBLE:
3753     rtype = MacroAssembler::ret_type_double;
3754     break;
3755   }
3756 }
3757 
// Emit a volatile (acquire/release) access via INSN.  Volatile
// accesses only support a plain base-register address, so any index,
// scale or displacement supplied by the pattern is rejected.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }

// Member-function-pointer types used by the loadStore helpers below.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
3771 
  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  static void loadStore(MacroAssembler masm, mem_insn insn,
                         Register reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
    case INDINDEXI2L:
    case INDINDEXI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    if (index == -1) {
      // Base + displacement addressing.
      (masm.*insn)(reg, Address(base, disp));
    } else {
      // Base + extended/scaled register offset; displacement must be 0.
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
3802 
3803   static void loadStore(MacroAssembler masm, mem_float_insn insn,
3804                          FloatRegister reg, int opcode,
3805                          Register base, int index, int size, int disp)
3806   {
3807     Address::extend scale;
3808 
3809     switch (opcode) {
3810     case INDINDEXSCALEDI2L:
3811     case INDINDEXSCALEDI2LN:
3812       scale = Address::sxtw(size);
3813       break;
3814     default:
3815       scale = Address::lsl(size);
3816     }
3817 
3818      if (index == -1) {
3819       (masm.*insn)(reg, Address(base, disp));
3820     } else {
3821       assert(disp == 0, "unsupported address mode: disp = %d", disp);
3822       (masm.*insn)(reg, Address(base, as_Register(index), scale));
3823     }
3824   }
3825 
3826   static void loadStore(MacroAssembler masm, mem_vector_insn insn,
3827                          FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
3828                          int opcode, Register base, int index, int size, int disp)
3829   {
3830     if (index == -1) {
3831       (masm.*insn)(reg, T, Address(base, disp));
3832     } else {
3833       assert(disp == 0, "unsupported address mode");
3834       (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
3835     }
3836   }
3837 
3838 %}
3839 
3840 
3841 
3842 //----------ENCODING BLOCK-----------------------------------------------------
3843 // This block specifies the encoding classes used by the compiler to
3844 // output byte streams.  Encoding classes are parameterized macros
3845 // used by Machine Instruction Nodes in order to generate the bit
3846 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  There are currently four
// supported interfaces: REG_INTER, CONST_INTER, MEMORY_INTER, &
3849 // COND_INTER.  REG_INTER causes an operand to generate a function
3850 // which returns its register number when queried.  CONST_INTER causes
3851 // an operand to generate a function which returns the value of the
3852 // constant when queried.  MEMORY_INTER causes an operand to generate
3853 // four functions which return the Base Register, the Index Register,
3854 // the Scale Value, and the Offset Value of the operand when queried.
3855 // COND_INTER causes an operand to generate six functions which return
3856 // the encoding code (ie - encoding bits for the instruction)
3857 // associated with each basic boolean condition for a conditional
3858 // instruction.
3859 //
3860 // Instructions specify two basic values for encoding.  Again, a
3861 // function is available to check if the constant displacement is an
3862 // oop. They use the ins_encode keyword to specify their encoding
3863 // classes (which must be a sequence of enc_class names, and their
3864 // parameters, specified in the encoding block), and they use the
3865 // opcode keyword to specify, in order, their primary, secondary, and
3866 // tertiary opcode.  Only the opcode sections which a particular
3867 // instruction needs for encoding need to be specified.
3868 encode %{
3869   // Build emit functions for each basic byte or larger field in the
3870   // intel encoding scheme (opcode, rm, sib, immediate), and call them
3871   // from C++ code in the enc_class source block.  Emit functions will
3872   // live in the main source block for now.  In future, we can
3873   // generalize this by adding a syntax that specifies the sizes of
3874   // fields in an order, so that the adlc can build the emit functions
3875   // automagically
3876 
  // catch all for unimplemented encodings: emits a runtime
  // "unimplemented" trap so missing encodings fail loudly.
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}
3882 
  // BEGIN Non-volatile memory access

  // Scalar integer loads: each encoding forwards to the loadStore
  // helper with the matching MacroAssembler load instruction.

  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Scalar float/double loads.
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // SIMD vector loads (S = 32-bit, D = 64-bit, Q = 128-bit variant).
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
3986 
  // Scalar integer stores.  The *0 variants store the zero register.
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    // Emit a StoreStore barrier before the zero store for ordering.
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Scalar float/double stores.
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // SIMD vector stores (S = 32-bit, D = 64-bit, Q = 128-bit variant).
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
4079 
  // END Non-volatile memory access

  // volatile loads and stores

  // Store-release encodings: MOV_VOLATILE emits stlrb/stlrh/stlrw,
  // which accept only a plain base-register address.
  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
  %}

  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
  %}

  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}
4098 
4099 
  // Load-acquire encodings.  The signed sub-word variants append an
  // explicit sign extension after the ldarb/ldarh.
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}
4162 
4163   enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
4164     MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4165              rscratch1, ldarw);
4166     __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
4167   %}
4168 
4169   enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
4170     MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4171              rscratch1, ldar);
4172     __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
4173   %}
4174 
  // Volatile store of a long via stlr (store-release); rscratch1 may be
  // used by MOV_VOLATILE to form the address.
  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
        MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      // Copy sp into an ordinary GPR that stlr can encode.
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
4188 
  // Volatile store of a float: move the FP bit pattern to rscratch2,
  // then store-release it with stlrw. The inner braces limit the scope
  // of this MacroAssembler (MOV_VOLATILE presumably declares its own --
  // see its definition earlier in the file).
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}
4198 
  // Volatile store of a double: move the FP bit pattern to rscratch2,
  // then store-release it with stlr. The inner braces limit the scope of
  // this MacroAssembler (MOV_VOLATILE presumably declares its own).
  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
4208 
4209   // synchronized read/update encodings
4210 
  // Load-acquire-exclusive (ldaxr) of a long. ldaxr only accepts a bare
  // base register, so any other flattened addressing mode (displacement
  // and/or index) is first materialized into rscratch1 with lea.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // Compute base + disp first, then add the scaled index.
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}
4239 
  // Store-release-exclusive (stlxr) of a long. As for ldaxr, only a bare
  // base register is encodable, so other addressing modes go through
  // rscratch2. The status result of stlxr lands in rscratch1.
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // Compute base + disp first, then add the scaled index.
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    // stlxr writes 0 on success: comparing against zr leaves EQ set for
    // a successful store, which is what callers branch on.
    __ cmpw(rscratch1, zr);
  %}
4269 
  // 64-bit compare-and-swap with release-only semantics (no acquire).
  // The flattened memory operand must be a bare base register.
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}
4277 
  // 32-bit compare-and-swap with release-only semantics (no acquire).
  // The flattened memory operand must be a bare base register.
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}
4285 
4286 
4287   // The only difference between aarch64_enc_cmpxchg and
4288   // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
4289   // CompareAndSwap sequence to serve as a barrier on acquiring a
4290   // lock.
  // 64-bit compare-and-swap with both acquire and release semantics
  // (see the comment above: the acquire serves as a lock-entry barrier).
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}
4298 
  // 32-bit compare-and-swap with both acquire and release semantics.
  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}
4306 
4307 
4308   // auxiliary used for CompareAndSwapX to set result register
4309   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
4310     MacroAssembler _masm(&cbuf);
4311     Register res_reg = as_Register($res$$reg);
4312     __ cset(res_reg, Assembler::EQ);
4313   %}
4314 
4315   // prefetch encodings
4316 
  // Prefetch-for-write (PSTL1KEEP) of the flattened memory operand.
  // When both a displacement and an index are present the base+disp part
  // is folded into rscratch1 first, since prfm cannot encode all three.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
4335 
  // mov encodings
4337 
4338   enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
4339     MacroAssembler _masm(&cbuf);
4340     u_int32_t con = (u_int32_t)$src$$constant;
4341     Register dst_reg = as_Register($dst$$reg);
4342     if (con == 0) {
4343       __ movw(dst_reg, zr);
4344     } else {
4345       __ movw(dst_reg, con);
4346     }
4347   %}
4348 
4349   enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
4350     MacroAssembler _masm(&cbuf);
4351     Register dst_reg = as_Register($dst$$reg);
4352     u_int64_t con = (u_int64_t)$src$$constant;
4353     if (con == 0) {
4354       __ mov(dst_reg, zr);
4355     } else {
4356       __ mov(dst_reg, con);
4357     }
4358   %}
4359 
  // Load a pointer constant. Oop and metadata constants go through the
  // relocation-aware movoop/mov_metadata paths; unrelocated addresses
  // below the first page are materialized directly, all others via
  // adrp+add. NULL and (address)1 are matched by the dedicated mov_p0 /
  // mov_p1 encodings, so they must not reach this one.
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          __ mov(dst_reg, con);
        } else {
          // adrp gives the enclosing 4K page; add the page offset back.
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}
4384 
4385   enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
4386     MacroAssembler _masm(&cbuf);
4387     Register dst_reg = as_Register($dst$$reg);
4388     __ mov(dst_reg, zr);
4389   %}
4390 
4391   enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
4392     MacroAssembler _masm(&cbuf);
4393     Register dst_reg = as_Register($dst$$reg);
4394     __ mov(dst_reg, (u_int64_t)1);
4395   %}
4396 
  // Load the address of the safepoint polling page with a relocated
  // adrp; the page is page-aligned, so the low-order offset must be 0.
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}
4405 
  // Load the card table's byte map base via the MacroAssembler helper.
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}
4410 
  // Load a narrow (compressed) oop constant. The null case is matched by
  // the dedicated mov_n0 encoding, so it must not reach here.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}
4423 
4424   enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
4425     MacroAssembler _masm(&cbuf);
4426     Register dst_reg = as_Register($dst$$reg);
4427     __ mov(dst_reg, zr);
4428   %}
4429 
  // Load a narrow (compressed) klass constant.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
4442 
4443   // arithmetic encodings
4444 
  // 32-bit add/subtract of an add/sub-range immediate. The instruct's
  // 'primary' attribute selects the operation (0 = add, 1 = subtract);
  // subtraction is handled by negating the constant and letting the
  // sign of the result pick addw vs subw.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}
4458 
  // 64-bit add/subtract of an add/sub-range immediate; same scheme as
  // the 32-bit variant above. The immLAddSub operand is range-limited,
  // so truncating the constant to int32_t is safe here.
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}
4472 
4473   enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
4474     MacroAssembler _masm(&cbuf);
4475    Register dst_reg = as_Register($dst$$reg);
4476    Register src1_reg = as_Register($src1$$reg);
4477    Register src2_reg = as_Register($src2$$reg);
4478     __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
4479   %}
4480 
4481   enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
4482     MacroAssembler _masm(&cbuf);
4483    Register dst_reg = as_Register($dst$$reg);
4484    Register src1_reg = as_Register($src1$$reg);
4485    Register src2_reg = as_Register($src2$$reg);
4486     __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
4487   %}
4488 
4489   enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
4490     MacroAssembler _masm(&cbuf);
4491    Register dst_reg = as_Register($dst$$reg);
4492    Register src1_reg = as_Register($src1$$reg);
4493    Register src2_reg = as_Register($src2$$reg);
4494     __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
4495   %}
4496 
4497   enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
4498     MacroAssembler _masm(&cbuf);
4499    Register dst_reg = as_Register($dst$$reg);
4500    Register src1_reg = as_Register($src1$$reg);
4501    Register src2_reg = as_Register($src2$$reg);
4502     __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
4503   %}
4504 
4505   // compare instruction encodings
4506 
4507   enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
4508     MacroAssembler _masm(&cbuf);
4509     Register reg1 = as_Register($src1$$reg);
4510     Register reg2 = as_Register($src2$$reg);
4511     __ cmpw(reg1, reg2);
4512   %}
4513 
  // 32-bit compare against an add/sub-range immediate. Implemented as a
  // flag-setting subtract into the zero register; a negative constant is
  // folded into an addsw of its negation instead.
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}
4524 
4525   enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
4526     MacroAssembler _masm(&cbuf);
4527     Register reg1 = as_Register($src1$$reg);
4528     u_int32_t val = (u_int32_t)$src2$$constant;
4529     __ movw(rscratch1, val);
4530     __ cmpw(reg1, rscratch1);
4531   %}
4532 
4533   enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
4534     MacroAssembler _masm(&cbuf);
4535     Register reg1 = as_Register($src1$$reg);
4536     Register reg2 = as_Register($src2$$reg);
4537     __ cmp(reg1, reg2);
4538   %}
4539 
  // 64-bit compare against a 12-bit add/sub-range immediate, as a
  // flag-setting subtract into the zero register. Negative constants
  // become an adds of the negation, except Long.MIN_VALUE, whose
  // negation is itself and must be materialized in a register.
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
    // aargh, Long.MIN_VALUE is a special case
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}
4554 
4555   enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
4556     MacroAssembler _masm(&cbuf);
4557     Register reg1 = as_Register($src1$$reg);
4558     u_int64_t val = (u_int64_t)$src2$$constant;
4559     __ mov(rscratch1, val);
4560     __ cmp(reg1, rscratch1);
4561   %}
4562 
4563   enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
4564     MacroAssembler _masm(&cbuf);
4565     Register reg1 = as_Register($src1$$reg);
4566     Register reg2 = as_Register($src2$$reg);
4567     __ cmp(reg1, reg2);
4568   %}
4569 
4570   enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
4571     MacroAssembler _masm(&cbuf);
4572     Register reg1 = as_Register($src1$$reg);
4573     Register reg2 = as_Register($src2$$reg);
4574     __ cmpw(reg1, reg2);
4575   %}
4576 
4577   enc_class aarch64_enc_testp(iRegP src) %{
4578     MacroAssembler _masm(&cbuf);
4579     Register reg = as_Register($src$$reg);
4580     __ cmp(reg, zr);
4581   %}
4582 
4583   enc_class aarch64_enc_testn(iRegN src) %{
4584     MacroAssembler _masm(&cbuf);
4585     Register reg = as_Register($src$$reg);
4586     __ cmpw(reg, zr);
4587   %}
4588 
4589   enc_class aarch64_enc_b(label lbl) %{
4590     MacroAssembler _masm(&cbuf);
4591     Label *L = $lbl$$label;
4592     __ b(*L);
4593   %}
4594 
4595   enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
4596     MacroAssembler _masm(&cbuf);
4597     Label *L = $lbl$$label;
4598     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
4599   %}
4600 
4601   enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
4602     MacroAssembler _masm(&cbuf);
4603     Label *L = $lbl$$label;
4604     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
4605   %}
4606 
  // Slow-path partial subtype check via the shared MacroAssembler
  // helper. On a hit control falls through (the helper sets condition
  // codes); on a miss it branches to 'miss'. When the instruct's
  // 'primary' attribute is set, the result register is cleared on the
  // fall-through (hit) path before the miss label is bound.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
4624 
  // Direct Java call through a trampoline. Runtime wrappers (no attached
  // method) use a plain runtime_call relocation; resolved Java methods
  // get a static or opt-virtual call relocation plus a to-interpreter
  // stub. Either allocation can fail when the code cache is full, in
  // which case the compilation is failed cleanly.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      call = __ trampoline_call(Address(addr, rspec), &cbuf);

      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}
4651 
  // Virtual (inline-cache) Java call; fails the compilation cleanly if
  // the code cache is full.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}
4661 
  // Post-call epilog. The VerifyStackAtCalls check is not implemented on
  // AArch64 yet, so it traps via call_Unimplemented when enabled.
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
4669 
  // Call from compiled Java code into the runtime.
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blrt
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // Target lives in the code cache: reachable via a trampoline call.
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      // Arbitrary native entry: describe the signature for blrt and
      // record the return pc so the stack can be walked across the call.
      int gpcnt;
      int fpcnt;
      int rtype;
      getCallInfo(tf(), gpcnt, fpcnt, rtype);
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
      __ bind(retaddr);
      // Pop the breadcrumb slots pushed above.
      __ add(sp, sp, 2 * wordSize);
    }
  %}
4700 
  // Jump to the exception rethrow stub (far_jump: may be out of branch range).
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}
4705 
  // Method return through the link register.
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}
4710 
4711   enc_class aarch64_enc_tail_call(iRegP jump_target) %{
4712     MacroAssembler _masm(&cbuf);
4713     Register target_reg = as_Register($jump_target$$reg);
4714     __ br(target_reg);
4715   %}
4716 
4717   enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
4718     MacroAssembler _masm(&cbuf);
4719     Register target_reg = as_Register($jump_target$$reg);
4720     // exception oop should be in r0
4721     // ret addr has been popped into lr
4722     // callee expects it in r3
4723     __ mov(r3, lr);
4724     __ br(target_reg);
4725   %}
4726 
  // Inline fast-path monitor enter. On exit the condition flags carry
  // the result: EQ = lock acquired, NE = caller must take the slow path.
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Load markOop from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      // oop is never null here, so this sets NE and forces the slow path.
      __ cmp(oop, zr);
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
    }

    // Handle existing monitor
    if ((EmitSync & 0x02) == 0) {
      // we can use AArch64's bit test and branch here but
      // markOopDesc does not define a bit index just the bit value
      // so assert in case the bit pos changes
#     define __monitor_value_log2 1
      assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
      __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
#     undef __monitor_value_log2
    }

    // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
    __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);

    // Load Compare Value application register.

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markOop with mark and if equal exchange scratch1
    // with object markOop.
    if (UseLSE) {
      // LSE path: single casal, then test whether the old value matched.
      __ mov(tmp, disp_hdr);
      __ casal(Assembler::xword, tmp, box, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::EQ, cont);
    } else {
      // Exclusive-load/store loop.
      Label retry_load;
      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldaxr(tmp, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, box, oop);
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    // Formerly:
    // __ cmpxchgptr(/*oldv=*/disp_hdr,
    //               /*newv=*/box,
    //               /*addr=*/oop,
    //               /*tmp=*/tmp,
    //               cont,
    //               /*fail*/NULL);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object and have now locked it; execution continues at label cont.

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markOop of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      // The object's monitor m is unlocked iff m->owner == NULL,
      // otherwise m->owner may contain a thread or a stack address.
      //
      // Try to CAS m->owner from NULL to current thread.
      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
      __ mov(disp_hdr, zr);

      if (UseLSE) {
        __ mov(rscratch1, disp_hdr);
        __ casal(Assembler::xword, rscratch1, rthread, tmp);
        __ cmp(rscratch1, disp_hdr);
      } else {
        Label retry_load, fail;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(tmp), PSTL1STRM);
        __ bind(retry_load);
        __ ldaxr(rscratch1, tmp);
        __ cmp(disp_hdr, rscratch1);
        __ br(Assembler::NE, fail);
        // use stlxr to ensure update is immediately visible
        __ stlxr(rscratch1, rthread, tmp);
        __ cbnzw(rscratch1, retry_load);
        __ bind(fail);
      }

      // Label next;
      // __ cmpxchgptr(/*oldv=*/disp_hdr,
      //               /*newv=*/rthread,
      //               /*addr=*/tmp,
      //               /*tmp=*/rscratch1,
      //               /*succeed*/next,
      //               /*fail*/NULL);
      // __ bind(next);

      // store a non-null value into the box.
      __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));

      // PPC port checks the following invariants
      // #ifdef ASSERT
      // bne(flag, cont);
      // We have acquired the monitor, check some invariants.
      // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
      // Invariant 1: _recursions should be 0.
      // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
      // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
      //                        "monitor->_recursions should be 0", -1);
      // Invariant 2: OwnerIsThread shouldn't be 0.
      // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
      //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
      //                           "monitor->OwnerIsThread shouldn't be 0", -1);
      // #endif
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure

  %}
4881 
4882   // TODO
4883   // reimplement this with custom cmpxchgptr code
4884   // which avoids some of the unnecessary branching
  // Inline fast-path monitor exit, mirroring fast_lock above. On exit
  // the condition flags carry the result: EQ = unlocked, NE = slow path.
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);


    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      // NOTE(review): the mark word was just loaded into tmp, but the
      // monitor bit is tested on disp_hdr (the displaced header from the
      // box). If the bit is clear here the CAS below fails and we reach
      // the slow path anyway, but confirm against upstream whether this
      // should test tmp instead.
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

      if (UseLSE) {
        // LSE path: single casl, then test whether the old value matched.
        __ mov(tmp, box);
        __ casl(Assembler::xword, tmp, disp_hdr, oop);
        __ cmp(tmp, box);
      } else {
        // Exclusive-load/store loop restoring the displaced header.
        Label retry_load;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(oop), PSTL1STRM);
        __ bind(retry_load);
        __ ldxr(tmp, oop);
        __ cmp(box, tmp);
        __ br(Assembler::NE, cas_failed);
        // use stlxr to ensure update is immediately visible
        __ stlxr(tmp, disp_hdr, oop);
        __ cbzw(tmp, cont);
        __ b(retry_load);
      }

    // __ cmpxchgptr(/*compare_value=*/box,
    //               /*exchange_value=*/disp_hdr,
    //               /*where=*/oop,
    //               /*result=*/tmp,
    //               cont,
    //               /*cas_failed*/NULL);
    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    __ bind(cas_failed);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);

      // Only release the monitor if both EntryList and cxq are empty.
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      __ cmp(rscratch1, zr);
      __ cbnz(rscratch1, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(rscratch1, tmp); // rscratch1 is zero
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
4980 
4981 %}
4982 
4983 //----------FRAME--------------------------------------------------------------
4984 // Definition of frame structure and management information.
4985 //
4986 //  S T A C K   L A Y O U T    Allocators stack-slot number
4987 //                             |   (to get allocators register number
4988 //  G  Owned by    |        |  v    add OptoReg::stack0())
4989 //  r   CALLER     |        |
4990 //  o     |        +--------+      pad to even-align allocators stack-slot
4991 //  w     V        |  pad0  |        numbers; owned by CALLER
4992 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4993 //  h     ^        |   in   |  5
4994 //        |        |  args  |  4   Holes in incoming args owned by SELF
4995 //  |     |        |        |  3
4996 //  |     |        +--------+
4997 //  V     |        | old out|      Empty on Intel, window on Sparc
4998 //        |    old |preserve|      Must be even aligned.
4999 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
5000 //        |        |   in   |  3   area for Intel ret address
5001 //     Owned by    |preserve|      Empty on Sparc.
5002 //       SELF      +--------+
5003 //        |        |  pad2  |  2   pad to align old SP
5004 //        |        +--------+  1
5005 //        |        | locks  |  0
5006 //        |        +--------+----> OptoReg::stack0(), even aligned
5007 //        |        |  pad1  | 11   pad to align new SP
5008 //        |        +--------+
5009 //        |        |        | 10
5010 //        |        | spills |  9   spills
5011 //        V        |        |  8   (pad0 slot for callee)
5012 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
5013 //        ^        |  out   |  7
5014 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
5015 //     Owned by    +--------+
5016 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
5017 //        |    new |preserve|      Must be even-aligned.
5018 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
5019 //        |        |        |
5020 //
5021 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
5022 //         known from SELF's arguments and the Java calling convention.
5023 //         Region 6-7 is determined per call site.
5024 // Note 2: If the calling convention leaves holes in the incoming argument
5025 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
5027 //         incoming area, as the Java calling convention is completely under
5028 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
5030 //         varargs C calling conventions.
5031 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
5032 //         even aligned with pad0 as needed.
5033 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
5034 //           (the latter is true on Intel but is it false on AArch64?)
5035 //         region 6-11 is even aligned; it may be padded out more so that
5036 //         the region from SP to FP meets the minimum stack alignment.
5037 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
5038 //         alignment.  Region 11, pad1, may be dynamically extended so that
5039 //         SP meets the minimum alignment.
5040 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between incoming/outgoing, just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Tables indexed by ideal register type: lo gives the first (or
    // only) register of the returned value, hi gives the second
    // register of a two-register value or OptoReg::Bad when a single
    // register suffices.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                       // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
5144 
//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
// Default attribute values; individual operand definitions override
// these (most operands below declare op_cost(0) explicitly).
op_attrib op_cost(1);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(INSN_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
5162 
5163 //----------OPERANDS-----------------------------------------------------------
5164 // Operand definitions must precede instruction definitions for correct parsing
5165 // in the ADLC because operands constitute user defined types which are used in
5166 // instruction definitions.
5167 
5168 //----------Simple Operands----------------------------------------------------
5169 
// Integer operands 32 bit
// 32 bit immediate
// (no predicate: matches any 32 bit integer constant)
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5213 
// 32 bit integer constant no greater than 4
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer constant 31
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer constant 255 (0xff)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer constant 65535 (0xffff)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5323 
// NOTE(review): despite the immL_ prefix, these two operands match a
// 32 bit constant (ConI / get_int()).  Presumably they serve as
// shift counts / masks in rules that operate on longs -- confirm
// against the instruct rules that consume them before changing the
// match type.

// Constant 63 (0x3f)
operand immL_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant 255 (0xff)
operand immL_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5343 
// 64 bit integer constant 65535 (0xffff)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer constant 4294967295 (0xffffffff)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask constant of the form 2^k - 1: value + 1 is a power of
// two and the top two bits are clear
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask constant of the form 2^k - 1: value + 1 is a power of
// two and the top two bits are clear
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5385 
// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset, long variant of immIU12
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5439 
// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset ok for a 4 byte access (size shift 2)
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset ok for an 8 byte access (size shift 3)
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset ok for a 16 byte access (size shift 4)
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long variants of the offset operands above
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset ok for a 4 byte access (size shift 2)
operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset ok for an 8 byte access (size shift 3)
operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset ok for a 16 byte access (size shift 4)
operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5542 
// Integer operands 64 bit
// 64 bit immediate
// (no predicate: matches any 64 bit integer constant)
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor
// (JavaThread::frame_anchor offset plus JavaFrameAnchor::last_Java_pc offset)

operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5629 
// Pointer operands
// Pointer Immediate
// (no predicate: matches any pointer constant)
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map
  predicate((jbyte*)n->get_ptr() ==
        ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
// NOTE(review): comment duplicated from immP_M1 -- confirm the distinct use
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5711 
// Float and Double operands
// Double Immediate
// (no predicate: matches any double constant)
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double constant encodable as an FP immediate
// (see Assembler::operand_valid_for_float_immediate)
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
// (no predicate: matches any float constant)
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float constant encodable as an FP immediate
// (see Assembler::operand_valid_for_float_immediate)
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5772 
// Narrow pointer operands
// Narrow Pointer Immediate
// (no predicate: matches any narrow oop constant)
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow klass pointer immediate
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5803 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);  // also accept the no-special-register variant
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5825 
// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);  // also accept the no-special-register variant
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5837 
// Integer 64 bit Register not Special
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  match(iRegL_R0);
  // Declare op_cost(0) explicitly for consistency with the other
  // register operands (iRegINoSp, iRegPNoSp, iRegL, ...); otherwise
  // this operand would inherit the default cost of 1 declared by
  // op_attrib op_cost(1) and be costed differently from its siblings.
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5847 
// Pointer Register Operands
// Pointer Register
// Matches the fixed-register sub-operands below as well, so rules
// written against iRegP also accept them.
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R0 only
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R5 only
operand iRegP_R5()
%{
  constraint(ALLOC_IN_RC(r5_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R10 only
operand iRegP_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5964 
// Long operands constrained to a single fixed register, used by
// rules that need a value pinned to a specific register.

// Long 64 bit Register R0 only
operand iRegL_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R2 only
operand iRegL_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R3 only
operand iRegL_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R11 only
operand iRegL_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6008 
// Pointer 64 bit Register FP only
operand iRegP_FP()
%{
  constraint(ALLOC_IN_RC(fp_reg));
  match(RegP);
  // match(iRegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer operands constrained to a single fixed register.

// Register R0 only
operand iRegI_R0()
%{
  constraint(ALLOC_IN_RC(int_r0_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R2 only
operand iRegI_R2()
%{
  constraint(ALLOC_IN_RC(int_r2_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R3 only
operand iRegI_R3()
%{
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Register R4 only
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6064 
6065 
// Narrow Pointer Register Operands
// Narrow Pointer Register
operand iRegN()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer register R0 only
operand iRegN_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer register R2 only
operand iRegN_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer register R3 only
operand iRegN_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register not Special
// (32 bit register class, no_special_reg32)
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// heap base register -- used for encoding immN0

operand iRegIHeapbase()
%{
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6125 
// Float Register
// Float register operands
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Vector register operand, VecD ideal type (vectord_reg class)
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Vector register operand, VecX ideal type (vectorx_reg class)
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double register V0 only
operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double register V1 only
operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double register V2 only
operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double register V3 only
operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6205 
6206 // Flags register, used as output of signed compare instructions
6207 
6208 // note that on AArch64 we also use this register as the output for
6209 // for floating point compare instructions (CmpF CmpD). this ensures
6210 // that ordered inequality tests use GT, GE, LT or LE none of which
6211 // pass through cases where the result is unordered i.e. one or both
6212 // inputs to the compare is a NaN. this means that the ideal code can
6213 // replace e.g. a GT with an LE and not end up capturing the NaN case
6214 // (where the comparison should always fail). EQ and NE tests are
6215 // always generated in ideal code so that unordered folds into the NE
6216 // case, matching the behaviour of AArch64 NE.
6217 //
6218 // This differs from x86 where the outputs of FP compares use a
6219 // special FP flags registers and where compares based on this
6220 // register are distinguished into ordered inequalities (cmpOpUCF) and
6221 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
6222 // to explicitly handle the unordered case in branches. x86 also has
6223 // to include extra CMoveX rules to accept a cmpOpUCF input.
6224 
operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of unsigned compare instructions.
// Same underlying register class (int_flags) as rFlagsReg; the
// distinct operand type lets instruct rules pair unsigned compares
// with the unsigned condition operand (cmpOpU) below.
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}
6245 
6246 // Special Registers
6247 
// Method Register — pinned to the inline-cache register class.
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Thread Register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg (earlier comment said link_reg — copy/paste slip)
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Link Register
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6287 
6288 //----------Memory Operands----------------------------------------------------
6289 
// [base] — plain register-indirect addressing: no index, no offset.
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff); // 0xffffffff encodes "no index register"
    scale(0x0);
    disp(0x0);
  %}
%}

// [base, index(int) sxtw #scale] — base plus a sign-extended 32-bit
// index shifted left by scale. The predicate only admits this mode
// when the scale is usable by every memory access sharing this AddP
// (see size_fits_all_mem_uses).
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// [base, index lsl #scale] — base plus a 64-bit index shifted by scale,
// with the same fits-all-uses restriction on the scale.
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// [base, index(int) sxtw] — base plus a sign-extended 32-bit index.
operand indIndexI2L(iRegP reg, iRegI ireg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// [base, index] — base plus a 64-bit index register, no shift.
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
6361 
// [base, #imm] — base plus 32-bit immediate offset. The indOffI4/8/16
// variants take immIOffset4/8/16 immediates, which presumably restrict
// the offset to values encodable for 4/8/16-byte accesses (those
// immediate operands are defined elsewhere in this file — confirm).
operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI4(iRegP reg, immIOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI8(iRegP reg, immIOffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI16(iRegP reg, immIOffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6417 
// [base, #imm] — base plus 64-bit (long) immediate offset; the
// indOffL4/8/16 variants parallel the indOffI4/8/16 family above,
// with the offset restricted by the corresponding immLoffset operand.
operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL4(iRegP reg, immLoffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL8(iRegP reg, immLoffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL16(iRegP reg, immLoffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6473 
// Narrow-oop addressing modes: the base is a compressed oop (iRegN)
// whose DecodeN is folded into the address. All are predicated on
// Universe::narrow_oop_shift() == 0 so the decode is trivial.
// NOTE(review): only the shift is checked here; presumably the heap
// base handling is covered elsewhere — confirm against the matcher.
operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// Narrow base + sign-extended 32-bit index shifted by scale; scale
// must be usable by all sharing memory accesses (size_fits_all_mem_uses).
operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Narrow base + 64-bit index shifted by scale.
operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// Narrow base + sign-extended 32-bit index.
operand indIndexI2LN(iRegN reg, iRegI ireg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// Narrow base + 64-bit index register.
operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
6548 
// Narrow-oop base + 32-bit immediate offset.
operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Narrow-oop base + 64-bit immediate offset.
operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}



// AArch64 opto stubs need to write to the pc slot in the thread anchor.
// Address is the thread register plus the fixed pc-slot offset
// (immL_pc_off, defined elsewhere in this file).
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6595 
6596 //----------Special Memory Operands--------------------------------------------
6597 // Stack Slot Operand - This operand is used for loading and storing temporary
6598 //                      values on the stack where a match requires a value to
6599 //                      flow through memory.
// Stack-slot operands: SP-relative addresses for matcher-generated
// spills. disp($reg) supplies the slot offset; base(0x1e) is this
// port's encoding for the stack pointer (the "RSP" name in the
// comments is inherited from the x86 ad file).
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
6670 
6671 // Operands for expressing Control Flow
6672 // NOTE: Label is a predefined operand which should not be redefined in
6673 //       the AD file. It is generically handled within the ADLC.
6674 
6675 //----------Conditional Branch Operands----------------------------------------
6676 // Comparison Op  - This is the operation of the comparison, and is limited to
6677 //                  the following set of codes:
6678 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6679 //
6680 // Other attributes of the comparison, such as unsignedness, are specified
6681 // by the comparison instruction that sets a condition code flags register.
6682 // That result is represented by a flags operand whose subtype is appropriate
6683 // to the unsignedness (etc.) of the comparison.
6684 //
6685 // Later, the instruction which matches both the Comparison Op (a Bool) and
6686 // the flags (produced by the Cmp) specifies the coding of the comparison op
6687 // by matching a specific subtype of Bool operand below, such as cmpOpU.
6688 
6689 // used for signed integral comparisons and fp comparisons
6690 
operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  // Encodings are the architectural AArch64 condition-code values:
  // eq=0x0, ne=0x1, lt=0xb, ge=0xa, le=0xd, gt=0xc, vs=0x6, vc=0x7.
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for unsigned integral comparisons

operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  // Same eq/ne/vs/vc codes as cmpOp, but the ordered tests use the
  // unsigned conditions: lo=0x3, hs=0x2, ls=0x9, hi=0x8.
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6726 
6727 // used for certain integral comparisons which can be
6728 // converted to cbxx or tbxx instructions
6729 
// Restricted condition operand: predicate accepts only eq/ne Bool
// tests, allowing rules to select cbz/cbnz/tbz/tbnz forms.
operand cmpOpEqNe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);
  predicate(n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::eq);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// As above, but restricted to lt/ge tests.
operand cmpOpLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// As above, restricted to the eq/ne/lt/ge subset, for unsigned
// comparisons convertible to cbxx/tbxx instructions.
operand cmpOpUEqNeLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::eq
            || n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6802 
6803 // Special operand allowing long args to int ops to be truncated for free
6804 
operand iRegL2I(iRegL reg) %{

  op_cost(0);

  // Matches a ConvL2I of a long register so the truncation can be
  // folded into the consuming 32-bit instruction.
  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  interface(REG_INTER)
%}

// Addressing modes legal for vector loads/stores of 4, 8 and 16 bytes
// respectively: plain indirect, register-index, and the size-matched
// immediate-offset forms.
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
6819 
6820 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
6822 // instruction definitions by not requiring the AD writer to specify
6823 // separate instructions for every form of operand when the
6824 // instruction accepts multiple operand types with the same basic
6825 // encoding and format. The classic case of this is memory operands.
6826 
6827 // memory is used to define read/write location for load/store
6828 // instruction defs. we can turn a memory op into an Address
6829 
// All scalar load/store addressing modes usable by a generic memory
// instruction: plain and narrow-oop bases, each with indexed,
// scaled-indexed and immediate-offset forms.
opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
6832 
6833 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
6834 // operations. it allows the src to be either an iRegI or a (ConvL2I
6835 // iRegL). in the latter case the l2i normally planted for a ConvL2I
6836 // can be elided because the 32-bit instruction will just employ the
6837 // lower 32 bits anyway.
6838 //
6839 // n.b. this does not elide all L2I conversions. if the truncated
6840 // value is consumed by more than one operation then the ConvL2I
6841 // cannot be bundled into the consuming nodes so an l2i gets planted
6842 // (actually a movw $dst $src) and the downstream instructions consume
6843 // the result of the l2i as an iRegI input. That's a shame since the
// movw is actually redundant but it's not too costly.
6845 
6846 opclass iRegIorL2I(iRegI, iRegL2I);
6847 
6848 //----------PIPELINE-----------------------------------------------------------
6849 // Rules which define the behavior of the target architectures pipeline.
6850 
6851 // For specific pipelines, eg A53, define the stages of that pipeline
6852 //pipe_desc(ISS, EX1, EX2, WR);
// Map the A53-style stage names used by the pipe classes below onto
// the first four stages of the generic six-stage pipe_desc (S0..S5).
#define ISS S0
#define EX1 S1
#define EX2 S2
#define WR  S3
6857 
6858 // Integer ALU reg operation
6859 pipeline %{
6860 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // Fixed size instructions
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
6873 
6874 // We don't use an actual pipeline model so don't care about resources
6875 // or description. we do use pipeline classes to introduce fixed
6876 // latencies
6877 
6878 //----------RESOURCES----------------------------------------------------------
6879 // Resources are the functional units available to the machine
6880 
// Functional units: two issue slots (INS0/INS1; INS01 = either slot),
// two integer ALUs (ALU = either), multiply-accumulate, divide,
// branch, load/store, and the NEON/FP unit.
resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);
6888 
6889 //----------PIPELINE DESCRIPTION-----------------------------------------------
6890 // Pipeline Description specifies the stages in the machine's pipeline
6891 
// Define the pipeline as a generic 6 stage pipeline; the ISS/EX1/EX2/WR
// macros above alias S0..S3 of these stages.
pipe_desc(S0, S1, S2, S3, S4, S5);
6894 
6895 //----------PIPELINE CLASSES---------------------------------------------------
6896 // Pipeline Classes describe the stages in which input and output are
6897 // referenced by the hardware pipeline.
6898 
// Convention for the pipe classes below: "opnd : STAGE(read|write)"
// gives the stage at which an operand is read or its result written;
// "RESOURCE : STAGE" occupies a functional unit at that stage. INS01
// means the instruction may issue in either decode slot, INS0/INS1
// restrict it to one slot.

// Scalar FP two-source arithmetic, single precision.
pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Scalar FP two-source arithmetic, double precision.
pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Scalar FP single-source op, single precision.
pipe_class fp_uop_s(vRegF dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Scalar FP single-source op, double precision.
pipe_class fp_uop_d(vRegD dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// double -> float conversion.
pipe_class fp_d2f(vRegF dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> double conversion.
pipe_class fp_f2d(vRegD dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
6954 
// FP <-> integer conversions; all share the same timing shape:
// source read at S1, result written at S5, NEON/FP unit busy at S5.

// float -> int.
pipe_class fp_f2i(iRegINoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> long.
pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// int -> float.
pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// long -> float.
pipe_class fp_l2f(vRegF dst, iRegL src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// double -> int.
pipe_class fp_d2i(iRegINoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// double -> long.
pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// int -> double.
pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// long -> double.
pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
7026 
// FP divide, single precision — issue slot 0 only (INS0).
pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

// FP divide, double precision — issue slot 0 only.
pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

// FP conditional op reading the flags register, single precision.
pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP conditional op reading the flags register, double precision.
pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP immediate move, single precision (no source operands).
pipe_class fp_imm_s(vRegF dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP immediate move, double precision.
pipe_class fp_imm_d(vRegD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP constant load, single precision.
pipe_class fp_load_constant_s(vRegF dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}

// FP constant load, double precision.
pipe_class fp_load_constant_d(vRegD dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}
7100 
// Vector pipe classes: the 64-bit (vecD) forms may dual-issue in
// either slot (INS01); the 128-bit (vecX) forms are restricted to
// issue slot 0 (INS0).

// 64-bit vector multiply.
pipe_class vmul64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// 128-bit vector multiply.
pipe_class vmul128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// 64-bit vector multiply-accumulate: dst is both read (accumulator
// input at S1) and written (S5).
pipe_class vmla64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// 128-bit vector multiply-accumulate.
pipe_class vmla128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// 64-bit vector integer two-source op.
pipe_class vdop64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S4;
%}

// 128-bit vector integer two-source op.
pipe_class vdop128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S4;
%}

// 64-bit vector logical op.
pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector logical op.
pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
7182 
// 64-bit vector shift by a register shift amount.
pipe_class vshift64(vecD dst, vecD src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector shift by a register shift amount — slot 0 only.
pipe_class vshift128(vecX dst, vecX src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// 64-bit vector shift by immediate (no timing entry for the immediate).
pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector shift by immediate — slot 0 only.
pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
7220 
// 64-bit vector FP two-source op.
pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// 128-bit vector FP two-source op — slot 0 only.
pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// 64-bit vector FP multiply/divide — slot 0 only.
pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// 128-bit vector FP multiply/divide — slot 0 only.
pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// 128-bit vector FP square root — slot 0 only.
pipe_class vsqrt_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// 64-bit vector FP single-source op.
pipe_class vunop_fp64(vecD dst, vecD src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// 128-bit vector FP single-source op — slot 0 only.
pipe_class vunop_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
7287 
// Duplicate a general register across the lanes of a 64-bit vector.
pipe_class vdup_reg_reg64(vecD dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a general register across the lanes of a 128-bit vector.
pipe_class vdup_reg_reg128(vecX dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a float register into a 64-bit vector.
pipe_class vdup_reg_freg64(vecD dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a float register into a 128-bit vector.
pipe_class vdup_reg_freg128(vecX dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a double register into a 128-bit vector.
pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector immediate move, 64-bit.
pipe_class vmovi_reg_imm64(vecD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector immediate move, 128-bit — slot 0 only.
pipe_class vmovi_reg_imm128(vecX dst)
%{
  single_instruction;
  dst    : S3(write);
  INS0   : ISS;
  NEON_FP : S3;
%}
7348 
// 64-bit vector load; address operand consumed at issue.
pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector load.
pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 64-bit vector store.
pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector store.
// NOTE(review): src is declared vecD although this is the 128-bit
// store class — looks like a copy-paste from vstore_reg_mem64.
// Pipe-class parameter types appear informational only; confirm
// against the ADLC before changing.
pipe_class vstore_reg_mem128(vecD src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7384 
7385 //------- Integer ALU operations --------------------------
7386 
// Integer ALU reg-reg operation
// Operands needed in EX1, result generated in EX2
// Eg.  ADD     x0, x1, x2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : EX1(read);
  INS01  : ISS; // Dual issue as instruction 0 or 1
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with constant shift
// Shifted register must be available in LATE_ISS instead of EX1
// Eg.  ADD     x0, x1, x2, LSL #2
pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg operation with constant shift
// Eg.  LSL     x0, x1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with variable shift
// Both operands must be available in LATE_ISS instead of EX1
// Result is available in EX1 instead of EX2
// Eg.  LSLV    x0, x1, x2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX1(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX1;
%}

// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
// NOTE(review): ALU is occupied at EX1 here although dst is written
// at EX2, unlike the other EX2-writing classes above — confirm intent.
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  ALU    : EX1;
%}

// Integer ALU reg operation
// Eg.  NEG     x0, x1
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX2(write);
  src    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg immediate operation
// Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU immediate operation (no source operands)
// Eg.  MOV     x0, #N
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX1(write);
  INS01  : ISS;
  ALU    : EX1;
%}
7482 
//------- Compare operation -------------------------------

// Compare reg-reg
// Eg.  CMP     x0, x1
pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  op2    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Compare reg-immediate
// Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

//------- Conditional instructions ------------------------

// Conditional no operands
// Eg.  CSINC   x0, zr, zr, <cond>
pipe_class icond_none(iRegI dst, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 2 operand
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src1   : EX1(read);
  src2   : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 1 operand
// EG.  CSEL    X0, X1, zr, <cond>
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
7547 
//------- Multiply pipeline operations --------------------

// Multiply reg-reg (32 bit)
// Eg.  MUL     w0, w1, w2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate (32 bit)
// Eg.  MADD    w0, w1, w2, w3
pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long multiply reg-reg (64 bit)
// Eg.  MUL     x0, x1, x2
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long multiply accumulate (64 bit)
// Eg.  MADD    x0, x1, x2, x3
pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

//------- Divide pipeline operations --------------------

// Eg.  SDIV    w0, w1, w2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

// Eg.  SDIV    x0, x1, x2
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}
7626 
//------- Load pipeline operations ------------------------

// Load - prefetch
// Eg.  PFRM    <mem>
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, mem
// Eg.  LDR     x0, <mem>
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, reg
// Eg.  LDR     x0, [sp, x1]
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

//------- Store pipeline operations -----------------------

// Store - zr, mem
// Eg.  STR     zr, <mem>
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, mem
// Eg.  STR     x0, <mem>
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, reg
// Eg. STR      x0, [sp, x1]
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  // Note: 'dst' is the address register here and is read, not written.
  dst    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}
7694 
//------- Branch pipeline operations ----------------------
7696 
// Unconditional branch
pipe_class pipe_branch()
%{
  single_instruction;
  INS01  : ISS;
  BRANCH : EX1;
%}

// Conditional branch
// Reads the flags register in EX1
pipe_class pipe_branch_cond(rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

// Compare & Branch
// EG.  CBZ/CBNZ
pipe_class pipe_cmp_branch(iRegI op1)
%{
  single_instruction;
  op1    : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}
7723 
//------- Synchronisation operations ----------------------

// Any operation requiring serialization.
// EG.  DMB/Atomic Ops/Load Acquire/Str Release
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Generic big/slow expanded idiom - also serialized
pipe_class pipe_slow()
%{
  instruction_count(10);
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Empty pipeline class
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);
%}

// Default pipeline class.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);
%}

// Define the class for the Nop node.
define %{
   MachNop = pipe_class_empty;
%}
7787 
7788 %}
7789 //----------INSTRUCTIONS-------------------------------------------------------
7790 //
7791 // match      -- States which machine-independent subtree may be replaced
7792 //               by this instruction.
7793 // ins_cost   -- The estimated cost of this instruction is used by instruction
7794 //               selection to identify a minimum cost tree of machine
7795 //               instructions that matches a tree of machine-independent
7796 //               instructions.
7797 // format     -- A string providing the disassembly for this instruction.
7798 //               The value of an instruction's operand may be inserted
7799 //               by referring to it with a '$' prefix.
7800 // opcode     -- Three instruction opcodes may be provided.  These are referred
7801 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
7803 //               indicate the type of machine instruction, while secondary
7804 //               and tertiary are often used for prefix options or addressing
7805 //               modes.
7806 // ins_encode -- A list of encode classes with parameters. The encode class
7807 //               name must have been defined in an 'enc_class' specification
7808 //               in the encode section of the architecture description.
7809 
7810 // ============================================================================
7811 // Memory (Load/Store) Instructions
7812 
7813 // Load Instructions
7814 
// Non-volatile loads.  The !needs_acquiring_load(n) predicate excludes
// loads that require acquire semantics; those are matched by the
// ldar-based volatile rules later in this file.

// Load Byte (8 bit signed)
instruct loadB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsbw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned)
instruct loadUB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed)
instruct loadS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrshw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed) into long
instruct loadS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrsh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Char (16 bit unsigned)
instruct loadUS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed)
instruct loadI(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadI mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed) into long
instruct loadI2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrsw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit unsigned) into long
// Matches ConvI2L masked down to 32 bits, so a zero-extending ldrw suffices.
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Long (64 bit signed)
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7982 
// Load Range
// No acquiring predicate here -- there is no volatile variant of
// LoadRange in this file.
instruct loadRange(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Pointer
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Compressed Pointer
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Narrow Klass Pointer
instruct loadNKlass(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Float
instruct loadF(vRegF dst, memory mem)
%{
  match(Set dst (LoadF mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrs  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_ldrs(dst, mem) );

  ins_pipe(pipe_class_memory);
%}

// Load Double
instruct loadD(vRegD dst, memory mem)
%{
  match(Set dst (LoadD mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrd  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_ldrd(dst, mem) );

  ins_pipe(pipe_class_memory);
%}
8079 
8080 
// Load Int Constant
instruct loadConI(iRegINoSp dst, immI src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# int" %}

  ins_encode( aarch64_enc_movw_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Long Constant
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Pointer Constant
// Costed higher: materializing an arbitrary pointer may take several
// instructions.
instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr\n\t"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Null Pointer Constant

instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# NULL ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
8136 
// Load Pointer Constant One

instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  // Format fixed: this rule loads the constant one, not NULL
  // (the "# NULL ptr" text was copied from loadConP0 above).
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
8150 
// Load Poll Page Constant
// Uses a pc-relative adr to reach the safepoint polling page.
instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Poll Page Ptr" %}

  ins_encode(aarch64_enc_mov_poll_page(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Byte Map Base Constant

instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Byte Map Base" %}

  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Pointer Constant

instruct loadConN(iRegNNoSp dst, immN con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{ "mov  $dst, $con\t# compressed ptr" %}

  ins_encode(aarch64_enc_mov_n(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Null Pointer Constant

instruct loadConN0(iRegNNoSp dst, immN0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed NULL ptr" %}

  ins_encode(aarch64_enc_mov_n0(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Klass Constant

instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_mov_nk(dst, con));

  ins_pipe(ialu_imm);
%}
8220 
// Load Packed Float Constant
// immFPacked constants are encodable directly in an fmov immediate.

instruct loadConF_packed(vRegF dst, immFPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST * 4);
  format %{ "fmovs  $dst, $con"%}
  ins_encode %{
    // cast: the assembler takes the immediate as a double -- TODO confirm
    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
  %}

  ins_pipe(fp_imm_s);
%}

// Load Float Constant
// Non-packed floats are loaded from the constant table.

instruct loadConF(vRegF dst, immF con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 4);

  format %{
    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
  %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_s);
%}

// Load Packed Double Constant

instruct loadConD_packed(vRegD dst, immDPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST);
  format %{ "fmovd  $dst, $con"%}
  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
  %}

  ins_pipe(fp_imm_d);
%}
8264 
// Load Double Constant
// Non-packed doubles are loaded from the constant table.

instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  // Format fixed: label the constant as double (the "float=" text
  // was copied from loadConF above).
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
8281 
// Store Instructions

// Store CMS card-mark Immediate
// Only matched when the StoreStore barrier can be elided
// (unnecessary_storestore); otherwise the _ordered variant below applies.
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store CMS card-mark Immediate with intervening StoreStore
// needed when using CMS with no conditional card marking
instruct storeimmCM0_ordered(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST * 2);
  format %{ "dmb ishst"
      "\n\tstrb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0_ordered(mem));

  ins_pipe(istore_mem);
%}

// Store Byte
instruct storeB(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreB mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb(src, mem));

  ins_pipe(istore_reg_mem);
%}
8326 
8327 
// Store zero Byte
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Format fixed: the encoding stores zr (aarch64_enc_strb0, same as
  // storeimmCM0 above); "rscractch2" was a typo.
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
8340 
// Non-volatile stores.  The !needs_releasing_store(n) predicate excludes
// stores that require release semantics; those are matched by the
// stlr-based volatile rules elsewhere in this file.

// Store Char/Short
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}

instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}

// Store Integer

instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem(StoreI mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem(StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}

// Store Long (64 bit signed)
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Long (64 bit signed)
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}

// Store Pointer
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Pointer
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}

// Store Compressed Pointer
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store narrow null: when both compression bases are NULL the heapbase
// register holds zero, so it can be stored directly as the null value.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Float
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Klass Pointer
instruct storeNKlass(iRegN src, memory mem)
%{
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
8525 
8526 // TODO
8527 // implement storeImmD0 and storeDImmPacked
8528 
// prefetch instructions
// Must be safe to execute with invalid address (cannot fault).

instruct prefetchalloc( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}
8542 
//  ---------------- volatile loads and stores ----------------
//
// These rules use the load-acquire (ldar*) forms and are serialized
// via pipe_serial.  The address is restricted to a plain indirect
// operand because ldar takes no offset.

// Load Byte (8 bit signed)
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned)
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short (16 bit signed)
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Char (16 bit unsigned)
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
8634 
// Load Short/Char (16 bit signed) into long
// Volatile variant: acquiring, sign-extending halfword load.
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  // The format previously printed "ldarh" (unsigned) although the
  // encoding emits the sign-extending ldarsh; corrected so debug
  // disassembly matches the generated code.
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
8647 
// Load Integer (32 bit signed)
// Volatile variant: 32-bit acquiring load (ldarw).
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
8660 
// Load Integer (32 bit unsigned) into long
// Matches (AndL (ConvI2L (LoadI)) 0xFFFFFFFF): ldarw already clears the
// upper 32 bits, so the mask needs no extra instruction.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
8673 
// Load Long (64 bit signed)
// Volatile variant: 64-bit acquiring load (ldar).
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  // Comment corrected from "# int" to "# long": this is a 64-bit load.
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
8686 
// Load Pointer
// Volatile variant: 64-bit acquiring load (ldar).
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
8699 
// Load Compressed Pointer
// Volatile variant: narrow oops are 32 bits, so use ldarw.
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
8712 
// Load Float
// Volatile variant: acquiring load into an FP register via the
// aarch64_enc_fldars encoding (ldar has no FP form; see the encoding).
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}
8725 
// Load Double
// Volatile variant: 64-bit acquiring load into an FP register via the
// aarch64_enc_fldard encoding.
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}
8738 
// Store Byte
// Volatile variant: releasing byte store (stlrb); register-indirect
// address only.
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}
8751 
// Store Char/Short
// Volatile variant: releasing halfword store (stlrh).
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}
8764 
// Store Integer
// Volatile variant: releasing 32-bit store (stlrw).

instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  // Spacing normalized ("mem (StoreI" rather than "mem(StoreI") to match
  // every sibling store rule in this file.
  match(Set mem (StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
8778 
// Store Long (64 bit signed)
// Volatile variant: releasing 64-bit store (stlr).
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  // Comment corrected from "# int" to "# long": this is a 64-bit store.
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
8791 
// Store Pointer
// Volatile variant: releasing 64-bit store (stlr).
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
8804 
// Store Compressed Pointer
// Volatile variant: narrow oops are 32 bits, so use stlrw.
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
8817 
// Store Float
// Volatile variant: releasing store from an FP register via the
// aarch64_enc_fstlrs encoding.
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}
8830 
8831 // TODO
8832 // implement storeImmF0 and storeFImmPacked
8833 
// Store Double
// Volatile variant: 64-bit releasing store from an FP register via the
// aarch64_enc_fstlrd encoding.
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
8846 
8847 //  ---------------- end of volatile loads and stores ----------------
8848 
8849 // ============================================================================
8850 // BSWAP Instructions
8851 
// Reverse the byte order of a 32-bit int (Integer.reverseBytes).
instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesI src));

  ins_cost(INSN_COST);
  format %{ "revw  $dst, $src" %}

  ins_encode %{
    __ revw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
8864 
// Reverse the byte order of a 64-bit long (Long.reverseBytes).
instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
  match(Set dst (ReverseBytesL src));

  ins_cost(INSN_COST);
  format %{ "rev  $dst, $src" %}

  ins_encode %{
    __ rev(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
8877 
// Reverse the bytes of an unsigned 16-bit value (Character.reverseBytes);
// rev16w swaps bytes within each halfword.
instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesUS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
8890 
// Reverse the bytes of a signed 16-bit value (Short.reverseBytes):
// byte-swap with rev16w, then sign-extend bits 0..15 with sbfmw.
instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src\n\t"
            "sbfmw $dst, $dst, #0, #15" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
    __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
  %}

  ins_pipe(ialu_reg);
%}
8905 
8906 // ============================================================================
8907 // Zero Count Instructions
8908 
// Integer.numberOfLeadingZeros: single clzw instruction.
instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountLeadingZerosI src));

  ins_cost(INSN_COST);
  format %{ "clzw  $dst, $src" %}
  ins_encode %{
    __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
8920 
// Long.numberOfLeadingZeros: single clz instruction (result is an int).
instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountLeadingZerosL src));

  ins_cost(INSN_COST);
  format %{ "clz   $dst, $src" %}
  ins_encode %{
    __ clz(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
8932 
// Integer.numberOfTrailingZeros: AArch64 has no ctz, so bit-reverse
// (rbitw) then count leading zeros (clzw).
instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountTrailingZerosI src));

  ins_cost(INSN_COST * 2);
  format %{ "rbitw  $dst, $src\n\t"
            "clzw   $dst, $dst" %}
  ins_encode %{
    __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
    __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}
8946 
// Long.numberOfTrailingZeros: bit-reverse (rbit) then clz, as above.
instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountTrailingZerosL src));

  ins_cost(INSN_COST * 2);
  format %{ "rbit   $dst, $src\n\t"
            "clz    $dst, $dst" %}
  ins_encode %{
    __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
    __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}
8960 
8961 //---------- Population Count Instructions -------------------------------------
8962 //
8963 
// Integer.bitCount via the SIMD cnt instruction: move the value into a
// vector register, count set bits per byte, sum across the vector, and
// move the result back to a general register.
instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // NOTE(review): this writes back into the $src register (to clear the
    // upper 32 bits) although $src carries no TEMP/USE_KILL effect. The
    // 32-bit value is unchanged, but confirm the allocator's assumptions
    // about untouched inputs hold here.
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
8985 
// Integer.bitCount of an in-memory int: load straight into the vector
// register (ldrs), then cnt/addv/mov as in popCountI.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    // loadStore handles the full "memory" operand (base/index/scale/disp),
    // which plain ldrs via __ cannot express here.
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
9007 
// Note: Long.bitCount(long) returns an int.
// Same SIMD cnt/addv sequence as popCountI, but no need to clear upper
// bits since the whole 64-bit value is counted.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
9028 
// Long.bitCount of an in-memory long: load straight into the vector
// register (ldrd), then cnt/addv/mov as in popCountL.
instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    // loadStore handles the full "memory" operand (base/index/scale/disp).
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
9050 
9051 // ============================================================================
9052 // MemBar Instruction
9053 
// LoadFence: orders prior loads before subsequent loads and stores
// (LoadLoad|LoadStore barrier).
instruct load_fence() %{
  match(LoadFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "load_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}
  ins_pipe(pipe_serial);
%}
9065 
// MemBarAcquire that the unnecessary_acquire() analysis has proven
// redundant (e.g. preceded by an acquiring ldar); emit only a comment.
instruct unnecessary_membar_acquire() %{
  predicate(unnecessary_acquire(n));
  match(MemBarAcquire);
  ins_cost(0);

  format %{ "membar_acquire (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire (elided)");
  %}

  ins_pipe(pipe_class_empty);
%}
9079 
// MemBarAcquire: LoadLoad|LoadStore barrier.
instruct membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire" %}

  ins_encode %{
    __ block_comment("membar_acquire");
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}

  ins_pipe(pipe_serial);
%}
9093 
9094 
// MemBarAcquireLock: no barrier instruction is emitted, only a comment
// (the lock acquisition itself provides the ordering).
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}
9107 
// StoreFence: orders prior loads and stores before subsequent stores
// (LoadStore|StoreStore barrier).
instruct store_fence() %{
  match(StoreFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "store_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}
9119 
// MemBarRelease proven redundant by unnecessary_release() (e.g. followed
// by a releasing stlr); emit only a comment.
instruct unnecessary_membar_release() %{
  predicate(unnecessary_release(n));
  match(MemBarRelease);
  ins_cost(0);

  format %{ "membar_release (elided)" %}

  ins_encode %{
    __ block_comment("membar_release (elided)");
  %}
  ins_pipe(pipe_serial);
%}
9132 
// MemBarRelease: LoadStore|StoreStore barrier.
instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release" %}

  ins_encode %{
    __ block_comment("membar_release");
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}
9145 
// MemBarStoreStore: orders prior stores before subsequent stores.
instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(VOLATILE_REF_COST);

  format %{ "MEMBAR-store-store" %}

  ins_encode %{
    __ membar(Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}
9157 
// MemBarReleaseLock: no barrier emitted, only a comment (the lock
// release itself provides the ordering).
instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_release_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}
9170 
// MemBarVolatile proven redundant by unnecessary_volatile(); emit only
// a comment.
instruct unnecessary_membar_volatile() %{
  predicate(unnecessary_volatile(n));
  match(MemBarVolatile);
  ins_cost(0);

  format %{ "membar_volatile (elided)" %}

  ins_encode %{
    __ block_comment("membar_volatile (elided)");
  %}

  ins_pipe(pipe_serial);
%}
9184 
// MemBarVolatile: full StoreLoad barrier -- the expensive one, hence the
// inflated cost to steer the matcher toward the elided form when legal.
instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(VOLATILE_REF_COST*100);

  format %{ "membar_volatile" %}

  ins_encode %{
    __ block_comment("membar_volatile");
    __ membar(Assembler::StoreLoad);
  %}

  ins_pipe(pipe_serial);
%}
9198 
9199 // ============================================================================
9200 // Cast/Convert Instructions
9201 
// Reinterpret a long as a pointer; a register move, elided when source
// and destination allocate to the same register.
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long -> ptr" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
9216 
// Reinterpret a pointer as a long; a register move, elided when source
// and destination allocate to the same register.
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
9231 
// Convert an oop into an int for vector alignment masking.
// movw truncates to the low 32 bits and clears the upper half.
instruct convP2I(iRegINoSp dst, iRegP src) %{
  match(Set dst (ConvL2I (CastP2X src)));

  ins_cost(INSN_COST);
  format %{ "movw $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
9244 
// Convert a compressed oop into an int for vector alignment masking,
// valid only when narrow oops are unshifted (heap < 4Gb) so the encoded
// value equals the low 32 bits of the raw address.
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  // Format fixed: it previously read "mov dst, $src" -- the missing '$'
  // printed a literal "dst", and the mnemonic did not match the movw the
  // encoding emits.
  format %{ "movw  $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
9260 
9261 
// Convert oop pointer into compressed form.
// General case (value may be null); the null check inside
// encode_heap_oop is why this form kills the flags.
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}
9276 
// Compress an oop that is statically known non-null -- no null check.
// NOTE(review): the cr operand is declared but carries no effect(KILL cr);
// it appears unused -- confirm whether adlc requires it here.
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9287 
// Decompress a narrow oop; general case (value may be null).
// NOTE(review): cr is declared without an effect -- confirm it is needed.
instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}
9301 
// Decompress a narrow oop statically known non-null (or constant) --
// no null check needed.
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}
9315 
9316 // n.b. AArch64 implementations of encode_klass_not_null and
9317 // decode_klass_not_null do not modify the flags register so, unlike
9318 // Intel, we don't kill CR as a side effect here
9319 
// Compress a Klass pointer. Per the note above, the AArch64
// implementation does not touch the flags, so no KILL cr effect.
instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}
9334 
// Decompress a narrow Klass pointer. The macro assembler provides a
// distinct in-place form for when dst and src allocate to the same
// register, hence the branch below.
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      // In-place variant when the allocator assigned the same register.
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}
9353 
// CheckCastPP is a compile-time type assertion only: zero-size,
// no code emitted.
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}
9363 
// CastPP is a compile-time type refinement only: zero-size, no code.
instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}
9373 
// CastII narrows an int's type for the optimizer: zero-size, zero-cost,
// no code emitted.
instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
9384 
9385 // ============================================================================
9386 // Atomic operation instructions
9387 //
9388 // Intel and SPARC both implement Ideal Node LoadPLocked and
9389 // Store{PIL}Conditional instructions using a normal load for the
9390 // LoadPLocked and a CAS for the Store{PIL}Conditional.
9391 //
9392 // The ideal code appears only to use LoadPLocked/StorePLocked as a
9393 // pair to lock object allocations from Eden space when not using
9394 // TLABs.
9395 //
9396 // There does not appear to be a Load{IL}Locked Ideal Node and the
9397 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
9398 // and to use StoreIConditional only for 32-bit and StoreLConditional
9399 // only for 64-bit.
9400 //
9401 // We implement LoadPLocked and StorePLocked instructions using,
9402 // respectively the AArch64 hw load-exclusive and store-conditional
9403 // instructions. Whereas we must implement each of
9404 // Store{IL}Conditional using a CAS which employs a pair of
9405 // instructions comprising a load-exclusive followed by a
9406 // store-conditional.
9407 
9408 
// Locked-load (linked load) of the current heap-top
// used when updating the eden heap top
// implemented using ldaxr on AArch64: an acquiring load-exclusive that
// pairs with the stlxr in storePConditional below.

instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}
9425 
// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flag (EQ) on success.
// implemented using stlxr on AArch64: the store-exclusive paired with
// the ldaxr in loadPLocked; oldval is implied by the exclusive monitor.

instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
9450 
9451 
// storeLConditional is used by PhaseMacroExpand::expand_lock_node
// when attempting to rebias a lock towards the current thread.  We
// must use the acquire form of cmpxchg in order to guarantee acquire
// semantics in this case.
// Result is reported through the flags: EQ on successful store.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
9471 
// storeIConditional also has acquire semantics, for no better reason
// than matching storeLConditional.  At the time of writing this
// comment storeIConditional was not used anywhere by AArch64.
// 32-bit twin of storeLConditional; EQ flag set on successful store.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
9490 
9491 // standard CompareAndSwapX when we are using barriers
9492 // these have higher priority than the rules selected by a predicate
9493 
9494 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
9495 // can't match them
9496 
// CompareAndSwapI with full barriers: cmpxchgw then materialize the
// success flag into res via cset (1 on success, 0 on failure).
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9514 
// CompareAndSwapL with full barriers: 64-bit cmpxchg then cset.
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9532 
// CompareAndSwapP with full barriers: 64-bit cmpxchg on a pointer,
// then cset.
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9550 
// CompareAndSwapN with full barriers: 32-bit cmpxchgw on a narrow oop,
// then cset.
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9568 
9569 // alternative CompareAndSwapX when we are eliding barriers
9570 
// Acquiring variant of compareAndSwapI, selected when
// needs_acquiring_load_exclusive(n) proves surrounding barriers can be
// elided; lower cost so it wins over the barrier form.
instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9589 
// Acquiring variant of compareAndSwapL (see compareAndSwapIAcq).
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9608 
// Acquiring variant of compareAndSwapP (see compareAndSwapIAcq).
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9627 
// Acquiring variant of compareAndSwapN (see compareAndSwapIAcq).
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9646 
9647 
9648 // ---------------------------------------------------------------------
9649 
9650 
9651 // BEGIN This section of the file is automatically generated. Do not edit --------------
9652 
9653 // Sundry CAS operations.  Note that release is always true,
9654 // regardless of the memory ordering of the CAS.  This is because we
9655 // need the volatile case to be sequentially consistent but there is
9656 // no trailing StoreLoad barrier emitted by C2.  Unfortunately we
9657 // can't check the type of memory ordering here, so we always emit a
9658 // STLXR.
9659 
9660 // This section is generated from aarch64_ad_cas.m4
9661 
9662 
9663 
// CompareAndExchangeB: returns the previous byte value in res.
// oldval is zero-extended into rscratch2 for the byte-width compare;
// the result is sign-extended back to match Java byte semantics.
// NOTE(review): the format says "(byte, weak)" although cmpxchg is
// called with /*weak*/ false -- a template artifact; any fix belongs in
// aarch64_ad_cas.m4, since this section is generated.
instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ uxtbw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9680 
// Strong CAS on a short returning the fetched value; zero-extend
// $oldval for the compare, sign-extend the result afterwards.
// NOTE(review): generated from aarch64_ad_cas.m4 — mirror any change
// there.  Dropped the wrong ", weak" tag (/*weak*/ is false).
instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ uxthw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9697 
// Strong CAS on an int returning the fetched value in $res.
// NOTE(review): generated from aarch64_ad_cas.m4 — mirror any change
// there.  Dropped the wrong ", weak" tag (/*weak*/ is false).
instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9712 
// Strong CAS on a long returning the fetched value in $res.
// NOTE(review): generated from aarch64_ad_cas.m4 — mirror any change
// there.  Dropped the wrong ", weak" tag (/*weak*/ is false).
instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9727 
// Strong CAS on a narrow oop returning the fetched value in $res.
// NOTE(review): generated from aarch64_ad_cas.m4 — mirror any change
// there.  Dropped the wrong ", weak" tag (/*weak*/ is false).
instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9742 
// Strong CAS on a pointer returning the fetched value in $res.
// NOTE(review): generated from aarch64_ad_cas.m4 — mirror any change
// there.  Dropped the wrong ", weak" tag (/*weak*/ is false).
instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9757 
// Weak CAS variants.  They pass /*weak*/ true to
// MacroAssembler::cmpxchg and do not capture the fetched value
// (noreg); the 1/0 success flag is materialized from the EQ condition
// with csetw.  A weak CAS is permitted to fail spuriously.

// Weak CAS on a byte; $oldval is zero-extended into rscratch2 before
// the exchange so the sub-word compare sees a clean value.
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ uxtbw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Weak CAS on a short; same zero-extension of $oldval as the byte form.
instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ uxthw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Weak CAS on an int.
instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Weak CAS on a long; note the result is still an int (success flag).
instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Weak CAS on a narrow oop.
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Weak CAS on a pointer.
instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9861 
9862 // END This section of the file is automatically generated. Do not edit --------------
9863 // ---------------------------------------------------------------------
9864 
// Atomic exchange rules: store $newv at the address in $mem's base
// register and return the previous memory contents in $prev.

// Atomic exchange, int (32-bit).
instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
  match(Set prev (GetAndSetI mem newv));
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Atomic exchange, long (64-bit).
instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegL prev) %{
  match(Set prev (GetAndSetL mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Atomic exchange, narrow oop (32-bit).
instruct get_and_setN(indirect mem, iRegNNoSp newv, iRegI prev) %{
  match(Set prev (GetAndSetN mem newv));
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Atomic exchange, pointer (64-bit).
instruct get_and_setP(indirect mem, iRegPNoSp newv, iRegP prev) %{
  match(Set prev (GetAndSetP mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9900 
9901 
// Atomic fetch-and-add rules.  For each width there are four forms:
// register or immediate (immL/IAddSub) increment, each with a
// "_no_res" variant selected when result_not_used() — the _no_res
// rules pass noreg and have cost 9 vs 10, so they win when the old
// value is dead.

// Fetch-and-add, long, register increment.
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Fetch-and-add, long, register increment, result unused.
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Fetch-and-add, long, immediate increment.
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Fetch-and-add, long, immediate increment, result unused.
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Fetch-and-add, int, register increment.
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Fetch-and-add, int, register increment, result unused.
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Fetch-and-add, int, immediate increment.
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Fetch-and-add, int, immediate increment, result unused.
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9985 
9986 // Manifest a CmpL result in an integer register.
9987 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
// Removed the stale commented-out alternative format line; added
// why-comments on the cset/cneg sequence.
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    __ cmp($src1$$Register, $src2$$Register);
    // dst = (src1 != src2) ? 1 : 0 ...
    __ csetw($dst$$Register, Assembler::NE);
    // ... then negate it when src1 < src2, yielding -1/0/+1.
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
10008 
// Manifest a CmpL-against-immediate result as -1/0/+1.
// Fixed the misindented `if`; documented the negative-immediate trick.
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    // immLAddSub constrains the constant to a valid add/sub immediate,
    // so narrowing to 32 bits and negating below are both safe.
    int32_t con = (int32_t)$src2$$constant;
    if (con < 0) {
      // A negative immediate cannot be encoded in subs; compare by
      // adding its magnitude instead (flags only, result to zr).
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    // dst = (src1 != con) ? 1 : 0, then negated when src1 < con.
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
10033 
10034 // ============================================================================
10035 // Conditional Move Instructions
10036 
10037 // n.b. we have identical rules for both a signed compare op (cmpOp)
10038 // and an unsigned compare op (cmpOpU). it would be nice if we could
10039 // define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
10045 // of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
10046 
// Conditional move, int, both sources in registers.  Note the operand
// order in the encoding: cselw writes $src2 when the condition holds
// and $src1 otherwise, matching CMoveI's (Binary src1 src2) shape.
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned-compare flavour of the rule above (cmpOpU/rFlagsRegU).
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10078 
10079 // special cases where one arg is zero
10080 
10081 // n.b. this is selected in preference to the rule above because it
10082 // avoids loading constant 0 into a source register
10083 
10084 // TODO
10085 // we ought only to be able to cull one of these variants as the ideal
10086 // transforms ought always to order the zero consistently (to left/right?)
10087 
// Zero on the src1 side: select $src when the condition holds, zr
// otherwise.  Avoids materializing the constant 0 in a register.
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare flavour of cmovI_zero_reg.
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Zero on the src2 side: select zr when the condition holds, $src
// otherwise.
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare flavour of cmovI_reg_zero.
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10151 
10152 // special case for creating a boolean 0 or 1
10153 
10154 // n.b. this is selected in preference to the rule above because it
10155 // avoids loading constants 0 and 1 into a source register
10156 
// Boolean materialization: csincw dst, zr, zr, cond yields 0 when the
// condition holds and 1 otherwise — i.e. cset with the negated
// condition, with no source registers needed at all.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}

// Unsigned-compare flavour of cmovI_reg_zero_one.
instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
10194 
// Conditional move, long: 64-bit csel; same src2/src1 operand order as
// the int rules above.
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned-compare flavour of cmovL_reg_reg.
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

// Long cmove with zero on the src2 side: zr replaces the constant.
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare flavour of cmovL_reg_zero.
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Long cmove with zero on the src1 side.
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare flavour of cmovL_zero_reg.
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10292 
// Conditional move, pointer: identical structure to the long rules,
// 64-bit csel.
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned-compare flavour of cmovP_reg_reg.
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

// Pointer cmove against null on the src2 side.
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare flavour of cmovP_reg_zero.
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Pointer cmove against null on the src1 side.
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare flavour of cmovP_zero_reg.
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10390 
// Conditional move, narrow oop: 32-bit cselw, same operand order as
// the int rules.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10406 
// Unsigned-compare flavour of cmovN_reg_reg.
// Fixed the format annotation: this rule takes cmpOpU/rFlagsRegU, so
// the listing comment must say "unsigned", not "signed".
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10422 
10423 // special cases where one arg is zero
10424 
// Narrow-oop cmove against null on the src2 side.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare flavour of cmovN_reg_zero.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Narrow-oop cmove against null on the src1 side.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare flavour of cmovN_zero_reg.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10488 
// Conditional move, float: fcsels with the same swapped src2/src1
// operand order as the integer rules.
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}

// Unsigned-compare flavour of cmovF_reg.
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
10524 
// Conditional move, double (fcseld on vRegD operands).
// Fixed the format annotation: it said "cmove float" but this is the
// double rule.
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10542 
// Conditional move (unsigned compare) of a double: $dst = $cmp ? $src2 : $src1.
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // fixed: comment previously said "float" for this double cmove
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // fcsel dst, a, b, cond  ==>  dst = cond ? a : b; src2 first so it is
    // selected when the condition holds.
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10560 
10561 // ============================================================================
10562 // Arithmetic Instructions
10563 //
10564 
10565 // Integer Addition
10566 
10567 // TODO
10568 // these currently employ operations which do not set CR and hence are
10569 // not flagged as killing CR but we would like to isolate the cases
10570 // where we want to set flags from those where we don't. need to work
10571 // out how to do that.
10572 
10573 instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10574   match(Set dst (AddI src1 src2));
10575 
10576   ins_cost(INSN_COST);
10577   format %{ "addw  $dst, $src1, $src2" %}
10578 
10579   ins_encode %{
10580     __ addw(as_Register($dst$$reg),
10581             as_Register($src1$$reg),
10582             as_Register($src2$$reg));
10583   %}
10584 
10585   ins_pipe(ialu_reg_reg);
10586 %}
10587 
10588 instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
10589   match(Set dst (AddI src1 src2));
10590 
10591   ins_cost(INSN_COST);
10592   format %{ "addw $dst, $src1, $src2" %}
10593 
10594   // use opcode to indicate that this is an add not a sub
10595   opcode(0x0);
10596 
10597   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
10598 
10599   ins_pipe(ialu_reg_imm);
10600 %}
10601 
10602 instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
10603   match(Set dst (AddI (ConvL2I src1) src2));
10604 
10605   ins_cost(INSN_COST);
10606   format %{ "addw $dst, $src1, $src2" %}
10607 
10608   // use opcode to indicate that this is an add not a sub
10609   opcode(0x0);
10610 
10611   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
10612 
10613   ins_pipe(ialu_reg_imm);
10614 %}
10615 
// Pointer Addition

// Pointer plus a 64-bit offset register.
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer plus an int offset; the ConvI2L is folded into the add's
// sxtw extension of $src2.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer plus a left-shifted (scaled) 64-bit index, folded into a single
// lea with an lsl-scaled register offset.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Pointer plus a sign-extended, scaled int index; the ConvI2L + shift are
// folded into the address's sxtw-with-shift operand.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10676 
// (long)(int)src << scale, implemented as a single sbfiz: sign-extend the
// int source and insert it at bit position ($scale & 63).  The field width
// is 64 - scale, capped by MIN at the 32 significant bits of the source.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
10691 
// Pointer Immediate Addition
// n.b. this needs to be more expensive than using an indirect memory
// operand
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10708 
// Long Addition

// 64-bit add, register + register.
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10725 
// Long Immediate Addition.  No constant pool entries required.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10740 
// Integer Subtraction

// 32-bit subtract, register - register.
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10756 
// Immediate Subtraction

// 32-bit subtract, register - add/sub-encodable immediate.
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10771 
// Long Subtraction

// 64-bit subtract, register - register.
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10788 
// Long Immediate Subtraction.  No constant pool entries required.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  // fixed: format string previously read "sub$dst" (missing separator)
  format %{ "sub  $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10803 
// Integer Negation (special case for sub)

// dst = 0 - src (32-bit), matched from SubI with a zero first operand.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
10819 
// Long Negation

// dst = 0 - src (64-bit), matched from SubL with a zero first operand.
// NOTE(review): src is declared iRegIorL2I although the SubL operand is a
// long; iRegL would be expected here -- verify against the operand defs.
instruct negL_reg(iRegLNoSp dst, iRegIorL2I src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
10835 
// Integer Multiply

// 32-bit multiply, register * register.
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
10852 
// Widening 32x32 -> 64-bit signed multiply: the two ConvI2L nodes are
// folded into a single smull.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
10867 
// Long Multiply

// 64-bit multiply, register * register (low 64 bits of the product).
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
10884 
// High 64 bits of the signed 64x64 product, via smulh.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  // fixed: format previously had a stray comma before the tab
  format %{ "smulh   $dst, $src1, $src2\t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
10900 
// Combined Integer Multiply & Add/Sub

// dst = src3 + src1 * src2 (32-bit), via maddw.
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // fixed: format previously said "madd" although the 32-bit maddw is emitted
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10918 
// dst = src3 - src1 * src2 (32-bit), via msubw.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // fixed: format previously said "msub" although the 32-bit msubw is emitted
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10934 
// Combined Long Multiply & Add/Sub

// dst = src3 + src1 * src2 (64-bit), via madd.
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
10952 
// dst = src3 - src1 * src2 (64-bit), via msub.
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
10968 
// Integer Divide

// 32-bit signed divide.
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}

// (src1 >> 31) >>> 31 extracts the sign bit (0 or 1); the two shifts
// collapse to a single lsrw by 31.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}
10990 
// src + sign-bit(src): adds 1 when src is negative, folded into a single
// addw with an LSR #31 shifted operand (round-toward-zero adjustment).
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
11004 
// Long Divide

// 64-bit signed divide.
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}

// (src1 >> 63) >>> 63 extracts the sign bit (0 or 1); the two shifts
// collapse to a single lsr by 63.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
11026 
// src + sign-bit(src): adds 1 when src is negative, folded into a single
// add with an LSR #63 shifted operand (round-toward-zero adjustment).
instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  // fixed: format omitted the LSR shift shown by the sibling div2Round
  format %{ "add $dst, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
11040 
// Integer Remainder

// 32-bit signed remainder: dst = src1 - (src1 / src2) * src2,
// emitted as sdivw into rscratch1 followed by msubw.
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  // fixed: second format line previously read "msubw($dst, ..." with an
  // unbalanced parenthesis
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
11053 
// Long Remainder

// 64-bit signed remainder: dst = src1 - (src1 / src2) * src2,
// emitted as sdiv into rscratch1 followed by msub.
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  // fixed: second format line previously read "msub($dst, ..." with an
  // unbalanced parenthesis and was missing the \t after \n
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
11066 
// Integer Shifts

// Shift Left Register
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// The shift amount is masked to 0..31, matching Java's int shift semantics.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11100 
// Shift Right Logical Register
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
// The shift amount is masked to 0..31, matching Java's int shift semantics.
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11132 
// Shift Right Arithmetic Register
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
// The shift amount is masked to 0..31, matching Java's int shift semantics.
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11164 
// Combined Int Mask and Right Shift (using UBFM)
// TODO

// Long Shifts

// Shift Left Register
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// The shift amount is masked to 0..63, matching Java's long shift semantics.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11201 
// Shift Right Logical Register
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
// The shift amount is masked to 0..63, matching Java's long shift semantics.
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11233 
// A special-case pattern for card table stores.
// Logical right shift of a pointer reinterpreted as a long (CastP2X).
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11249 
// Shift Right Arithmetic Register
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
// The shift amount is masked to 0..63, matching Java's long shift semantics.
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11281 
11282 // BEGIN This section of the file is automatically generated. Do not edit --------------
11283 
// NOTE(review): this is inside the auto-generated section (see BEGIN marker
// above); prefer regenerating over hand-editing.
// dst = ~src1 (64-bit), via eon (XOR-NOT) with zr.
instruct regL_not_reg(iRegLNoSp dst,
                         iRegL src1, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
// dst = ~src1 (32-bit), via eonw with zr.
instruct regI_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
11316 
// Generated section.  dst = src1 & ~src2 (32-bit), via bicw.
instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Generated section.  dst = src1 & ~src2 (64-bit), via bic.
instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11350 
// Generated section.  dst = src1 | ~src2 (32-bit), via ornw.
instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Generated section.  dst = src1 | ~src2 (64-bit), via orn.
instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11384 
// Generated section.  dst = ~(src1 ^ src2) (32-bit), via eonw.
instruct XorI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Generated section.  dst = ~(src1 ^ src2) (64-bit), via eon.
instruct XorL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11418 
// Generated section.  dst = src1 & ~(src2 >>> src3) (32-bit), via bicw
// with an LSR shifted operand.
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Generated section.  dst = src1 & ~(src2 >>> src3) (64-bit), via bic
// with an LSR shifted operand.
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11454 
// Generated section.  dst = src1 & ~(src2 >> src3) (32-bit), via bicw
// with an ASR shifted operand.
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Generated section.  dst = src1 & ~(src2 >> src3) (64-bit), via bic
// with an ASR shifted operand.
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11490 
// Generated section.  dst = src1 & ~(src2 << src3) (32-bit), via bicw
// with an LSL shifted operand.
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Generated section.  dst = src1 & ~(src2 << src3) (64-bit), via bic
// with an LSL shifted operand.
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11526 
// Generated section.  dst = ~(src1 ^ (src2 >>> src3)) (32-bit), via eonw
// with an LSR shifted operand.
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Generated section.  dst = ~(src1 ^ (src2 >>> src3)) (64-bit), via eon
// with an LSR shifted operand.
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11562 
11563 instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
11564                          iRegIorL2I src1, iRegIorL2I src2,
11565                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11566   match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
11567   ins_cost(1.9 * INSN_COST);
11568   format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}
11569 
11570   ins_encode %{
11571     __ eonw(as_Register($dst$$reg),
11572               as_Register($src1$$reg),
11573               as_Register($src2$$reg),
11574               Assembler::ASR,
11575               $src3$$constant & 0x1f);
11576   %}
11577 
11578   ins_pipe(ialu_reg_reg_shift);
11579 %}
11580 
11581 instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
11582                          iRegL src1, iRegL src2,
11583                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11584   match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
11585   ins_cost(1.9 * INSN_COST);
11586   format %{ "eon  $dst, $src1, $src2, ASR $src3" %}
11587 
11588   ins_encode %{
11589     __ eon(as_Register($dst$$reg),
11590               as_Register($src1$$reg),
11591               as_Register($src2$$reg),
11592               Assembler::ASR,
11593               $src3$$constant & 0x3f);
11594   %}
11595 
11596   ins_pipe(ialu_reg_reg_shift);
11597 %}
11598 
11599 instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
11600                          iRegIorL2I src1, iRegIorL2I src2,
11601                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11602   match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
11603   ins_cost(1.9 * INSN_COST);
11604   format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}
11605 
11606   ins_encode %{
11607     __ eonw(as_Register($dst$$reg),
11608               as_Register($src1$$reg),
11609               as_Register($src2$$reg),
11610               Assembler::LSL,
11611               $src3$$constant & 0x1f);
11612   %}
11613 
11614   ins_pipe(ialu_reg_reg_shift);
11615 %}
11616 
11617 instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
11618                          iRegL src1, iRegL src2,
11619                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11620   match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
11621   ins_cost(1.9 * INSN_COST);
11622   format %{ "eon  $dst, $src1, $src2, LSL $src3" %}
11623 
11624   ins_encode %{
11625     __ eon(as_Register($dst$$reg),
11626               as_Register($src1$$reg),
11627               as_Register($src2$$reg),
11628               Assembler::LSL,
11629               $src3$$constant & 0x3f);
11630   %}
11631 
11632   ins_pipe(ialu_reg_reg_shift);
11633 %}
11634 
// ORN (or-not) family: dst = src1 | ~(src2 shift src3).
// The not is matched as (Xor x -1) with src4 pinned to all-ones.
// One rule per shift kind (LSR/ASR/LSL) and width (I = ornw, L = orn).

// dst = src1 | ~(src2 >>> src3) (32-bit)
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >>> src3) (64-bit)
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >> src3) (32-bit)
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >> src3) (64-bit)
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 << src3) (32-bit)
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 << src3) (64-bit)
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11742 
// AND with shifted-register operand: dst = src1 & (src2 shift src3),
// folded into a single AND (shifted register).  "andr" is this port's
// assembler name for the 64-bit AND (avoids clashing with C++'s `and`).

// dst = src1 & (src2 >>> src3) (32-bit)
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >>> src3) (64-bit)
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >> src3) (32-bit)
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >> src3) (64-bit)
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 << src3) (32-bit)
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 << src3) (64-bit)
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11856 
// EOR with shifted-register operand: dst = src1 ^ (src2 shift src3),
// one EOR (shifted register) per shift kind and width.

// dst = src1 ^ (src2 >>> src3) (32-bit)
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >>> src3) (64-bit)
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >> src3) (32-bit)
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >> src3) (64-bit)
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 << src3) (32-bit)
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 << src3) (64-bit)
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11970 
// ORR with shifted-register operand: dst = src1 | (src2 shift src3),
// one ORR (shifted register) per shift kind and width.

// dst = src1 | (src2 >>> src3) (32-bit)
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >>> src3) (64-bit)
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >> src3) (32-bit)
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >> src3) (64-bit)
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 << src3) (32-bit)
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 << src3) (64-bit)
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12084 
// ADD with shifted-register operand: dst = src1 + (src2 shift src3),
// one ADD (shifted register) per shift kind and width.

// dst = src1 + (src2 >>> src3) (32-bit)
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >>> src3) (64-bit)
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >> src3) (32-bit)
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >> src3) (64-bit)
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 << src3) (32-bit)
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 << src3) (64-bit)
instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12198 
// SUB with shifted-register operand: dst = src1 - (src2 shift src3),
// one SUB (shifted register) per shift kind and width.

// dst = src1 - (src2 >>> src3) (32-bit)
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >>> src3) (64-bit)
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >> src3) (32-bit)
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >> src3) (64-bit)
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 << src3) (32-bit)
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 << src3) (64-bit)
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12312 
12313 
12314 
12315 // Shift Left followed by Shift Right.
12316 // This idiom is used by the compiler for the i2b bytecode etc.
// Folds (src << lshift) >> rshift (arithmetic) into one SBFM.
// SBFM Rd, Rn, #r, #s rotates right by r and sign-extends from bit s;
// r = (rshift - lshift) mod 64 and s = 63 - lshift reproduce the
// shift-left-then-arithmetic-shift-right idiom.  The predicate keeps
// both counts in [0, 63] so the immediate fields are always encodable.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
12337 
12338 // Shift Left followed by Shift Right.
12339 // This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit counterpart of sbfmL: folds (src << lshift) >> rshift
// (arithmetic) into one SBFMW with r = (rshift - lshift) mod 32 and
// s = 31 - lshift.  Used for i2b/i2s-style sign extensions.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
12360 
12361 // Shift Left followed by Shift Right.
12362 // This idiom is used by the compiler for the i2b bytecode etc.
// Unsigned counterpart of sbfmL: folds (src << lshift) >>> rshift into
// one UBFM (zero-extends from bit s instead of sign-extending).
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
12383 
12384 // Shift Left followed by Shift Right.
12385 // This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit counterpart of ubfmL: folds (src << lshift) >>> rshift into
// one UBFMW (zero-extending bitfield move).
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
12406 // Bitfield extract with shift & mask
12407 
// Bitfield extract: dst = (src >>> rshift) & mask, where immI_bitmask
// guarantees mask is a contiguous run of low-order ones, so the field
// width is exact_log2(mask + 1).  Emitted as a single UBFXW.
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  // Show the shift amount too, so PrintAssembly/debug output reflects
  // the full ubfxw operands rather than just the mask.
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);  // mask+1 is a power of two by construction
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// 64-bit bitfield extract: dst = (src >>> rshift) & mask, with
// immL_bitmask guaranteeing a contiguous low-order mask.  One UBFX.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  // Include $rshift in the debug format for accurate disassembly output.
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12438 
12439 // We can use ubfx when extending an And with a mask when we know mask
12440 // is positive.  We know that because immI_bitmask guarantees it.
// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// Matches (long)((src >>> rshift) & mask): the 64-bit UBFX zero-extends
// the extracted field, which subsumes the ConvI2L.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  // Include $rshift in the debug format for accurate disassembly output.
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12456 
// Rotations
//
// When the left and right shift amounts of an Or/Add of two shifts of
// the same width sum to the register size, the combination is a rotate
// (or, for two distinct sources, a bitfield extract from their
// concatenation) and maps onto a single EXTR instruction.  The
// predicate checks the shift-amount sum directly on the ideal graph:
// n->in(1)->in(2) is the LShift count, n->in(2)->in(2) the URShift
// count.  The lshift operand and cr are declared but unused by the
// encoding; only rshift feeds EXTR's immediate.

instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// 32-bit form: shift amounts must sum to 0 mod 32.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// Same pattern with AddL instead of OrL: when the shifted fields are
// disjoint (guaranteed by the shift-sum predicate) add and or agree.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// 32-bit AddI form of the above.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12518 
12519 
// rol expander
//
// AArch64 has no rotate-left instruction; rol(x, s) is implemented as
// rorv(x, -s), using rscratch1 to hold the negated shift.  These
// expanders have no match rule of their own; the rolX_rReg_Var_*
// instructs below match the rotate idioms and expand into them.

instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    // negate the shift count; rorv only looks at the low 6 bits
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol expander
// 32-bit version of the rotate-left expander above.
instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Rotate-left idiom: (x << s) | (x >>> (64 - s)).
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// Same idiom written as (x << s) | (x >>> (0 - s)); the hardware masks
// the shift count, so -s and 64-s rotate identically.
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-left idiom: (x << s) | (x >>> (32 - s)).
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-left idiom with the (0 - s) spelling.
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
12587 
// ror expander
//
// Rotate-right maps directly onto the RORV instruction, so unlike the
// rol expanders above no negation or scratch register is needed.

instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander
// 32-bit version of the rotate-right expander above.
instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Rotate-right idiom: (x >>> s) | (x << (64 - s)).
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// Same idiom with the (0 - s) spelling; shift counts are masked mod 64.
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-right idiom: (x >>> s) | (x << (32 - s)).
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-right idiom with the (0 - s) spelling.
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
12653 
// Add/subtract (extended)
//
// These patterns fold a widening/narrowing of the second operand into
// the ADD/SUB itself using the register-extended instruction forms
// (sxtb/sxth/sxtw/uxtb...).  The sxth/sxtb variants recognise the
// shift-pair idiom the compiler uses for sign extension:
// (x << k) >> k, with k fixed by the immI_* operand types so the pair
// is exactly a byte or halfword extension.
// NOTE(review): the stray ';' after '%}' on the first two instructs is
// present in the generated source and tolerated by adlc.

instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};

// long - sign-extended int, folded into a single sub-with-extend.
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};


// int + (short)src2: the << 16 >> 16 pair is a halfword sign extend.
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// int + (byte)src2: the << 24 >> 24 pair is a byte sign extend.
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// int + zero-extended byte: << 24 then unsigned >> 24 is uxtb.
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long + sign-extended halfword (shift pair of 48).
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long + sign-extended word (shift pair of 32).
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long + sign-extended byte (shift pair of 56).
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long + zero-extended byte (unsigned shift pair of 56).
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
12773 
12774 
12775 instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
12776 %{
12777   match(Set dst (AddI src1 (AndI src2 mask)));
12778   ins_cost(INSN_COST);
12779   format %{ "addw  $dst, $src1, $src2, uxtb" %}
12780 
12781    ins_encode %{
12782      __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
12783             as_Register($src2$$reg), ext::uxtb);
12784    %}
12785   ins_pipe(ialu_reg_reg);
12786 %}
12787 
12788 instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
12789 %{
12790   match(Set dst (AddI src1 (AndI src2 mask)));
12791   ins_cost(INSN_COST);
12792   format %{ "addw  $dst, $src1, $src2, uxth" %}
12793 
12794    ins_encode %{
12795      __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
12796             as_Register($src2$$reg), ext::uxth);
12797    %}
12798   ins_pipe(ialu_reg_reg);
12799 %}
12800 
12801 instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
12802 %{
12803   match(Set dst (AddL src1 (AndL src2 mask)));
12804   ins_cost(INSN_COST);
12805   format %{ "add  $dst, $src1, $src2, uxtb" %}
12806 
12807    ins_encode %{
12808      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12809             as_Register($src2$$reg), ext::uxtb);
12810    %}
12811   ins_pipe(ialu_reg_reg);
12812 %}
12813 
12814 instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
12815 %{
12816   match(Set dst (AddL src1 (AndL src2 mask)));
12817   ins_cost(INSN_COST);
12818   format %{ "add  $dst, $src1, $src2, uxth" %}
12819 
12820    ins_encode %{
12821      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12822             as_Register($src2$$reg), ext::uxth);
12823    %}
12824   ins_pipe(ialu_reg_reg);
12825 %}
12826 
12827 instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
12828 %{
12829   match(Set dst (AddL src1 (AndL src2 mask)));
12830   ins_cost(INSN_COST);
12831   format %{ "add  $dst, $src1, $src2, uxtw" %}
12832 
12833    ins_encode %{
12834      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12835             as_Register($src2$$reg), ext::uxtw);
12836    %}
12837   ins_pipe(ialu_reg_reg);
12838 %}
12839 
12840 instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
12841 %{
12842   match(Set dst (SubI src1 (AndI src2 mask)));
12843   ins_cost(INSN_COST);
12844   format %{ "subw  $dst, $src1, $src2, uxtb" %}
12845 
12846    ins_encode %{
12847      __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
12848             as_Register($src2$$reg), ext::uxtb);
12849    %}
12850   ins_pipe(ialu_reg_reg);
12851 %}
12852 
12853 instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
12854 %{
12855   match(Set dst (SubI src1 (AndI src2 mask)));
12856   ins_cost(INSN_COST);
12857   format %{ "subw  $dst, $src1, $src2, uxth" %}
12858 
12859    ins_encode %{
12860      __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
12861             as_Register($src2$$reg), ext::uxth);
12862    %}
12863   ins_pipe(ialu_reg_reg);
12864 %}
12865 
12866 instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
12867 %{
12868   match(Set dst (SubL src1 (AndL src2 mask)));
12869   ins_cost(INSN_COST);
12870   format %{ "sub  $dst, $src1, $src2, uxtb" %}
12871 
12872    ins_encode %{
12873      __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
12874             as_Register($src2$$reg), ext::uxtb);
12875    %}
12876   ins_pipe(ialu_reg_reg);
12877 %}
12878 
12879 instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
12880 %{
12881   match(Set dst (SubL src1 (AndL src2 mask)));
12882   ins_cost(INSN_COST);
12883   format %{ "sub  $dst, $src1, $src2, uxth" %}
12884 
12885    ins_encode %{
12886      __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
12887             as_Register($src2$$reg), ext::uxth);
12888    %}
12889   ins_pipe(ialu_reg_reg);
12890 %}
12891 
12892 instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
12893 %{
12894   match(Set dst (SubL src1 (AndL src2 mask)));
12895   ins_cost(INSN_COST);
12896   format %{ "sub  $dst, $src1, $src2, uxtw" %}
12897 
12898    ins_encode %{
12899      __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
12900             as_Register($src2$$reg), ext::uxtw);
12901    %}
12902   ins_pipe(ialu_reg_reg);
12903 %}
12904 
12905 // END This section of the file is automatically generated. Do not edit --------------
12906 
12907 // ============================================================================
12908 // Floating Point Arithmetic Instructions
12909 
// float + float -> fadds (scalar single-precision add).
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// double + double -> faddd (scalar double-precision add).
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// float - float -> fsubs.
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// double - double -> fsubd.
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// float * float -> fmuls.
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// double * double -> fmuld.
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
12999 
13000 // We cannot use these fused mul w add/sub ops because they don't
13001 // produce the same result as the equivalent separated ops
13002 // (essentially they don't round the intermediate result). that's a
// shame. leaving them here in case we can identify cases where it is
13004 // legitimate to use them
13005 
13006 
13007 // instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
13008 //   match(Set dst (AddF (MulF src1 src2) src3));
13009 
13010 //   format %{ "fmadds   $dst, $src1, $src2, $src3" %}
13011 
13012 //   ins_encode %{
13013 //     __ fmadds(as_FloatRegister($dst$$reg),
13014 //              as_FloatRegister($src1$$reg),
13015 //              as_FloatRegister($src2$$reg),
13016 //              as_FloatRegister($src3$$reg));
13017 //   %}
13018 
13019 //   ins_pipe(pipe_class_default);
13020 // %}
13021 
13022 // instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
13023 //   match(Set dst (AddD (MulD src1 src2) src3));
13024 
13025 //   format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
13026 
13027 //   ins_encode %{
13028 //     __ fmaddd(as_FloatRegister($dst$$reg),
13029 //              as_FloatRegister($src1$$reg),
13030 //              as_FloatRegister($src2$$reg),
13031 //              as_FloatRegister($src3$$reg));
13032 //   %}
13033 
13034 //   ins_pipe(pipe_class_default);
13035 // %}
13036 
13037 // instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
13038 //   match(Set dst (AddF (MulF (NegF src1) src2) src3));
13039 //   match(Set dst (AddF (NegF (MulF src1 src2)) src3));
13040 
13041 //   format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
13042 
13043 //   ins_encode %{
13044 //     __ fmsubs(as_FloatRegister($dst$$reg),
13045 //               as_FloatRegister($src1$$reg),
13046 //               as_FloatRegister($src2$$reg),
13047 //              as_FloatRegister($src3$$reg));
13048 //   %}
13049 
13050 //   ins_pipe(pipe_class_default);
13051 // %}
13052 
13053 // instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
13054 //   match(Set dst (AddD (MulD (NegD src1) src2) src3));
13055 //   match(Set dst (AddD (NegD (MulD src1 src2)) src3));
13056 
13057 //   format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
13058 
13059 //   ins_encode %{
13060 //     __ fmsubd(as_FloatRegister($dst$$reg),
13061 //               as_FloatRegister($src1$$reg),
13062 //               as_FloatRegister($src2$$reg),
13063 //               as_FloatRegister($src3$$reg));
13064 //   %}
13065 
13066 //   ins_pipe(pipe_class_default);
13067 // %}
13068 
13069 // instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
13070 //   match(Set dst (SubF (MulF (NegF src1) src2) src3));
13071 //   match(Set dst (SubF (NegF (MulF src1 src2)) src3));
13072 
13073 //   format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
13074 
13075 //   ins_encode %{
13076 //     __ fnmadds(as_FloatRegister($dst$$reg),
13077 //                as_FloatRegister($src1$$reg),
13078 //                as_FloatRegister($src2$$reg),
13079 //                as_FloatRegister($src3$$reg));
13080 //   %}
13081 
13082 //   ins_pipe(pipe_class_default);
13083 // %}
13084 
13085 // instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
13086 //   match(Set dst (SubD (MulD (NegD src1) src2) src3));
13087 //   match(Set dst (SubD (NegD (MulD src1 src2)) src3));
13088 
13089 //   format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
13090 
13091 //   ins_encode %{
13092 //     __ fnmaddd(as_FloatRegister($dst$$reg),
13093 //                as_FloatRegister($src1$$reg),
13094 //                as_FloatRegister($src2$$reg),
13095 //                as_FloatRegister($src3$$reg));
13096 //   %}
13097 
13098 //   ins_pipe(pipe_class_default);
13099 // %}
13100 
13101 // instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
13102 //   match(Set dst (SubF (MulF src1 src2) src3));
13103 
13104 //   format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
13105 
13106 //   ins_encode %{
13107 //     __ fnmsubs(as_FloatRegister($dst$$reg),
13108 //                as_FloatRegister($src1$$reg),
13109 //                as_FloatRegister($src2$$reg),
13110 //                as_FloatRegister($src3$$reg));
13111 //   %}
13112 
13113 //   ins_pipe(pipe_class_default);
13114 // %}
13115 
13116 // instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
13117 //   match(Set dst (SubD (MulD src1 src2) src3));
13118 
13119 //   format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
13120 
13121 //   ins_encode %{
13122 //   // n.b. insn name should be fnmsubd
13123 //     __ fnmsub(as_FloatRegister($dst$$reg),
13124 //                as_FloatRegister($src1$$reg),
13125 //                as_FloatRegister($src2$$reg),
13126 //                as_FloatRegister($src3$$reg));
13127 //   %}
13128 
13129 //   ins_pipe(pipe_class_default);
13130 // %}
13131 
13132 
// float / float -> fdivs; long-latency op, costed accordingly and
// scheduled on the single-precision divide pipe.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}

// double / double -> fdivd; even longer latency than the float divide.
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
13162 
// -float -> fnegs (flips the sign bit; exact for all inputs incl. NaN
// payload sign).
// NOTE(review): format text says "fneg" but the encoding emits fnegs;
// cosmetic only — affects PrintAssembly output, not generated code.
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  format %{ "fneg   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}

// -double -> fnegd.
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}

// |float| -> fabss (clears the sign bit).
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}

// |double| -> fabsd.
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
13216 
// sqrt(double) -> fsqrtd.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  // Fix: double-precision sqrt belongs on the double-precision
  // divide/sqrt pipeline.  It previously said fp_div_s — swapped with
  // sqrtF_reg below — which mis-modelled scheduling latency (no
  // functional impact on generated code).
  ins_pipe(fp_div_d);
%}
13229 
// sqrt(float): C2 (of this vintage) expresses float sqrt as
// (float)sqrt((double)x); a single fsqrts gives the correctly rounded
// result for that widen-sqrt-narrow composition.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  // Fix: single-precision sqrt belongs on the single-precision
  // divide/sqrt pipeline; was fp_div_d (swapped with sqrtD_reg above).
  // Scheduling-model change only; emitted code is unchanged.
  ins_pipe(fp_div_s);
%}
13242 
13243 // ============================================================================
13244 // Logical Instructions
13245 
13246 // Integer Logical Instructions
13247 
13248 // And Instructions
13249 
13250 
13251 instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
13252   match(Set dst (AndI src1 src2));
13253 
13254   format %{ "andw  $dst, $src1, $src2\t# int" %}
13255 
13256   ins_cost(INSN_COST);
13257   ins_encode %{
13258     __ andw(as_Register($dst$$reg),
13259             as_Register($src1$$reg),
13260             as_Register($src2$$reg));
13261   %}
13262 
13263   ins_pipe(ialu_reg_reg);
13264 %}
13265 
// int & logical-immediate -> andw.  immILog restricts src2 to the
// bitmask-immediate encodings AND accepts.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  // Fix: format previously said "andsw" (the flag-setting form), but
  // the encoding emits plain andw; the old text made PrintAssembly /
  // debug listings misleading.
  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13280 
// Or Instructions

// int | int -> orrw.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// int | logical-immediate -> orrw.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// int ^ int -> eorw.
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// int ^ logical-immediate -> eorw.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13344 
// Long Logical Instructions
// TODO
// NOTE(review): the "\t# int" annotations in these format strings are
// carried over from the 32-bit forms; they are only PrintAssembly
// text, but "# long" would be more accurate.

// long & long -> andr (64-bit AND; named andr because "and" clashes
// with the C++ alternative-token keyword).
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// long & logical-immediate -> andr.
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Or Instructions

// long | long -> orr.
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// long | logical-immediate -> orr.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// long ^ long -> eor.
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// long ^ logical-immediate -> eor.
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  format %{ "eor  $dst, $src1, $src2\t# int" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13441 
// Sign-extend int to long: sbfm with immr=0, imms=31 is the SXTW alias.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// This pattern occurs in bignum (BigInteger) arithmetic.
// Zero-extend int to long: matches (AndL (ConvI2L src) 0xFFFFFFFF) so the
// extend-then-mask pair collapses to a single ubfm (UXTW alias).
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Long to int: a 32-bit register move keeps the low 32 bits and zeroes the
// upper half of the destination.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Int to boolean: dst = (src != 0) ? 1 : 0, via compare-with-zero + cset.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Pointer to boolean: dst = (src != null) ? 1 : 0 (64-bit compare).
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Double to float narrowing conversion.
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2f);
%}

// Float to double widening conversion.
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}

// Float to int: fcvtzs (signed convert, round toward zero).
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}

// Float to long: 64-bit fcvtzs (signed convert, round toward zero).
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}

// Int to float: scvtf from a 32-bit GP register.
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}

// Long to float: scvtf from a 64-bit GP register.
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}

// Double to int: fcvtzs (signed convert, round toward zero).
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}

// Double to long: 64-bit fcvtzs (signed convert, round toward zero).
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}

// Int to double: scvtf from a 32-bit GP register.
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}

// Long to double: scvtf from a 64-bit GP register.
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
13646 
13647 // stack <-> reg and reg <-> reg shuffles with no conversion
13648 
// Reinterpret float bits as int: 32-bit integer load from the stack slot
// (no value conversion; see section comment above).
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Reinterpret int bits as float: 32-bit FP load from the stack slot.
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Reinterpret double bits as long: 64-bit integer load from the stack slot.
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Reinterpret long bits as double: 64-bit FP load from the stack slot.
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Reinterpret float bits as int: store the FP register to the int stack slot.
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Reinterpret int bits as float: 32-bit integer store to the float stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
13756 
// Reinterpret double bits as long: store the FP register to the long stack
// slot (no value conversion).
// Fix: the format printed "strd $dst, $src", reversing the operands relative
// to the actual encoding (strd src -> dst slot) and to the sibling store
// shuffles, which all print "$src, $dst".
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
13774 
// Reinterpret long bits as double: 64-bit integer store to the double
// stack slot (no value conversion).
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}

// Reinterpret float bits as int with a direct FP->GP register move (fmov).
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}

// Reinterpret int bits as float with a direct GP->FP register move (fmov).
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}

// Reinterpret double bits as long with a direct FP->GP register move (fmov).
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}

// Reinterpret long bits as double with a direct GP->FP register move (fmov).
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
13864 
13865 // ============================================================================
13866 // clearing of an array
13867 
// Zero an array: base address pinned to r10, count pinned to r11; both
// inputs are clobbered (USE_KILL).  The count unit is presumably words --
// see MacroAssembler::zero_words; TODO confirm against callers.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, $cnt$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}

// Constant-length variant: the count is an immediate; tmp (r11) is reserved
// as scratch for the zero_words expansion.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 tmp, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL base, TEMP tmp);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
  %}

  ins_pipe(pipe_class_memory);
%}
13897 
13898 // ============================================================================
13899 // Overflow Math Instructions
13900 
// Int add overflow check: cmnw evaluates op1 + op2 and sets the flags; the
// consumer tests the V flag (overflow/no_overflow cmpOp).
instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Same as above with an add/sub-encodable immediate operand.
instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long add overflow check: 64-bit cmn sets the flags for the consumer.
instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Long add overflow check with an add/sub-encodable immediate.
instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Int subtract overflow check: cmpw evaluates op1 - op2 and sets the flags.
instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Same as above with an add/sub-encodable immediate operand.
instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long subtract overflow check: 64-bit cmp sets the flags.
instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Long subtract overflow check with an add/sub-encodable immediate.
instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Int negate overflow check: compare zr against op1 (0 - op1 sets flags).
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  format %{ "cmpw  zr, $op1\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long negate overflow check: 64-bit compare of zr against op1.
// NOTE(review): the zero operand is immI0 even though the operation is
// long -- confirm the ideal graph really presents an int 0 here.
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
14030 
// Int multiply overflow check.  smull produces the exact 64-bit product;
// the 32-bit multiply overflowed iff that product differs from the sign
// extension of its own low word (the subs/sxtw compare).  The movw/cselw/
// cmpw tail then re-expresses that NE outcome as the V flag (VS on
// overflow) so ordinary overflow cmpOps can consume cr.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused int multiply overflow check + branch.  The predicate restricts
// this to overflow/no_overflow tests, which is what makes the VS->NE /
// VC->EQ condition mapping in the encoding valid; the flag-materializing
// tail of the non-branch form is then unnecessary.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}

// Long multiply overflow check.  mul gives the low 64 bits and smulh the
// high 64 bits of the 128-bit product; the multiply overflowed iff the
// high half is not the sign extension of the low half (cmp with ASR #31 --
// n.b. shift amounts are taken mod 64, so #31 encodes an ASR of 63).
// The tail converts NE into the V flag as in overflowMulI_reg.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #31\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 31);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused long multiply overflow check + branch; same predicate-backed
// condition mapping as overflowMulI_reg_branch.
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #31\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 31);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
14120 
14121 // ============================================================================
14122 // Compare Instructions
14123 
// 32-bit signed compare, register-register.
instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// 32-bit compare against the constant zero.
instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, 0" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// 32-bit compare against an immediate encodable in an add/sub instruction.
instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// 32-bit compare against an arbitrary immediate; costs more because the
// encoder may need to materialize the constant first.
instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14179 
14180 // Unsigned compare Instructions; really, same as signed compare
14181 // except it should only be used to feed an If or a CMovI which takes a
14182 // cmpOpU.
14183 
// Unsigned 32-bit compare, reg-reg.  Same cmpw encoding as the signed
// form; the unsignedness lives in the cmpOpU condition the consumer uses
// (see comment above).
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned 32-bit compare against the constant zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned 32-bit compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned 32-bit compare against an arbitrary immediate (2x cost: the
// constant may need to be materialized first).
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14239 
// 64-bit signed compare, register-register.
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// 64-bit compare against the constant zero.
// n.b. the format prints "tst" but the encoder emits a compare against an
// add/sub immediate zero.
instruct compL_reg_immI0(rFlagsReg cr, iRegL op1, immI0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// 64-bit compare against an immediate encodable in an add/sub instruction.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// 64-bit compare against an arbitrary immediate (2x cost: the constant may
// need to be materialized first).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14295 
// Pointer compare, register-register (unsigned flags).
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compressed-pointer (narrow oop) compare, register-register.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Pointer null test: compare op1 against zero (the encoder takes only op1).
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Compressed-pointer null test: compare op1 against zero.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
14351 
14352 // FP comparisons
14353 //
14354 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
14355 // using normal cmpOp. See declaration of rFlagsReg for details.
14356 
// Float compare, register-register; sets the normal integer flags
// (see the FP comparison note above).
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Float compare against +0.0 using fcmp's immediate-zero form.
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}
14384 // FROM HERE
14385 
// Double compare, register-register; sets the normal integer flags.
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Double compare against +0.0 using fcmp's immediate-zero form.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}
14413 
// Three-way float compare: dst = -1 / 0 / 1 for less / equal / greater,
// with -1 for unordered (csinvw + csnegw after fcmp).
// n.b. the 'done' label is bound but never branched to.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}

  ins_pipe(pipe_class_default);

%}

// Three-way double compare; same -1/0/1 protocol as compF3_reg_reg.
// n.b. the 'done' label is bound but never branched to.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);

%}

// Three-way float compare against +0.0 (fcmp immediate-zero form).
// n.b. the 'done' label is bound but never branched to.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0D);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}

  ins_pipe(pipe_class_default);

%}

// Three-way double compare against +0.0 (fcmp immediate-zero form).
// n.b. the 'done' label is bound but never branched to.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0D);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);

%}
14521 
// dst = (p < q) ? -1 : 0 -- materialize the signed less-than result as a
// full 32-bit mask: cset gives 0/1, then 0 - dst gives 0/-1.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Special case against zero: an arithmetic shift right by 31 replicates
// the sign bit, yielding -1 for negative src and 0 otherwise.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
14558 
14559 // ============================================================================
14560 // Max and Min
14561 
// Signed 32-bit minimum: compare with cmpw, then conditionally select
// src1 when LT (src1 < src2), otherwise src2.
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 lt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
14586 // FROM HERE
14587 
// Signed 32-bit maximum: compare with cmpw, then conditionally select
// src1 when GT (src1 > src2), otherwise src2 (on equality either
// operand yields the same value).
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 gt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
14612 
14613 // ============================================================================
14614 // Branch Instructions
14615 
14616 // Direct Branch.
// Direct Branch.
// Unconditional branch to a label (ideal Goto node).
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}
14630 
14631 // Conditional Near Branch
// Conditional Near Branch
// Branch on a signed condition held in the flags register.
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}

// Conditional Near Branch Unsigned
// Same as above but the condition comes from an unsigned compare
// (cmpOpU / rFlagsRegU), encoded via aarch64_enc_br_conU.
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
14672 
14673 // Make use of CBZ and CBNZ.  These instructions, as well as being
14674 // shorter than (cmp; branch), have the additional benefit of not
14675 // killing the flags.
14676 
// Compare-with-zero-and-branch, int flavor: EQ maps to cbzw, NE to
// cbnzw.  Flags are untouched; cr appears only so the rule competes
// with the cmp+branch pair it replaces.
instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Long (64-bit) compare-with-zero-and-branch: cbz / cbnz.
instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Pointer compare against NULL and branch: 64-bit cbz / cbnz.
instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Narrow-oop compare against zero and branch: 32-bit cbzw / cbnzw.
instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// NULL-check of a decoded narrow oop without materializing the wide
// oop: test the 32-bit narrow value directly (assumes a narrow oop is
// zero iff the decoded oop is NULL -- NOTE(review): relies on the
// compressed-oops encoding mapping 0 <-> NULL).
instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Unsigned int compare against zero: EQ and LS (unsigned <=, which
// against 0 means == 0) both reduce to cbzw; the remaining conditions
// (NE, HI) reduce to cbnzw.
instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Same unsigned-vs-zero reduction for the 64-bit case: cbz / cbnz.
instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14795 
14796 // Test bit and Branch
14797 
14798 // Patterns for short (< 32KiB) variants
// Patterns for short (< 32KiB) variants
// Sign test of a long: "x < 0" is "bit 63 set", "x >= 0" is "bit 63
// clear", so LT maps to NE (test-bit-nonzero) and GE to EQ for tbr.
instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Same sign-bit trick for ints: test bit 31.
instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// (x & single-bit-mask) ==/!= 0 on a long: test that one bit with
// tbz/tbnz.  Predicate restricts the AND mask to an exact power of 2.
instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Same single-bit test/branch for the int case.
instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
14864 
14865 // And far variants
// And far variants: identical to the short forms above, but tbr is
// passed /*far*/true so targets beyond the tbz/tbnz displacement
// range can be reached; no ins_short_branch(1) here.
instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of the int sign-bit branch (bit 31).
instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of the long single-bit test/branch.
instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of the int single-bit test/branch.
instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14927 
14928 // Test bits
14929 
// Set flags from (op1 & op2) with a 64-bit tst and an immediate mask;
// only applicable when the mask is encodable as a logical immediate.
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
14942 
// Set flags from (op1 & op2) with a 32-bit tstw and an immediate
// mask; only applicable when the mask is encodable as a 32-bit
// logical immediate.
// Fix: format previously displayed "tst" although the emitted
// instruction is the 32-bit tstw (consistent with cmpI_and_reg).
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
14955 
// Register-register forms of the flag-setting AND tests above; no
// logical-immediate restriction applies.
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// 32-bit register-register variant.
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
14977 
14978 
14979 // Conditional Far Branch
14980 // Conditional Far Branch Unsigned
14981 // TODO: fixme
14982 
14983 // counted loop end branch near
// counted loop end branch near
// Conditional back-branch closing a counted loop; same encoding as
// the generic conditional branch, distinct rule for CountedLoopEnd.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}

// counted loop end branch near Unsigned
// As above, for an unsigned loop condition.
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
15016 
15017 // counted loop end branch far
15018 // counted loop end branch far unsigned
15019 // TODO: fixme
15020 
15021 // ============================================================================
15022 // inlined locking and unlocking
15023 
// Inlined object monitor lock: sets flags for the caller to branch on
// (slow path taken when the fast path fails); tmp/tmp2 are scratch.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}

// Inlined object monitor unlock, mirroring cmpFastLock above.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
15051 
15052 
15053 // ============================================================================
15054 // Safepoint Instructions
15055 
15056 // TODO
15057 // provide a near and far version of this code
15058 
// Safepoint poll: a load from the polling page; the VM arms the page
// (protects it) to trap threads at safepoints.
instruct safePoint(iRegP poll)
%{
  match(SafePoint poll);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
15071 
15072 
15073 // ============================================================================
15074 // Procedure Call/Return Instructions
15075 
15076 // Call Java Static Instruction
15077 
// Direct call to a statically-bound Java method.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}

// TO HERE

// Call Java Dynamic Instruction
// Dynamically-dispatched (virtual/interface) Java call.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction
// Full runtime call (may safepoint).
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction
// Leaf runtime call: same encoding as above; the "leaf" distinction
// lives in the ideal CallLeaf node's semantics, not the emitted code.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction
// Leaf runtime call that uses no floating-point state.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15163 
15164 // Tail Call; Jump from runtime stub to Java code.
15165 // Also known as an 'interprocedural jump'.
15166 // Target of jump will eventually return to caller.
15167 // TailJump below removes the return address.
// Interprocedural jump from a runtime stub into Java code; the target
// eventually returns to this frame's caller.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}

// Tail jump used for exception forwarding; the exception oop is
// pinned in r0 (iRegP_R0) and the return address is discarded.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
15193 
15194 // Create exception oop: created by stack-crawling runtime code.
15195 // Created exception is now available to this handler, and is setup
15196 // just prior to jumping to this handler. No code emitted.
15197 // TODO check
15198 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
// Zero-length placeholder: the stack-crawling runtime has already put
// the exception oop in r0 before control reaches the handler.
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
15211 
15212 // Rethrow exception: The exception oop will come in the first
15213 // argument position. Then JUMP (not call) to the rethrow stub code.
// Jump (not call) to the shared rethrow stub; the exception oop
// arrives in the first argument register.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}


// Return Instruction
// epilog node loads ret address into lr as part of frame pop
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}

// Die now.
// Halt execution with a breakpoint trap (unreachable code marker).
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // TODO
    // implement proper trap call here
    __ brk(999);
  %}

  ins_pipe(pipe_class_default);
%}
15254 
15255 // ============================================================================
15256 // Partial Subtype Check
15257 //
15258 // superklass array for an instance of the superklass.  Set a hidden
15259 // internal cache on a hit (cache is checked with exposed code in
15260 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
15261 // encoding ALSO sets flags.
15262 
// Partial subtype check producing a result register; opcode(0x1)
// requests zeroing of the result on a hit.  Operands are pinned to
// the registers the stub expects (r4/r0/r2/r5).
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}

// Flags-only variant matched when the check result is immediately
// compared against zero; opcode(0x0) skips the result zeroing.
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
15292 
// String.compareTo intrinsic, UTF-16 vs UTF-16 encoding (UU).  The
// UU/LL forms need no vector temps (fnoreg passed); the mixed UL/LU
// forms below add vtmp1/vtmp2 for the widening path.
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      fnoreg, fnoreg, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

// Latin-1 vs Latin-1 (LL) variant.
instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      fnoreg, fnoreg, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}

// UTF-16 vs Latin-1 (UL): vector temps used for widening bytes to
// chars during the comparison.
instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}

// Latin-1 vs UTF-16 (LU) variant, mirror of UL.
instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
15361 
// String.indexOf intrinsic with a runtime needle length; the -1
// passed as icnt2 tells MacroAssembler::string_indexof the count is
// in a register, not a constant.  Encoding UU: both strings UTF-16.
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

// LL: both strings Latin-1.
instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}

// UL: UTF-16 haystack, Latin-1 needle.
instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}

// LU: Latin-1 haystack, UTF-16 needle.
instruct string_indexofLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
15437 
15438 instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
15439                  immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
15440                  iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15441 %{
15442   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
15443   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15444   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
15445          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15446   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}
15447 
15448   ins_encode %{
15449     int icnt2 = (int)$int_cnt2$$constant;
15450     __ string_indexof($str1$$Register, $str2$$Register,
15451                       $cnt1$$Register, zr,
15452                       $tmp1$$Register, $tmp2$$Register,
15453                       $tmp3$$Register, $tmp4$$Register,
15454                       icnt2, $result$$Register, StrIntrinsicNode::UU);
15455   %}
15456   ins_pipe(pipe_class_memory);
15457 %}
15458 
// String.indexOf with both strings Latin-1 (LL) and a constant needle
// length of at most 4 (immI_le_4); same zr/icnt2 trick as the UU variant.
instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    // Constant needle length goes via icnt2; cnt2 register slot gets zr.
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
15479 
// String.indexOf, UTF-16 haystack / Latin-1 needle (UL), with a constant
// needle length of exactly 1 (immI_1).  Only the single-char case is
// specialized for mixed encodings.
instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    // Constant needle length goes via icnt2; cnt2 register slot gets zr.
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
15500 
// String.indexOf, Latin-1 haystack / UTF-16 needle (LU), with a constant
// needle length of exactly 1 (immI_1); mirror of the UL variant above.
instruct string_indexof_conLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    // Constant needle length goes via icnt2; cnt2 register slot gets zr.
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
15521 
// StringUTF16.indexOf(char) intrinsic: find a single char in a UTF-16
// char sequence.  Fixed registers (R1/R2/R3, result in R0) match the
// calling convention of MacroAssembler::string_indexof_char.
instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
                              iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                              iRegI tmp3, rFlagsReg cr)
%{
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}

  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
                           $result$$Register, $tmp1$$Register, $tmp2$$Register,
                           $tmp3$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
15539 
// String.equals for two Latin-1 (LL) strings; delegates to the shared
// arrays_equals stub with element size 1.
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // cnt is a byte count and Latin-1 elements are 1 byte each, so it
    // can be used directly with element size 1.
    __ arrays_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register,
                     1, /*is_string*/true);
  %}
  ins_pipe(pipe_class_memory);
%}
15556 
// String.equals for two UTF-16 (UU) strings; delegates to the shared
// arrays_equals stub with element size 2.
instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // cnt arrives as a byte count but the elements are 2-byte chars:
    // halve it to get the element count before calling the stub.
    __ asrw($cnt$$Register, $cnt$$Register, 1);
    __ arrays_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register,
                     2, /*is_string*/true);
  %}
  ins_pipe(pipe_class_memory);
%}
15574 
// Arrays.equals for byte[] (LL encoding): element size 1, is_string
// false so the stub reads the lengths from the array headers itself.
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $result$$Register, $tmp$$Register,
                     1, /*is_string*/false);
    %}
  ins_pipe(pipe_class_memory);
%}
15590 
// Arrays.equals for char[] (UU encoding): element size 2, is_string
// false so the stub reads the lengths from the array headers itself.
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $result$$Register, $tmp$$Register,
                     2, /*is_string*/false);
  %}
  ins_pipe(pipe_class_memory);
%}
15606 
15607 
// fast char[] to byte[] compression
// (UTF-16 -> Latin-1; result reports success/position per the
// char_array_compress stub contract).  V0-V3 are used as SIMD temps.
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                         vRegD_V0 tmp1, vRegD_V1 tmp2,
                         vRegD_V2 tmp3, vRegD_V3 tmp4,
                         iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
                           $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15626 
// fast byte[] to char[] inflation (Latin-1 -> UTF-16).  Produces no
// value ("Universe dummy" output); works purely by side effect on dst.
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
                        vRegD tmp1, vRegD tmp2, vRegD tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
15641 
// encode char[] to byte[] in ISO_8859_1
// Result is the number of chars encoded, per the encode_iso_array stub
// contract.  V0-V3 are clobbered as SIMD temporaries.
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
15660 
15661 // ============================================================================
15662 // This name is KNOWN by the ADLC and cannot be changed.
15663 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
15664 // for this guy.
// Load the current Thread pointer.  Emits no code (size 0, empty
// encoding): the thread pointer already lives in the dedicated thread
// register (thread_RegP), so matching just renames that register.
instruct tlsLoadP(thread_RegP dst)
%{
  match(Set dst (ThreadLocal));

  ins_cost(0);

  format %{ " -- \t// $dst=Thread::current(), empty" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
15679 
15680 // ====================VECTOR INSTRUCTIONS=====================================
15681 
15682 // Load vector (32 bits)
// Load a 32-bit vector from memory into the low half of a D register
// (ldr Sn form via the aarch64_enc_ldrvS encoding).
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}
15692 
15693 // Load vector (64 bits)
// Load a 64-bit vector from memory into a D register.
instruct loadV8(vecD dst, vmem8 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}
15703 
15704 // Load Vector (128 bits)
// Load a 128-bit vector from memory into a Q register.
instruct loadV16(vecX dst, vmem16 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}
15714 
15715 // Store Vector (32 bits)
// Store the low 32 bits of a vector register to memory.
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}
15725 
15726 // Store Vector (64 bits)
// Store a 64-bit vector register to memory.
instruct storeV8(vecD src, vmem8 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}
15736 
15737 // Store Vector (128 bits)
// Store a 128-bit vector register to memory.
instruct storeV16(vecX src, vmem16 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(vstore_reg_mem128);
%}
15747 
// Broadcast a GP-register byte into all lanes of a 64-bit vector (dup).
// Also covers 4-byte vectors: they fit in the same D register.
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
15760 
// Broadcast a GP-register byte into all 16 lanes of a 128-bit vector.
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
15772 
// Broadcast an immediate byte constant into a 64-bit vector (movi).
// The constant is masked to its low 8 bits.
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
15785 
// Broadcast an immediate byte constant into a 128-bit vector (movi).
instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15797 
// Broadcast a GP-register short into a 64-bit vector (4 halfword lanes);
// also covers length-2 short vectors.
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
15810 
// Broadcast a GP-register short into a 128-bit vector (8 halfword lanes).
instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
15822 
// Broadcast an immediate short constant into a 64-bit vector; the
// constant is masked to its low 16 bits.
instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
15835 
// Broadcast an immediate short constant into a 128-bit vector.
instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15847 
// Broadcast a GP-register int into a 64-bit vector (2 word lanes).
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
15859 
// Broadcast a GP-register int into a 128-bit vector (4 word lanes).
instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
15871 
// Broadcast an immediate int constant into a 64-bit vector (movi).
instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
15883 
// Broadcast an immediate int constant into a 128-bit vector (movi).
instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15895 
// Broadcast a GP-register long into a 128-bit vector (2 doubleword lanes).
instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
15907 
// Zero a 128-bit vector by xor-ing it with itself (eor dst,dst,dst):
// no constant materialization needed for the all-zero pattern.
// NOTE(review): matches ReplicateI of constant zero with length 2, and
// the format says "vector(4I)" -- a 128-bit zero is bit-identical for
// 2L/4I, but confirm the ideal-graph shape this is meant to catch.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15921 
// Broadcast a float register's lane 0 into a 64-bit vector (2 lanes).
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}
15934 
// Broadcast a float register's lane 0 into a 128-bit vector (4 lanes).
instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}
15947 
// Broadcast a double register's lane 0 into a 128-bit vector (2 lanes).
instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
15960 
15961 // ====================REDUCTION ARITHMETIC====================================
15962 
// Add-reduce a 2-int vector into a scalar: dst = src1 + src2[0] + src2[1].
// Extracts both lanes to GP registers (umov) and adds there.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp, iRegI tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
15981 
// Add-reduce a 4-int vector into a scalar: a single SIMD addv sums the
// four lanes, then the result lane is moved to a GP register and added
// to src1.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
15999 
// Multiply-reduce a 2-int vector into a scalar:
// dst = src1 * src2[0] * src2[1], computed with scalar muls after
// extracting each lane via umov.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i\n\t"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16018 
// Multiply-reduce a 4-int vector: ins copies the high 64 bits of src2
// down so a T2S mulv multiplies lanes {0,1}x{2,3} pairwise; the two
// partial products are then extracted and multiplied with src1 in GP
// registers.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "ins   $tmp, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i\n\t"
  %}
  ins_encode %{
    // Move doubleword 1 of src2 into doubleword 0 of tmp.
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16043 
// Add-reduce a 2-float vector: dst = (src1 + src2[0]) + src2[1].
// Sequential scalar fadds preserve strict FP ordering; ins moves lane 1
// down to lane 0 so it can be used as a scalar operand.
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16063 
// Add-reduce a 4-float vector with strictly ordered scalar adds:
// dst = (((src1 + src2[0]) + src2[1]) + src2[2]) + src2[3].
// Each ins moves the next lane of src2 down to lane 0 of tmp.
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16095 
// Multiply-reduce a 2-float vector: dst = (src1 * src2[0]) * src2[1].
// Sequential scalar fmuls preserve strict FP ordering; ins moves lane 1
// down to lane 0 so it can be used as a scalar operand.
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Debug-format label corrected: this is a 2-float MULTIPLY reduction
  // (previously mislabeled "add reduction4f").
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16115 
// Multiply-reduce a 4-float vector with strictly ordered scalar muls:
// dst = (((src1 * src2[0]) * src2[1]) * src2[2]) * src2[3].
// Each ins moves the next lane of src2 down to lane 0 of tmp.
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Debug-format label corrected: this is a MULTIPLY reduction
  // (previously mislabeled "add reduction4f").
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16147 
// Add-reduce a 2-double vector: dst = (src1 + src2[0]) + src2[1].
// Sequential scalar faddd preserves strict FP ordering; ins moves the
// high doubleword of src2 down to lane 0 of tmp.
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16167 
// Multiply-reduce a 2-double vector: dst = (src1 * src2[0]) * src2[1].
// Sequential scalar fmuld preserves strict FP ordering; ins moves the
// high doubleword of src2 down to lane 0 of tmp.
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Debug-format label corrected: this is a MULTIPLY reduction
  // (previously mislabeled "add reduction2d").
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
  %}
  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16187 
16188 // ====================VECTOR ARITHMETIC=======================================
16189 
16190 // --------------------------------- ADD --------------------------------------
16191 
// Vector integer add, byte lanes, 64-bit vector (also covers length 4).
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
16206 
// Vector integer add, byte lanes, 128-bit vector.
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
16220 
// Vector integer add, short lanes, 64-bit vector (also covers length 2).
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
16235 
// Vector integer add, short lanes, 128-bit vector.
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
16249 
// Vector integer add, int lanes, 64-bit vector.
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
16263 
// Vector integer add, int lanes, 128-bit vector.
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
16277 
16278 instruct vadd2L(vecX dst, vecX src1, vecX src2)
16279 %{
16280   predicate(n->as_Vector()->length() == 2);
16281   match(Set dst (AddVL src1 src2));
16282   ins_cost(INSN_COST);
16283   format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
16284   ins_encode %{
16285     __ addv(as_FloatRegister($dst$$reg), __ T2D,
16286             as_FloatRegister($src1$$reg),
16287             as_FloatRegister($src2$$reg));
16288   %}
16289   ins_pipe(vdop128);
16290 %}
16291 
16292 instruct vadd2F(vecD dst, vecD src1, vecD src2)
16293 %{
16294   predicate(n->as_Vector()->length() == 2);
16295   match(Set dst (AddVF src1 src2));
16296   ins_cost(INSN_COST);
16297   format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
16298   ins_encode %{
16299     __ fadd(as_FloatRegister($dst$$reg), __ T2S,
16300             as_FloatRegister($src1$$reg),
16301             as_FloatRegister($src2$$reg));
16302   %}
16303   ins_pipe(vdop_fp64);
16304 %}
16305 
16306 instruct vadd4F(vecX dst, vecX src1, vecX src2)
16307 %{
16308   predicate(n->as_Vector()->length() == 4);
16309   match(Set dst (AddVF src1 src2));
16310   ins_cost(INSN_COST);
16311   format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
16312   ins_encode %{
16313     __ fadd(as_FloatRegister($dst$$reg), __ T4S,
16314             as_FloatRegister($src1$$reg),
16315             as_FloatRegister($src2$$reg));
16316   %}
16317   ins_pipe(vdop_fp128);
16318 %}
16319 
// Vector add of 2 double lanes in a 128-bit (X) register.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  // Guard on the vector length for consistency with the sibling 2D rules
  // (vsub2D, vmul2D, vdiv2D), which all carry this predicate.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16332 
16333 // --------------------------------- SUB --------------------------------------
16334 
16335 instruct vsub8B(vecD dst, vecD src1, vecD src2)
16336 %{
16337   predicate(n->as_Vector()->length() == 4 ||
16338             n->as_Vector()->length() == 8);
16339   match(Set dst (SubVB src1 src2));
16340   ins_cost(INSN_COST);
16341   format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
16342   ins_encode %{
16343     __ subv(as_FloatRegister($dst$$reg), __ T8B,
16344             as_FloatRegister($src1$$reg),
16345             as_FloatRegister($src2$$reg));
16346   %}
16347   ins_pipe(vdop64);
16348 %}
16349 
16350 instruct vsub16B(vecX dst, vecX src1, vecX src2)
16351 %{
16352   predicate(n->as_Vector()->length() == 16);
16353   match(Set dst (SubVB src1 src2));
16354   ins_cost(INSN_COST);
16355   format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
16356   ins_encode %{
16357     __ subv(as_FloatRegister($dst$$reg), __ T16B,
16358             as_FloatRegister($src1$$reg),
16359             as_FloatRegister($src2$$reg));
16360   %}
16361   ins_pipe(vdop128);
16362 %}
16363 
16364 instruct vsub4S(vecD dst, vecD src1, vecD src2)
16365 %{
16366   predicate(n->as_Vector()->length() == 2 ||
16367             n->as_Vector()->length() == 4);
16368   match(Set dst (SubVS src1 src2));
16369   ins_cost(INSN_COST);
16370   format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
16371   ins_encode %{
16372     __ subv(as_FloatRegister($dst$$reg), __ T4H,
16373             as_FloatRegister($src1$$reg),
16374             as_FloatRegister($src2$$reg));
16375   %}
16376   ins_pipe(vdop64);
16377 %}
16378 
16379 instruct vsub8S(vecX dst, vecX src1, vecX src2)
16380 %{
16381   predicate(n->as_Vector()->length() == 8);
16382   match(Set dst (SubVS src1 src2));
16383   ins_cost(INSN_COST);
16384   format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
16385   ins_encode %{
16386     __ subv(as_FloatRegister($dst$$reg), __ T8H,
16387             as_FloatRegister($src1$$reg),
16388             as_FloatRegister($src2$$reg));
16389   %}
16390   ins_pipe(vdop128);
16391 %}
16392 
16393 instruct vsub2I(vecD dst, vecD src1, vecD src2)
16394 %{
16395   predicate(n->as_Vector()->length() == 2);
16396   match(Set dst (SubVI src1 src2));
16397   ins_cost(INSN_COST);
16398   format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
16399   ins_encode %{
16400     __ subv(as_FloatRegister($dst$$reg), __ T2S,
16401             as_FloatRegister($src1$$reg),
16402             as_FloatRegister($src2$$reg));
16403   %}
16404   ins_pipe(vdop64);
16405 %}
16406 
16407 instruct vsub4I(vecX dst, vecX src1, vecX src2)
16408 %{
16409   predicate(n->as_Vector()->length() == 4);
16410   match(Set dst (SubVI src1 src2));
16411   ins_cost(INSN_COST);
16412   format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
16413   ins_encode %{
16414     __ subv(as_FloatRegister($dst$$reg), __ T4S,
16415             as_FloatRegister($src1$$reg),
16416             as_FloatRegister($src2$$reg));
16417   %}
16418   ins_pipe(vdop128);
16419 %}
16420 
16421 instruct vsub2L(vecX dst, vecX src1, vecX src2)
16422 %{
16423   predicate(n->as_Vector()->length() == 2);
16424   match(Set dst (SubVL src1 src2));
16425   ins_cost(INSN_COST);
16426   format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
16427   ins_encode %{
16428     __ subv(as_FloatRegister($dst$$reg), __ T2D,
16429             as_FloatRegister($src1$$reg),
16430             as_FloatRegister($src2$$reg));
16431   %}
16432   ins_pipe(vdop128);
16433 %}
16434 
16435 instruct vsub2F(vecD dst, vecD src1, vecD src2)
16436 %{
16437   predicate(n->as_Vector()->length() == 2);
16438   match(Set dst (SubVF src1 src2));
16439   ins_cost(INSN_COST);
16440   format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
16441   ins_encode %{
16442     __ fsub(as_FloatRegister($dst$$reg), __ T2S,
16443             as_FloatRegister($src1$$reg),
16444             as_FloatRegister($src2$$reg));
16445   %}
16446   ins_pipe(vdop_fp64);
16447 %}
16448 
16449 instruct vsub4F(vecX dst, vecX src1, vecX src2)
16450 %{
16451   predicate(n->as_Vector()->length() == 4);
16452   match(Set dst (SubVF src1 src2));
16453   ins_cost(INSN_COST);
16454   format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
16455   ins_encode %{
16456     __ fsub(as_FloatRegister($dst$$reg), __ T4S,
16457             as_FloatRegister($src1$$reg),
16458             as_FloatRegister($src2$$reg));
16459   %}
16460   ins_pipe(vdop_fp128);
16461 %}
16462 
16463 instruct vsub2D(vecX dst, vecX src1, vecX src2)
16464 %{
16465   predicate(n->as_Vector()->length() == 2);
16466   match(Set dst (SubVD src1 src2));
16467   ins_cost(INSN_COST);
16468   format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
16469   ins_encode %{
16470     __ fsub(as_FloatRegister($dst$$reg), __ T2D,
16471             as_FloatRegister($src1$$reg),
16472             as_FloatRegister($src2$$reg));
16473   %}
16474   ins_pipe(vdop_fp128);
16475 %}
16476 
16477 // --------------------------------- MUL --------------------------------------
16478 
16479 instruct vmul4S(vecD dst, vecD src1, vecD src2)
16480 %{
16481   predicate(n->as_Vector()->length() == 2 ||
16482             n->as_Vector()->length() == 4);
16483   match(Set dst (MulVS src1 src2));
16484   ins_cost(INSN_COST);
16485   format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
16486   ins_encode %{
16487     __ mulv(as_FloatRegister($dst$$reg), __ T4H,
16488             as_FloatRegister($src1$$reg),
16489             as_FloatRegister($src2$$reg));
16490   %}
16491   ins_pipe(vmul64);
16492 %}
16493 
16494 instruct vmul8S(vecX dst, vecX src1, vecX src2)
16495 %{
16496   predicate(n->as_Vector()->length() == 8);
16497   match(Set dst (MulVS src1 src2));
16498   ins_cost(INSN_COST);
16499   format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
16500   ins_encode %{
16501     __ mulv(as_FloatRegister($dst$$reg), __ T8H,
16502             as_FloatRegister($src1$$reg),
16503             as_FloatRegister($src2$$reg));
16504   %}
16505   ins_pipe(vmul128);
16506 %}
16507 
16508 instruct vmul2I(vecD dst, vecD src1, vecD src2)
16509 %{
16510   predicate(n->as_Vector()->length() == 2);
16511   match(Set dst (MulVI src1 src2));
16512   ins_cost(INSN_COST);
16513   format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
16514   ins_encode %{
16515     __ mulv(as_FloatRegister($dst$$reg), __ T2S,
16516             as_FloatRegister($src1$$reg),
16517             as_FloatRegister($src2$$reg));
16518   %}
16519   ins_pipe(vmul64);
16520 %}
16521 
16522 instruct vmul4I(vecX dst, vecX src1, vecX src2)
16523 %{
16524   predicate(n->as_Vector()->length() == 4);
16525   match(Set dst (MulVI src1 src2));
16526   ins_cost(INSN_COST);
16527   format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
16528   ins_encode %{
16529     __ mulv(as_FloatRegister($dst$$reg), __ T4S,
16530             as_FloatRegister($src1$$reg),
16531             as_FloatRegister($src2$$reg));
16532   %}
16533   ins_pipe(vmul128);
16534 %}
16535 
16536 instruct vmul2F(vecD dst, vecD src1, vecD src2)
16537 %{
16538   predicate(n->as_Vector()->length() == 2);
16539   match(Set dst (MulVF src1 src2));
16540   ins_cost(INSN_COST);
16541   format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
16542   ins_encode %{
16543     __ fmul(as_FloatRegister($dst$$reg), __ T2S,
16544             as_FloatRegister($src1$$reg),
16545             as_FloatRegister($src2$$reg));
16546   %}
16547   ins_pipe(vmuldiv_fp64);
16548 %}
16549 
16550 instruct vmul4F(vecX dst, vecX src1, vecX src2)
16551 %{
16552   predicate(n->as_Vector()->length() == 4);
16553   match(Set dst (MulVF src1 src2));
16554   ins_cost(INSN_COST);
16555   format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
16556   ins_encode %{
16557     __ fmul(as_FloatRegister($dst$$reg), __ T4S,
16558             as_FloatRegister($src1$$reg),
16559             as_FloatRegister($src2$$reg));
16560   %}
16561   ins_pipe(vmuldiv_fp128);
16562 %}
16563 
16564 instruct vmul2D(vecX dst, vecX src1, vecX src2)
16565 %{
16566   predicate(n->as_Vector()->length() == 2);
16567   match(Set dst (MulVD src1 src2));
16568   ins_cost(INSN_COST);
16569   format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
16570   ins_encode %{
16571     __ fmul(as_FloatRegister($dst$$reg), __ T2D,
16572             as_FloatRegister($src1$$reg),
16573             as_FloatRegister($src2$$reg));
16574   %}
16575   ins_pipe(vmuldiv_fp128);
16576 %}
16577 
16578 // --------------------------------- MLA --------------------------------------
16579 
16580 instruct vmla4S(vecD dst, vecD src1, vecD src2)
16581 %{
16582   predicate(n->as_Vector()->length() == 2 ||
16583             n->as_Vector()->length() == 4);
16584   match(Set dst (AddVS dst (MulVS src1 src2)));
16585   ins_cost(INSN_COST);
16586   format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
16587   ins_encode %{
16588     __ mlav(as_FloatRegister($dst$$reg), __ T4H,
16589             as_FloatRegister($src1$$reg),
16590             as_FloatRegister($src2$$reg));
16591   %}
16592   ins_pipe(vmla64);
16593 %}
16594 
16595 instruct vmla8S(vecX dst, vecX src1, vecX src2)
16596 %{
16597   predicate(n->as_Vector()->length() == 8);
16598   match(Set dst (AddVS dst (MulVS src1 src2)));
16599   ins_cost(INSN_COST);
16600   format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
16601   ins_encode %{
16602     __ mlav(as_FloatRegister($dst$$reg), __ T8H,
16603             as_FloatRegister($src1$$reg),
16604             as_FloatRegister($src2$$reg));
16605   %}
16606   ins_pipe(vmla128);
16607 %}
16608 
16609 instruct vmla2I(vecD dst, vecD src1, vecD src2)
16610 %{
16611   predicate(n->as_Vector()->length() == 2);
16612   match(Set dst (AddVI dst (MulVI src1 src2)));
16613   ins_cost(INSN_COST);
16614   format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
16615   ins_encode %{
16616     __ mlav(as_FloatRegister($dst$$reg), __ T2S,
16617             as_FloatRegister($src1$$reg),
16618             as_FloatRegister($src2$$reg));
16619   %}
16620   ins_pipe(vmla64);
16621 %}
16622 
16623 instruct vmla4I(vecX dst, vecX src1, vecX src2)
16624 %{
16625   predicate(n->as_Vector()->length() == 4);
16626   match(Set dst (AddVI dst (MulVI src1 src2)));
16627   ins_cost(INSN_COST);
16628   format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
16629   ins_encode %{
16630     __ mlav(as_FloatRegister($dst$$reg), __ T4S,
16631             as_FloatRegister($src1$$reg),
16632             as_FloatRegister($src2$$reg));
16633   %}
16634   ins_pipe(vmla128);
16635 %}
16636 
16637 // --------------------------------- MLS --------------------------------------
16638 
16639 instruct vmls4S(vecD dst, vecD src1, vecD src2)
16640 %{
16641   predicate(n->as_Vector()->length() == 2 ||
16642             n->as_Vector()->length() == 4);
16643   match(Set dst (SubVS dst (MulVS src1 src2)));
16644   ins_cost(INSN_COST);
16645   format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
16646   ins_encode %{
16647     __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
16648             as_FloatRegister($src1$$reg),
16649             as_FloatRegister($src2$$reg));
16650   %}
16651   ins_pipe(vmla64);
16652 %}
16653 
16654 instruct vmls8S(vecX dst, vecX src1, vecX src2)
16655 %{
16656   predicate(n->as_Vector()->length() == 8);
16657   match(Set dst (SubVS dst (MulVS src1 src2)));
16658   ins_cost(INSN_COST);
16659   format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
16660   ins_encode %{
16661     __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
16662             as_FloatRegister($src1$$reg),
16663             as_FloatRegister($src2$$reg));
16664   %}
16665   ins_pipe(vmla128);
16666 %}
16667 
16668 instruct vmls2I(vecD dst, vecD src1, vecD src2)
16669 %{
16670   predicate(n->as_Vector()->length() == 2);
16671   match(Set dst (SubVI dst (MulVI src1 src2)));
16672   ins_cost(INSN_COST);
16673   format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
16674   ins_encode %{
16675     __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
16676             as_FloatRegister($src1$$reg),
16677             as_FloatRegister($src2$$reg));
16678   %}
16679   ins_pipe(vmla64);
16680 %}
16681 
16682 instruct vmls4I(vecX dst, vecX src1, vecX src2)
16683 %{
16684   predicate(n->as_Vector()->length() == 4);
16685   match(Set dst (SubVI dst (MulVI src1 src2)));
16686   ins_cost(INSN_COST);
16687   format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
16688   ins_encode %{
16689     __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
16690             as_FloatRegister($src1$$reg),
16691             as_FloatRegister($src2$$reg));
16692   %}
16693   ins_pipe(vmla128);
16694 %}
16695 
16696 // --------------------------------- DIV --------------------------------------
16697 
16698 instruct vdiv2F(vecD dst, vecD src1, vecD src2)
16699 %{
16700   predicate(n->as_Vector()->length() == 2);
16701   match(Set dst (DivVF src1 src2));
16702   ins_cost(INSN_COST);
16703   format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
16704   ins_encode %{
16705     __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
16706             as_FloatRegister($src1$$reg),
16707             as_FloatRegister($src2$$reg));
16708   %}
16709   ins_pipe(vmuldiv_fp64);
16710 %}
16711 
16712 instruct vdiv4F(vecX dst, vecX src1, vecX src2)
16713 %{
16714   predicate(n->as_Vector()->length() == 4);
16715   match(Set dst (DivVF src1 src2));
16716   ins_cost(INSN_COST);
16717   format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
16718   ins_encode %{
16719     __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
16720             as_FloatRegister($src1$$reg),
16721             as_FloatRegister($src2$$reg));
16722   %}
16723   ins_pipe(vmuldiv_fp128);
16724 %}
16725 
16726 instruct vdiv2D(vecX dst, vecX src1, vecX src2)
16727 %{
16728   predicate(n->as_Vector()->length() == 2);
16729   match(Set dst (DivVD src1 src2));
16730   ins_cost(INSN_COST);
16731   format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
16732   ins_encode %{
16733     __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
16734             as_FloatRegister($src1$$reg),
16735             as_FloatRegister($src2$$reg));
16736   %}
16737   ins_pipe(vmuldiv_fp128);
16738 %}
16739 
16740 // --------------------------------- SQRT -------------------------------------
16741 
16742 instruct vsqrt2D(vecX dst, vecX src)
16743 %{
16744   predicate(n->as_Vector()->length() == 2);
16745   match(Set dst (SqrtVD src));
16746   format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
16747   ins_encode %{
16748     __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
16749              as_FloatRegister($src$$reg));
16750   %}
16751   ins_pipe(vsqrt_fp128);
16752 %}
16753 
16754 // --------------------------------- ABS --------------------------------------
16755 
16756 instruct vabs2F(vecD dst, vecD src)
16757 %{
16758   predicate(n->as_Vector()->length() == 2);
16759   match(Set dst (AbsVF src));
16760   ins_cost(INSN_COST * 3);
16761   format %{ "fabs  $dst,$src\t# vector (2S)" %}
16762   ins_encode %{
16763     __ fabs(as_FloatRegister($dst$$reg), __ T2S,
16764             as_FloatRegister($src$$reg));
16765   %}
16766   ins_pipe(vunop_fp64);
16767 %}
16768 
16769 instruct vabs4F(vecX dst, vecX src)
16770 %{
16771   predicate(n->as_Vector()->length() == 4);
16772   match(Set dst (AbsVF src));
16773   ins_cost(INSN_COST * 3);
16774   format %{ "fabs  $dst,$src\t# vector (4S)" %}
16775   ins_encode %{
16776     __ fabs(as_FloatRegister($dst$$reg), __ T4S,
16777             as_FloatRegister($src$$reg));
16778   %}
16779   ins_pipe(vunop_fp128);
16780 %}
16781 
16782 instruct vabs2D(vecX dst, vecX src)
16783 %{
16784   predicate(n->as_Vector()->length() == 2);
16785   match(Set dst (AbsVD src));
16786   ins_cost(INSN_COST * 3);
16787   format %{ "fabs  $dst,$src\t# vector (2D)" %}
16788   ins_encode %{
16789     __ fabs(as_FloatRegister($dst$$reg), __ T2D,
16790             as_FloatRegister($src$$reg));
16791   %}
16792   ins_pipe(vunop_fp128);
16793 %}
16794 
16795 // --------------------------------- NEG --------------------------------------
16796 
16797 instruct vneg2F(vecD dst, vecD src)
16798 %{
16799   predicate(n->as_Vector()->length() == 2);
16800   match(Set dst (NegVF src));
16801   ins_cost(INSN_COST * 3);
16802   format %{ "fneg  $dst,$src\t# vector (2S)" %}
16803   ins_encode %{
16804     __ fneg(as_FloatRegister($dst$$reg), __ T2S,
16805             as_FloatRegister($src$$reg));
16806   %}
16807   ins_pipe(vunop_fp64);
16808 %}
16809 
16810 instruct vneg4F(vecX dst, vecX src)
16811 %{
16812   predicate(n->as_Vector()->length() == 4);
16813   match(Set dst (NegVF src));
16814   ins_cost(INSN_COST * 3);
16815   format %{ "fneg  $dst,$src\t# vector (4S)" %}
16816   ins_encode %{
16817     __ fneg(as_FloatRegister($dst$$reg), __ T4S,
16818             as_FloatRegister($src$$reg));
16819   %}
16820   ins_pipe(vunop_fp128);
16821 %}
16822 
16823 instruct vneg2D(vecX dst, vecX src)
16824 %{
16825   predicate(n->as_Vector()->length() == 2);
16826   match(Set dst (NegVD src));
16827   ins_cost(INSN_COST * 3);
16828   format %{ "fneg  $dst,$src\t# vector (2D)" %}
16829   ins_encode %{
16830     __ fneg(as_FloatRegister($dst$$reg), __ T2D,
16831             as_FloatRegister($src$$reg));
16832   %}
16833   ins_pipe(vunop_fp128);
16834 %}
16835 
16836 // --------------------------------- AND --------------------------------------
16837 
16838 instruct vand8B(vecD dst, vecD src1, vecD src2)
16839 %{
16840   predicate(n->as_Vector()->length_in_bytes() == 4 ||
16841             n->as_Vector()->length_in_bytes() == 8);
16842   match(Set dst (AndV src1 src2));
16843   ins_cost(INSN_COST);
16844   format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
16845   ins_encode %{
16846     __ andr(as_FloatRegister($dst$$reg), __ T8B,
16847             as_FloatRegister($src1$$reg),
16848             as_FloatRegister($src2$$reg));
16849   %}
16850   ins_pipe(vlogical64);
16851 %}
16852 
16853 instruct vand16B(vecX dst, vecX src1, vecX src2)
16854 %{
16855   predicate(n->as_Vector()->length_in_bytes() == 16);
16856   match(Set dst (AndV src1 src2));
16857   ins_cost(INSN_COST);
16858   format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
16859   ins_encode %{
16860     __ andr(as_FloatRegister($dst$$reg), __ T16B,
16861             as_FloatRegister($src1$$reg),
16862             as_FloatRegister($src2$$reg));
16863   %}
16864   ins_pipe(vlogical128);
16865 %}
16866 
16867 // --------------------------------- OR ---------------------------------------
16868 
// Bitwise OR of a 64-bit (D) vector (4 or 8 bytes of payload).
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  // The format text previously said "and" (copy-paste from vand8B); this
  // rule emits ORR, as vor16B's format already states.
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
16883 
// Bitwise OR of a 128-bit (X) vector.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
16897 
16898 // --------------------------------- XOR --------------------------------------
16899 
16900 instruct vxor8B(vecD dst, vecD src1, vecD src2)
16901 %{
16902   predicate(n->as_Vector()->length_in_bytes() == 4 ||
16903             n->as_Vector()->length_in_bytes() == 8);
16904   match(Set dst (XorV src1 src2));
16905   ins_cost(INSN_COST);
16906   format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
16907   ins_encode %{
16908     __ eor(as_FloatRegister($dst$$reg), __ T8B,
16909             as_FloatRegister($src1$$reg),
16910             as_FloatRegister($src2$$reg));
16911   %}
16912   ins_pipe(vlogical64);
16913 %}
16914 
16915 instruct vxor16B(vecX dst, vecX src1, vecX src2)
16916 %{
16917   predicate(n->as_Vector()->length_in_bytes() == 16);
16918   match(Set dst (XorV src1 src2));
16919   ins_cost(INSN_COST);
16920   format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
16921   ins_encode %{
16922     __ eor(as_FloatRegister($dst$$reg), __ T16B,
16923             as_FloatRegister($src1$$reg),
16924             as_FloatRegister($src2$$reg));
16925   %}
16926   ins_pipe(vlogical128);
16927 %}
16928 
16929 // ------------------------------ Shift ---------------------------------------
16930 
16931 instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
16932   match(Set dst (LShiftCntV cnt));
16933   format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
16934   ins_encode %{
16935     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
16936   %}
16937   ins_pipe(vdup_reg_reg128);
16938 %}
16939 
16940 // Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
16941 instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
16942   match(Set dst (RShiftCntV cnt));
16943   format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
16944   ins_encode %{
16945     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
16946     __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
16947   %}
16948   ins_pipe(vdup_reg_reg128);
16949 %}
16950 
16951 instruct vsll8B(vecD dst, vecD src, vecX shift) %{
16952   predicate(n->as_Vector()->length() == 4 ||
16953             n->as_Vector()->length() == 8);
16954   match(Set dst (LShiftVB src shift));
16955   match(Set dst (RShiftVB src shift));
16956   ins_cost(INSN_COST);
16957   format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
16958   ins_encode %{
16959     __ sshl(as_FloatRegister($dst$$reg), __ T8B,
16960             as_FloatRegister($src$$reg),
16961             as_FloatRegister($shift$$reg));
16962   %}
16963   ins_pipe(vshift64);
16964 %}
16965 
16966 instruct vsll16B(vecX dst, vecX src, vecX shift) %{
16967   predicate(n->as_Vector()->length() == 16);
16968   match(Set dst (LShiftVB src shift));
16969   match(Set dst (RShiftVB src shift));
16970   ins_cost(INSN_COST);
16971   format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
16972   ins_encode %{
16973     __ sshl(as_FloatRegister($dst$$reg), __ T16B,
16974             as_FloatRegister($src$$reg),
16975             as_FloatRegister($shift$$reg));
16976   %}
16977   ins_pipe(vshift128);
16978 %}
16979 
16980 instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
16981   predicate(n->as_Vector()->length() == 4 ||
16982             n->as_Vector()->length() == 8);
16983   match(Set dst (URShiftVB src shift));
16984   ins_cost(INSN_COST);
16985   format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
16986   ins_encode %{
16987     __ ushl(as_FloatRegister($dst$$reg), __ T8B,
16988             as_FloatRegister($src$$reg),
16989             as_FloatRegister($shift$$reg));
16990   %}
16991   ins_pipe(vshift64);
16992 %}
16993 
16994 instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
16995   predicate(n->as_Vector()->length() == 16);
16996   match(Set dst (URShiftVB src shift));
16997   ins_cost(INSN_COST);
16998   format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
16999   ins_encode %{
17000     __ ushl(as_FloatRegister($dst$$reg), __ T16B,
17001             as_FloatRegister($src$$reg),
17002             as_FloatRegister($shift$$reg));
17003   %}
17004   ins_pipe(vshift128);
17005 %}
17006 
17007 instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
17008   predicate(n->as_Vector()->length() == 4 ||
17009             n->as_Vector()->length() == 8);
17010   match(Set dst (LShiftVB src shift));
17011   ins_cost(INSN_COST);
17012   format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
17013   ins_encode %{
17014     int sh = (int)$shift$$constant & 31;
17015     if (sh >= 8) {
17016       __ eor(as_FloatRegister($dst$$reg), __ T8B,
17017              as_FloatRegister($src$$reg),
17018              as_FloatRegister($src$$reg));
17019     } else {
17020       __ shl(as_FloatRegister($dst$$reg), __ T8B,
17021              as_FloatRegister($src$$reg), sh);
17022     }
17023   %}
17024   ins_pipe(vshift64_imm);
17025 %}
17026 
17027 instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
17028   predicate(n->as_Vector()->length() == 16);
17029   match(Set dst (LShiftVB src shift));
17030   ins_cost(INSN_COST);
17031   format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
17032   ins_encode %{
17033     int sh = (int)$shift$$constant & 31;
17034     if (sh >= 8) {
17035       __ eor(as_FloatRegister($dst$$reg), __ T16B,
17036              as_FloatRegister($src$$reg),
17037              as_FloatRegister($src$$reg));
17038     } else {
17039       __ shl(as_FloatRegister($dst$$reg), __ T16B,
17040              as_FloatRegister($src$$reg), sh);
17041     }
17042   %}
17043   ins_pipe(vshift128_imm);
17044 %}
17045 
17046 instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
17047   predicate(n->as_Vector()->length() == 4 ||
17048             n->as_Vector()->length() == 8);
17049   match(Set dst (RShiftVB src shift));
17050   ins_cost(INSN_COST);
17051   format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
17052   ins_encode %{
17053     int sh = (int)$shift$$constant & 31;
17054     if (sh >= 8) sh = 7;
17055     sh = -sh & 7;
17056     __ sshr(as_FloatRegister($dst$$reg), __ T8B,
17057            as_FloatRegister($src$$reg), sh);
17058   %}
17059   ins_pipe(vshift64_imm);
17060 %}
17061 
17062 instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
17063   predicate(n->as_Vector()->length() == 16);
17064   match(Set dst (RShiftVB src shift));
17065   ins_cost(INSN_COST);
17066   format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
17067   ins_encode %{
17068     int sh = (int)$shift$$constant & 31;
17069     if (sh >= 8) sh = 7;
17070     sh = -sh & 7;
17071     __ sshr(as_FloatRegister($dst$$reg), __ T16B,
17072            as_FloatRegister($src$$reg), sh);
17073   %}
17074   ins_pipe(vshift128_imm);
17075 %}
17076 
17077 instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
17078   predicate(n->as_Vector()->length() == 4 ||
17079             n->as_Vector()->length() == 8);
17080   match(Set dst (URShiftVB src shift));
17081   ins_cost(INSN_COST);
17082   format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
17083   ins_encode %{
17084     int sh = (int)$shift$$constant & 31;
17085     if (sh >= 8) {
17086       __ eor(as_FloatRegister($dst$$reg), __ T8B,
17087              as_FloatRegister($src$$reg),
17088              as_FloatRegister($src$$reg));
17089     } else {
17090       __ ushr(as_FloatRegister($dst$$reg), __ T8B,
17091              as_FloatRegister($src$$reg), -sh & 7);
17092     }
17093   %}
17094   ins_pipe(vshift64_imm);
17095 %}
17096 
17097 instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
17098   predicate(n->as_Vector()->length() == 16);
17099   match(Set dst (URShiftVB src shift));
17100   ins_cost(INSN_COST);
17101   format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
17102   ins_encode %{
17103     int sh = (int)$shift$$constant & 31;
17104     if (sh >= 8) {
17105       __ eor(as_FloatRegister($dst$$reg), __ T16B,
17106              as_FloatRegister($src$$reg),
17107              as_FloatRegister($src$$reg));
17108     } else {
17109       __ ushr(as_FloatRegister($dst$$reg), __ T16B,
17110              as_FloatRegister($src$$reg), -sh & 7);
17111     }
17112   %}
17113   ins_pipe(vshift128_imm);
17114 %}
17115 
17116 instruct vsll4S(vecD dst, vecD src, vecX shift) %{
17117   predicate(n->as_Vector()->length() == 2 ||
17118             n->as_Vector()->length() == 4);
17119   match(Set dst (LShiftVS src shift));
17120   match(Set dst (RShiftVS src shift));
17121   ins_cost(INSN_COST);
17122   format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
17123   ins_encode %{
17124     __ sshl(as_FloatRegister($dst$$reg), __ T4H,
17125             as_FloatRegister($src$$reg),
17126             as_FloatRegister($shift$$reg));
17127   %}
17128   ins_pipe(vshift64);
17129 %}
17130 
17131 instruct vsll8S(vecX dst, vecX src, vecX shift) %{
17132   predicate(n->as_Vector()->length() == 8);
17133   match(Set dst (LShiftVS src shift));
17134   match(Set dst (RShiftVS src shift));
17135   ins_cost(INSN_COST);
17136   format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
17137   ins_encode %{
17138     __ sshl(as_FloatRegister($dst$$reg), __ T8H,
17139             as_FloatRegister($src$$reg),
17140             as_FloatRegister($shift$$reg));
17141   %}
17142   ins_pipe(vshift128);
17143 %}
17144 
17145 instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
17146   predicate(n->as_Vector()->length() == 2 ||
17147             n->as_Vector()->length() == 4);
17148   match(Set dst (URShiftVS src shift));
17149   ins_cost(INSN_COST);
17150   format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
17151   ins_encode %{
17152     __ ushl(as_FloatRegister($dst$$reg), __ T4H,
17153             as_FloatRegister($src$$reg),
17154             as_FloatRegister($shift$$reg));
17155   %}
17156   ins_pipe(vshift64);
17157 %}
17158 
17159 instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
17160   predicate(n->as_Vector()->length() == 8);
17161   match(Set dst (URShiftVS src shift));
17162   ins_cost(INSN_COST);
17163   format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
17164   ins_encode %{
17165     __ ushl(as_FloatRegister($dst$$reg), __ T8H,
17166             as_FloatRegister($src$$reg),
17167             as_FloatRegister($shift$$reg));
17168   %}
17169   ins_pipe(vshift128);
17170 %}
17171 
// Left shift of 2 or 4 short lanes by a constant count.
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    // Count is masked to 0..31, matching Java int shift semantics.
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      // A count of 16 or more moves every bit out of a 16-bit lane, so
      // materialize an all-zero result via EOR of src with itself
      // (x ^ x == 0).
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17191 
// Left shift of 8 short lanes (128-bit vector) by a constant count.
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    // Count is masked to 0..31, matching Java int shift semantics.
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      // A count of 16 or more moves every bit out of a 16-bit lane, so
      // materialize an all-zero result via EOR of src with itself
      // (x ^ x == 0).
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17210 
// Arithmetic right shift of 2 or 4 short lanes by a constant count.
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    // Count masked to 0..31 (Java int shift semantics) and clamped to
    // 15: an arithmetic shift by >= 15 already fills a 16-bit lane with
    // copies of the sign bit.
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    // NOTE(review): the negate-and-mask appears to pre-encode the count
    // in the form this Assembler::sshr expects (SIMD right-shift
    // immediates encode as 2*esize - shift) -- confirm against the
    // assembler before changing.
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
17226 
// Arithmetic right shift of 8 short lanes (128-bit) by a constant count.
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    // Count masked to 0..31 (Java int shift semantics) and clamped to
    // 15: an arithmetic shift by >= 15 already fills a 16-bit lane with
    // copies of the sign bit.
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    // NOTE(review): negate-and-mask appears to pre-encode the count for
    // this Assembler::sshr (right-shift immediates encode as
    // 2*esize - shift) -- confirm against the assembler before changing.
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
17241 
// Logical right shift of 2 or 4 short lanes by a constant count.
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    // Count is masked to 0..31, matching Java int shift semantics.
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      // A count of 16 or more moves every bit out of a 16-bit lane, so
      // materialize an all-zero result via EOR of src with itself
      // (x ^ x == 0).
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      // NOTE(review): -sh & 15 appears to pre-encode the count for this
      // Assembler::ushr (right-shift immediates encode as 2*esize -
      // shift); a masked count of 0 would then mis-encode -- presumably
      // C2 never emits a shift-by-zero here.  Confirm before changing.
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17261 
// Logical right shift of 8 short lanes (128-bit) by a constant count.
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    // Count is masked to 0..31, matching Java int shift semantics.
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      // A count of 16 or more moves every bit out of a 16-bit lane, so
      // materialize an all-zero result via EOR of src with itself
      // (x ^ x == 0).
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      // NOTE(review): -sh & 15 appears to pre-encode the count for this
      // Assembler::ushr (right-shift immediates encode as 2*esize -
      // shift) -- confirm against the assembler before changing.
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17280 
// Variable shift of 2 int (32-bit) lanes in a 64-bit vector.  SSHL
// shifts each lane left for a positive per-lane count and right for a
// negative one, which is why both LShiftVI and RShiftVI match here;
// presumably the count vector is negated upstream for the right-shift
// case -- TODO confirm.
instruct vsll2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17294 
// Variable shift of 4 int lanes in a 128-bit vector.  SSHL shifts left
// for positive per-lane counts and right for negative ones, so both
// LShiftVI and RShiftVI match here; presumably the count vector is
// negated upstream for the right-shift case -- TODO confirm.
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17308 
// Variable logical right shift of 2 int lanes.  USHL shifts left for
// positive per-lane counts and right for negative ones; presumably the
// count vector is negated upstream so URShiftVI becomes a negative
// count here -- TODO confirm.
instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17321 
// Variable logical right shift of 4 int lanes (128-bit).  USHL shifts
// left for positive per-lane counts and right for negative ones;
// presumably the count vector is negated upstream so URShiftVI becomes
// a negative count here -- TODO confirm.
instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17334 
// Left shift of 2 int lanes by a constant count.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    // Count masked to 0..31 (Java int shift semantics); every such value
    // is a valid left-shift for a 32-bit lane, so no special casing.
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
17347 
// Left shift of 4 int lanes (128-bit) by a constant count.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    // Count masked to 0..31 (Java int shift semantics); every such value
    // is a valid left-shift for a 32-bit lane, so no special casing.
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
17360 
// Arithmetic right shift of 2 int lanes by a constant count.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    // NOTE(review): the negate-and-mask appears to pre-encode the count
    // in the form this Assembler::sshr expects (SIMD right-shift
    // immediates encode as 2*esize - shift) -- confirm against the
    // assembler before changing.
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
17373 
// Arithmetic right shift of 4 int lanes (128-bit) by a constant count.
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    // NOTE(review): the negate-and-mask appears to pre-encode the count
    // in the form this Assembler::sshr expects (SIMD right-shift
    // immediates encode as 2*esize - shift) -- confirm against the
    // assembler before changing.
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
17386 
// Logical right shift of 2 int lanes by a constant count.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    // NOTE(review): the negate-and-mask appears to pre-encode the count
    // for this Assembler::ushr (right-shift immediates encode as
    // 2*esize - shift); a masked count of 0 would then mis-encode --
    // presumably C2 never emits a shift-by-zero.  Confirm before
    // changing.
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
17399 
// Logical right shift of 4 int lanes (128-bit) by a constant count.
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    // NOTE(review): the negate-and-mask appears to pre-encode the count
    // for this Assembler::ushr (right-shift immediates encode as
    // 2*esize - shift) -- confirm against the assembler before changing.
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
17412 
// Variable shift of 2 long (64-bit) lanes in a 128-bit vector.  SSHL
// shifts each lane left for a positive per-lane count and right for a
// negative one, so both LShiftVL and RShiftVL match here; presumably
// the count vector is negated upstream for the right-shift case --
// TODO confirm.
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17426 
// Variable logical right shift of 2 long lanes.  USHL shifts left for
// positive per-lane counts and right for negative ones; presumably the
// count vector is negated upstream so URShiftVL becomes a negative
// count here -- TODO confirm.
instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17439 
// Left shift of 2 long lanes by a constant count.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    // Count masked to 0..63 (Java long shift semantics); every such
    // value is a valid left-shift for a 64-bit lane.
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
17452 
// Arithmetic right shift of 2 long lanes by a constant count.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    // NOTE(review): the negate-and-mask appears to pre-encode the count
    // in the form this Assembler::sshr expects (SIMD right-shift
    // immediates encode as 2*esize - shift) -- confirm against the
    // assembler before changing.
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
17465 
// Logical right shift of 2 long lanes by a constant count.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    // NOTE(review): the negate-and-mask appears to pre-encode the count
    // for this Assembler::ushr (right-shift immediates encode as
    // 2*esize - shift); a masked count of 0 would then mis-encode --
    // presumably C2 never emits a shift-by-zero.  Confirm before
    // changing.
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
17478 
17479 //----------PEEPHOLE RULES-----------------------------------------------------
17480 // These must follow all instruction definitions as they use the names
17481 // defined in the instructions definitions.
17482 //
17483 // peepmatch ( root_instr_name [preceding_instruction]* );
17484 //
17485 // peepconstraint %{
17486 // (instruction_number.operand_name relational_op instruction_number.operand_name
17487 //  [, ...] );
17488 // // instruction numbers are zero-based using left to right order in peepmatch
17489 //
17490 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
17491 // // provide an instruction_number.operand_name for each operand that appears
17492 // // in the replacement instruction's match rule
17493 //
17494 // ---------VM FLAGS---------------------------------------------------------
17495 //
17496 // All peephole optimizations can be turned off using -XX:-OptoPeephole
17497 //
17498 // Each peephole rule is given an identifying number starting with zero and
17499 // increasing by one in the order seen by the parser.  An individual peephole
17500 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
17501 // on the command-line.
17502 //
17503 // ---------CURRENT LIMITATIONS----------------------------------------------
17504 //
17505 // Only match adjacent instructions in same basic block
17506 // Only equality constraints
17507 // Only constraints between operands, not (0.dest_reg == RAX_enc)
17508 // Only one replacement instruction
17509 //
17510 // ---------EXAMPLE----------------------------------------------------------
17511 //
17512 // // pertinent parts of existing instructions in architecture description
17513 // instruct movI(iRegINoSp dst, iRegI src)
17514 // %{
17515 //   match(Set dst (CopyI src));
17516 // %}
17517 //
17518 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
17519 // %{
17520 //   match(Set dst (AddI dst src));
17521 //   effect(KILL cr);
17522 // %}
17523 //
17524 // // Change (inc mov) to lea
17525 // peephole %{
//   // increment preceded by register-register move
17527 //   peepmatch ( incI_iReg movI );
17528 //   // require that the destination register of the increment
17529 //   // match the destination register of the move
17530 //   peepconstraint ( 0.dst == 1.dst );
17531 //   // construct a replacement instruction that sets
17532 //   // the destination to ( move's source register + one )
17533 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
17534 // %}
17535 //
17536 
17537 // Implementation no longer uses movX instructions since
17538 // machine-independent system no longer uses CopyX nodes.
17539 //
17540 // peephole
17541 // %{
17542 //   peepmatch (incI_iReg movI);
17543 //   peepconstraint (0.dst == 1.dst);
17544 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17545 // %}
17546 
17547 // peephole
17548 // %{
17549 //   peepmatch (decI_iReg movI);
17550 //   peepconstraint (0.dst == 1.dst);
17551 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17552 // %}
17553 
17554 // peephole
17555 // %{
17556 //   peepmatch (addI_iReg_imm movI);
17557 //   peepconstraint (0.dst == 1.dst);
17558 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17559 // %}
17560 
17561 // peephole
17562 // %{
17563 //   peepmatch (incL_iReg movL);
17564 //   peepconstraint (0.dst == 1.dst);
17565 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17566 // %}
17567 
17568 // peephole
17569 // %{
17570 //   peepmatch (decL_iReg movL);
17571 //   peepconstraint (0.dst == 1.dst);
17572 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17573 // %}
17574 
17575 // peephole
17576 // %{
17577 //   peepmatch (addL_iReg_imm movL);
17578 //   peepconstraint (0.dst == 1.dst);
17579 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17580 // %}
17581 
17582 // peephole
17583 // %{
17584 //   peepmatch (addP_iReg_imm movP);
17585 //   peepconstraint (0.dst == 1.dst);
17586 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
17587 // %}
17588 
17589 // // Change load of spilled value to only a spill
17590 // instruct storeI(memory mem, iRegI src)
17591 // %{
17592 //   match(Set mem (StoreI mem src));
17593 // %}
17594 //
17595 // instruct loadI(iRegINoSp dst, memory mem)
17596 // %{
17597 //   match(Set dst (LoadI mem));
17598 // %}
17599 //
17600 
17601 //----------SMARTSPILL RULES---------------------------------------------------
17602 // These must follow all instruction definitions as they use the names
17603 // defined in the instructions definitions.
17604 
17605 // Local Variables:
17606 // mode: c++
17607 // End: