1 //
   2 // Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, Red Hat Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
// architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
//   r0-r7,r10-r26 volatile (caller save)
//   r27-r31 system (no save, no allocate)
//   r8-r9 invisible to the allocator (so we can use them as scratch regs)
//
// As regards Java usage, we don't use any callee save registers
// because this makes it difficult to de-optimise a frame (see comment
// in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers
  80 
  81 reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
  82 reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
  83 reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
  84 reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
  85 reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
  86 reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
  87 reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
  88 reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
  89 reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
  90 reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
  91 reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
  92 reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
  93 reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
  94 reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
  95 reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
  96 reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
  97 reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
  98 reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  99 reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
 100 reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 101 reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
 102 reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
 103 reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
 104 reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
 105 reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
 106 reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
 107 reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
 108 reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
 109 reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
 110 reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
 111 reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
 112 reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
 113 reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
 114 reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
 115 reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
 116 reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
 117 reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
 118 reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
 119 reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
 120 reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
 121 reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
 122 reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
 123 reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
 124 reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
 125 reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
 126 reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
 127 reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
 128 reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
 129 reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
 130 reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
 131 reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
 132 reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
 133 reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
 134 reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
 135 reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
 136 reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
 137 reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
 138 reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
 139 reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
 140 reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values up to 8 * 32
 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
// For Java use, float registers v0-v15 are always save-on-call (even
// though the platform ABI treats v8-v15 as callee save).  Float
// registers v16-v31 are SOC as per the platform spec.
 163 
 164   reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
 165   reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
 166   reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
 167   reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );
 168 
 169   reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
 170   reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
 171   reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
 172   reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );
 173 
 174   reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
 175   reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
 176   reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
 177   reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );
 178 
 179   reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
 180   reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
 181   reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
 182   reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );
 183 
 184   reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
 185   reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
 186   reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
 187   reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );
 188 
 189   reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
 190   reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
 191   reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
 192   reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );
 193 
 194   reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
 195   reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
 196   reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
 197   reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );
 198 
 199   reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
 200   reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
 201   reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
 202   reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );
 203 
 204   reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
 205   reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
 206   reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
 207   reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );
 208 
 209   reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
 210   reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
 211   reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
 212   reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );
 213 
 214   reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
 215   reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
 216   reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
 217   reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
 218 
 219   reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
 220   reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
 221   reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
 222   reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
 223 
 224   reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
 225   reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
 226   reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
 227   reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
 228 
 229   reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
 230   reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
 231   reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
 232   reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
 233 
 234   reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
 235   reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
 236   reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
 237   reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
 238 
 239   reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
 240   reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
 241   reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
 242   reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
 243 
 244   reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
 245   reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
 246   reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
 247   reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
 248 
 249   reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
 250   reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
 251   reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
 252   reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
 253 
 254   reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
 255   reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
 256   reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
 257   reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
 258 
 259   reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
 260   reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
 261   reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
 262   reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
 263 
 264   reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
 265   reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
 266   reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
 267   reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
 268 
 269   reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
 270   reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
 271   reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
 272   reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
 273 
 274   reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
 275   reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
 276   reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
 277   reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
 278 
 279   reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
 280   reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
 281   reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
 282   reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
 283 
 284   reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
 285   reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
 286   reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
 287   reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
 288 
 289   reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
 290   reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
 291   reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
 292   reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
 293 
 294   reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
 295   reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
 296   reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
 297   reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
 298 
 299   reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
 300   reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
 301   reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
 302   reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
 303 
 304   reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
 305   reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
 306   reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
 307   reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
 308 
 309   reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
 310   reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
 311   reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
 312   reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
 313 
 314   reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
 315   reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
 316   reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
 317   reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
 318 
 319   reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
 320   reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
 321   reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
 322   reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
// the AArch64 CPSR status flag register is not directly accessible as
// an instruction operand. the FPSR status flag register is a system
// register which can be written/read using MSR/MRS but again does not
// appear as an operand (a code identifying the FPSR occurs as an
// immediate value in the instruction).
 333 
 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
 345 alloc_class chunk0(
 346     // volatiles
 347     R10, R10_H,
 348     R11, R11_H,
 349     R12, R12_H,
 350     R13, R13_H,
 351     R14, R14_H,
 352     R15, R15_H,
 353     R16, R16_H,
 354     R17, R17_H,
 355     R18, R18_H,
 356 
 357     // arg registers
 358     R0, R0_H,
 359     R1, R1_H,
 360     R2, R2_H,
 361     R3, R3_H,
 362     R4, R4_H,
 363     R5, R5_H,
 364     R6, R6_H,
 365     R7, R7_H,
 366 
 367     // non-volatiles
 368     R19, R19_H,
 369     R20, R20_H,
 370     R21, R21_H,
 371     R22, R22_H,
 372     R23, R23_H,
 373     R24, R24_H,
 374     R25, R25_H,
 375     R26, R26_H,
 376 
 377     // non-allocatable registers
 378 
 379     R27, R27_H, // heapbase
 380     R28, R28_H, // thread
 381     R29, R29_H, // fp
 382     R30, R30_H, // lr
 383     R31, R31_H, // sp
 384 );
 385 
 386 alloc_class chunk1(
 387 
 388     // no save
 389     V16, V16_H, V16_J, V16_K,
 390     V17, V17_H, V17_J, V17_K,
 391     V18, V18_H, V18_J, V18_K,
 392     V19, V19_H, V19_J, V19_K,
 393     V20, V20_H, V20_J, V20_K,
 394     V21, V21_H, V21_J, V21_K,
 395     V22, V22_H, V22_J, V22_K,
 396     V23, V23_H, V23_J, V23_K,
 397     V24, V24_H, V24_J, V24_K,
 398     V25, V25_H, V25_J, V25_K,
 399     V26, V26_H, V26_J, V26_K,
 400     V27, V27_H, V27_J, V27_K,
 401     V28, V28_H, V28_J, V28_K,
 402     V29, V29_H, V29_J, V29_K,
 403     V30, V30_H, V30_J, V30_K,
 404     V31, V31_H, V31_J, V31_K,
 405 
 406     // arg registers
 407     V0, V0_H, V0_J, V0_K,
 408     V1, V1_H, V1_J, V1_K,
 409     V2, V2_H, V2_J, V2_K,
 410     V3, V3_H, V3_J, V3_K,
 411     V4, V4_H, V4_J, V4_K,
 412     V5, V5_H, V5_J, V5_K,
 413     V6, V6_H, V6_J, V6_K,
 414     V7, V7_H, V7_J, V7_K,
 415 
 416     // non-volatiles
 417     V8, V8_H, V8_J, V8_K,
 418     V9, V9_H, V9_J, V9_K,
 419     V10, V10_H, V10_J, V10_K,
 420     V11, V11_H, V11_J, V11_K,
 421     V12, V12_H, V12_J, V12_K,
 422     V13, V13_H, V13_J, V13_K,
 423     V14, V14_H, V14_J, V14_K,
 424     V15, V15_H, V15_J, V15_K,
 425 );
 426 
 427 alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
 438 // Class for all 32 bit integer registers -- excludes SP which will
 439 // never be used as an integer register
 440 reg_class any_reg32(
 441     R0,
 442     R1,
 443     R2,
 444     R3,
 445     R4,
 446     R5,
 447     R6,
 448     R7,
 449     R10,
 450     R11,
 451     R12,
 452     R13,
 453     R14,
 454     R15,
 455     R16,
 456     R17,
 457     R18,
 458     R19,
 459     R20,
 460     R21,
 461     R22,
 462     R23,
 463     R24,
 464     R25,
 465     R26,
 466     R27,
 467     R28,
 468     R29,
 469     R30
 470 );
 471 
 472 // Singleton class for R0 int register
 473 reg_class int_r0_reg(R0);
 474 
 475 // Singleton class for R2 int register
 476 reg_class int_r2_reg(R2);
 477 
 478 // Singleton class for R3 int register
 479 reg_class int_r3_reg(R3);
 480 
 481 // Singleton class for R4 int register
 482 reg_class int_r4_reg(R4);
 483 
 484 // Class for all long integer registers (including RSP)
 485 reg_class any_reg(
 486     R0, R0_H,
 487     R1, R1_H,
 488     R2, R2_H,
 489     R3, R3_H,
 490     R4, R4_H,
 491     R5, R5_H,
 492     R6, R6_H,
 493     R7, R7_H,
 494     R10, R10_H,
 495     R11, R11_H,
 496     R12, R12_H,
 497     R13, R13_H,
 498     R14, R14_H,
 499     R15, R15_H,
 500     R16, R16_H,
 501     R17, R17_H,
 502     R18, R18_H,
 503     R19, R19_H,
 504     R20, R20_H,
 505     R21, R21_H,
 506     R22, R22_H,
 507     R23, R23_H,
 508     R24, R24_H,
 509     R25, R25_H,
 510     R26, R26_H,
 511     R27, R27_H,
 512     R28, R28_H,
 513     R29, R29_H,
 514     R30, R30_H,
 515     R31, R31_H
 516 );
 517 
 518 // Class for all non-special integer registers
 519 reg_class no_special_reg32_no_fp(
 520     R0,
 521     R1,
 522     R2,
 523     R3,
 524     R4,
 525     R5,
 526     R6,
 527     R7,
 528     R10,
 529     R11,
 530     R12,                        // rmethod
 531     R13,
 532     R14,
 533     R15,
 534     R16,
 535     R17,
 536     R18,
 537     R19,
 538     R20,
 539     R21,
 540     R22,
 541     R23,
 542     R24,
 543     R25,
 544     R26
 545  /* R27, */                     // heapbase
 546  /* R28, */                     // thread
 547  /* R29, */                     // fp
 548  /* R30, */                     // lr
 549  /* R31 */                      // sp
 550 );
 551 
 552 reg_class no_special_reg32_with_fp(
 553     R0,
 554     R1,
 555     R2,
 556     R3,
 557     R4,
 558     R5,
 559     R6,
 560     R7,
 561     R10,
 562     R11,
 563     R12,                        // rmethod
 564     R13,
 565     R14,
 566     R15,
 567     R16,
 568     R17,
 569     R18,
 570     R19,
 571     R20,
 572     R21,
 573     R22,
 574     R23,
 575     R24,
 576     R25,
 577     R26
 578  /* R27, */                     // heapbase
 579  /* R28, */                     // thread
 580  /* R29, */                     // fp
 581  /* R30, */                     // lr
 582  /* R31 */                      // sp
 583 );
 584 
 585 reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
 587 // Class for all non-special long integer registers
 588 reg_class no_special_reg_no_fp(
 589     R0, R0_H,
 590     R1, R1_H,
 591     R2, R2_H,
 592     R3, R3_H,
 593     R4, R4_H,
 594     R5, R5_H,
 595     R6, R6_H,
 596     R7, R7_H,
 597     R10, R10_H,
 598     R11, R11_H,
 599     R12, R12_H,                 // rmethod
 600     R13, R13_H,
 601     R14, R14_H,
 602     R15, R15_H,
 603     R16, R16_H,
 604     R17, R17_H,
 605     R18, R18_H,
 606     R19, R19_H,
 607     R20, R20_H,
 608     R21, R21_H,
 609     R22, R22_H,
 610     R23, R23_H,
 611     R24, R24_H,
 612     R25, R25_H,
 613     R26, R26_H,
 614  /* R27, R27_H, */              // heapbase
 615  /* R28, R28_H, */              // thread
 616  /* R29, R29_H, */              // fp
 617  /* R30, R30_H, */              // lr
 618  /* R31, R31_H */               // sp
 619 );
 620 
 621 reg_class no_special_reg_with_fp(
 622     R0, R0_H,
 623     R1, R1_H,
 624     R2, R2_H,
 625     R3, R3_H,
 626     R4, R4_H,
 627     R5, R5_H,
 628     R6, R6_H,
 629     R7, R7_H,
 630     R10, R10_H,
 631     R11, R11_H,
 632     R12, R12_H,                 // rmethod
 633     R13, R13_H,
 634     R14, R14_H,
 635     R15, R15_H,
 636     R16, R16_H,
 637     R17, R17_H,
 638     R18, R18_H,
 639     R19, R19_H,
 640     R20, R20_H,
 641     R21, R21_H,
 642     R22, R22_H,
 643     R23, R23_H,
 644     R24, R24_H,
 645     R25, R25_H,
 646     R26, R26_H,
 647  /* R27, R27_H, */              // heapbase
 648  /* R28, R28_H, */              // thread
 649  /* R29, R29_H, */              // fp
 650  /* R30, R30_H, */              // lr
 651  /* R31, R31_H */               // sp
 652 );
 653 
 654 reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
 656 // Class for 64 bit register r0
 657 reg_class r0_reg(
 658     R0, R0_H
 659 );
 660 
 661 // Class for 64 bit register r1
 662 reg_class r1_reg(
 663     R1, R1_H
 664 );
 665 
 666 // Class for 64 bit register r2
 667 reg_class r2_reg(
 668     R2, R2_H
 669 );
 670 
 671 // Class for 64 bit register r3
 672 reg_class r3_reg(
 673     R3, R3_H
 674 );
 675 
 676 // Class for 64 bit register r4
 677 reg_class r4_reg(
 678     R4, R4_H
 679 );
 680 
 681 // Class for 64 bit register r5
 682 reg_class r5_reg(
 683     R5, R5_H
 684 );
 685 
 686 // Class for 64 bit register r10
 687 reg_class r10_reg(
 688     R10, R10_H
 689 );
 690 
 691 // Class for 64 bit register r11
 692 reg_class r11_reg(
 693     R11, R11_H
 694 );
 695 
 696 // Class for method register
 697 reg_class method_reg(
 698     R12, R12_H
 699 );
 700 
 701 // Class for heapbase register
 702 reg_class heapbase_reg(
 703     R27, R27_H
 704 );
 705 
 706 // Class for thread register
 707 reg_class thread_reg(
 708     R28, R28_H
 709 );
 710 
 711 // Class for frame pointer register
 712 reg_class fp_reg(
 713     R29, R29_H
 714 );
 715 
 716 // Class for link register
 717 reg_class lr_reg(
 718     R30, R30_H
 719 );
 720 
 721 // Class for long sp register
 722 reg_class sp_reg(
 723   R31, R31_H
 724 );
 725 
 726 // Class for all pointer registers
 727 reg_class ptr_reg(
 728     R0, R0_H,
 729     R1, R1_H,
 730     R2, R2_H,
 731     R3, R3_H,
 732     R4, R4_H,
 733     R5, R5_H,
 734     R6, R6_H,
 735     R7, R7_H,
 736     R10, R10_H,
 737     R11, R11_H,
 738     R12, R12_H,
 739     R13, R13_H,
 740     R14, R14_H,
 741     R15, R15_H,
 742     R16, R16_H,
 743     R17, R17_H,
 744     R18, R18_H,
 745     R19, R19_H,
 746     R20, R20_H,
 747     R21, R21_H,
 748     R22, R22_H,
 749     R23, R23_H,
 750     R24, R24_H,
 751     R25, R25_H,
 752     R26, R26_H,
 753     R27, R27_H,
 754     R28, R28_H,
 755     R29, R29_H,
 756     R30, R30_H,
 757     R31, R31_H
 758 );
 759 
 760 // Class for all non_special pointer registers
 761 reg_class no_special_ptr_reg(
 762     R0, R0_H,
 763     R1, R1_H,
 764     R2, R2_H,
 765     R3, R3_H,
 766     R4, R4_H,
 767     R5, R5_H,
 768     R6, R6_H,
 769     R7, R7_H,
 770     R10, R10_H,
 771     R11, R11_H,
 772     R12, R12_H,
 773     R13, R13_H,
 774     R14, R14_H,
 775     R15, R15_H,
 776     R16, R16_H,
 777     R17, R17_H,
 778     R18, R18_H,
 779     R19, R19_H,
 780     R20, R20_H,
 781     R21, R21_H,
 782     R22, R22_H,
 783     R23, R23_H,
 784     R24, R24_H,
 785     R25, R25_H,
 786     R26, R26_H,
 787  /* R27, R27_H, */              // heapbase
 788  /* R28, R28_H, */              // thread
 789  /* R29, R29_H, */              // fp
 790  /* R30, R30_H, */              // lr
 791  /* R31, R31_H */               // sp
 792 );
 793 
 794 // Class for all float registers
 795 reg_class float_reg(
 796     V0,
 797     V1,
 798     V2,
 799     V3,
 800     V4,
 801     V5,
 802     V6,
 803     V7,
 804     V8,
 805     V9,
 806     V10,
 807     V11,
 808     V12,
 809     V13,
 810     V14,
 811     V15,
 812     V16,
 813     V17,
 814     V18,
 815     V19,
 816     V20,
 817     V21,
 818     V22,
 819     V23,
 820     V24,
 821     V25,
 822     V26,
 823     V27,
 824     V28,
 825     V29,
 826     V30,
 827     V31
 828 );
 829 
 830 // Double precision float registers have virtual `high halves' that
 831 // are needed by the allocator.
 832 // Class for all double registers
 833 reg_class double_reg(
 834     V0, V0_H,
 835     V1, V1_H,
 836     V2, V2_H,
 837     V3, V3_H,
 838     V4, V4_H,
 839     V5, V5_H,
 840     V6, V6_H,
 841     V7, V7_H,
 842     V8, V8_H,
 843     V9, V9_H,
 844     V10, V10_H,
 845     V11, V11_H,
 846     V12, V12_H,
 847     V13, V13_H,
 848     V14, V14_H,
 849     V15, V15_H,
 850     V16, V16_H,
 851     V17, V17_H,
 852     V18, V18_H,
 853     V19, V19_H,
 854     V20, V20_H,
 855     V21, V21_H,
 856     V22, V22_H,
 857     V23, V23_H,
 858     V24, V24_H,
 859     V25, V25_H,
 860     V26, V26_H,
 861     V27, V27_H,
 862     V28, V28_H,
 863     V29, V29_H,
 864     V30, V30_H,
 865     V31, V31_H
 866 );
 867 
// Class for all 64bit vector registers
// (a 64-bit vector occupies the base slot and its high half,
// the same two slots used by double_reg)
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 903 
// Class for all 128bit vector registers
// (a 128-bit vector occupies all four 32-bit slots of a SIMD/FP
// register: V<n>, V<n>_H, V<n>_J, V<n>_K)
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
// Class for 128 bit register v0
// NOTE(review): only the V0/V0_H slots are listed, unlike vectorx_reg
// which also uses _J/_K -- confirm the _J/_K slots are intentionally
// omitted here.
reg_class v0_reg(
    V0, V0_H
);
 944 
// Class for 128 bit register v1
// NOTE(review): only the V1/V1_H slots are listed, unlike vectorx_reg
// which also uses _J/_K -- confirm the _J/_K slots are intentionally
// omitted here.
reg_class v1_reg(
    V1, V1_H
);
 949 
// Class for 128 bit register v2
// NOTE(review): only the V2/V2_H slots are listed, unlike vectorx_reg
// which also uses _J/_K -- confirm the _J/_K slots are intentionally
// omitted here.
reg_class v2_reg(
    V2, V2_H
);
 954 
// Class for 128 bit register v3
// NOTE(review): only the V3/V3_H slots are listed, unlike vectorx_reg
// which also uses _J/_K -- confirm the _J/_K slots are intentionally
// omitted here.
reg_class v3_reg(
    V3, V3_H
);
 959 
// Singleton class for condition codes (the sole RFLAGS register)
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls are twice as expensive as a plain instruction.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile references carry the extra cost of the required barriers.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
source_hpp %{

#include "gc/shared/cardTableModRefBS.hpp"
#include "gc/shenandoah/brooksPointer.hpp"
#include "opto/addnode.hpp"

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  // emitters for the exception and deopt handler stubs
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // worst-case size of the exception handler: a single far branch
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    return 4 * NativeInstruction::instruction_size;
  }
};

  // graph traversal helpers

  MemBarNode *parent_membar(const Node *n);
  MemBarNode *child_membar(const MemBarNode *n);
  bool leading_membar(const MemBarNode *barrier);

  bool is_card_mark_membar(const MemBarNode *barrier);
  bool is_CAS(int opcode);

  MemBarNode *leading_to_trailing(MemBarNode *leading);
  MemBarNode *card_mark_to_leading(const MemBarNode *barrier);
  MemBarNode *trailing_to_leading(const MemBarNode *trailing);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);

  // predicate controlling addressing modes
  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
%}
1070 
1071 source %{
1072 
1073   // Optimizaton of volatile gets and puts
1074   // -------------------------------------
1075   //
1076   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1077   // use to implement volatile reads and writes. For a volatile read
1078   // we simply need
1079   //
1080   //   ldar<x>
1081   //
1082   // and for a volatile write we need
1083   //
1084   //   stlr<x>
1085   //
1086   // Alternatively, we can implement them by pairing a normal
1087   // load/store with a memory barrier. For a volatile read we need
1088   //
1089   //   ldr<x>
1090   //   dmb ishld
1091   //
1092   // for a volatile write
1093   //
1094   //   dmb ish
1095   //   str<x>
1096   //   dmb ish
1097   //
1098   // We can also use ldaxr and stlxr to implement compare and swap CAS
1099   // sequences. These are normally translated to an instruction
1100   // sequence like the following
1101   //
1102   //   dmb      ish
1103   // retry:
1104   //   ldxr<x>   rval raddr
1105   //   cmp       rval rold
1106   //   b.ne done
1107   //   stlxr<x>  rval, rnew, rold
1108   //   cbnz      rval retry
1109   // done:
1110   //   cset      r0, eq
1111   //   dmb ishld
1112   //
1113   // Note that the exclusive store is already using an stlxr
1114   // instruction. That is required to ensure visibility to other
1115   // threads of the exclusive write (assuming it succeeds) before that
1116   // of any subsequent writes.
1117   //
1118   // The following instruction sequence is an improvement on the above
1119   //
1120   // retry:
1121   //   ldaxr<x>  rval raddr
1122   //   cmp       rval rold
1123   //   b.ne done
1124   //   stlxr<x>  rval, rnew, rold
1125   //   cbnz      rval retry
1126   // done:
1127   //   cset      r0, eq
1128   //
1129   // We don't need the leading dmb ish since the stlxr guarantees
1130   // visibility of prior writes in the case that the swap is
1131   // successful. Crucially we don't have to worry about the case where
1132   // the swap is not successful since no valid program should be
1133   // relying on visibility of prior changes by the attempting thread
1134   // in the case where the CAS fails.
1135   //
1136   // Similarly, we don't need the trailing dmb ishld if we substitute
1137   // an ldaxr instruction since that will provide all the guarantees we
1138   // require regarding observation of changes made by other threads
1139   // before any change to the CAS address observed by the load.
1140   //
1141   // In order to generate the desired instruction sequence we need to
1142   // be able to identify specific 'signature' ideal graph node
  // sequences which i) occur as a translation of volatile reads or
1144   // writes or CAS operations and ii) do not occur through any other
1145   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1147   // sequences to the desired machine code sequences. Selection of the
1148   // alternative rules can be implemented by predicates which identify
1149   // the relevant node sequences.
1150   //
1151   // The ideal graph generator translates a volatile read to the node
1152   // sequence
1153   //
1154   //   LoadX[mo_acquire]
1155   //   MemBarAcquire
1156   //
1157   // As a special case when using the compressed oops optimization we
1158   // may also see this variant
1159   //
1160   //   LoadN[mo_acquire]
1161   //   DecodeN
1162   //   MemBarAcquire
1163   //
1164   // A volatile write is translated to the node sequence
1165   //
1166   //   MemBarRelease
1167   //   StoreX[mo_release] {CardMark}-optional
1168   //   MemBarVolatile
1169   //
1170   // n.b. the above node patterns are generated with a strict
1171   // 'signature' configuration of input and output dependencies (see
1172   // the predicates below for exact details). The card mark may be as
1173   // simple as a few extra nodes or, in a few GC configurations, may
1174   // include more complex control flow between the leading and
1175   // trailing memory barriers. However, whatever the card mark
1176   // configuration these signatures are unique to translated volatile
1177   // reads/stores -- they will not appear as a result of any other
1178   // bytecode translation or inlining nor as a consequence of
1179   // optimizing transforms.
1180   //
1181   // We also want to catch inlined unsafe volatile gets and puts and
1182   // be able to implement them using either ldar<x>/stlr<x> or some
1183   // combination of ldr<x>/stlr<x> and dmb instructions.
1184   //
1185   // Inlined unsafe volatiles puts manifest as a minor variant of the
1186   // normal volatile put node sequence containing an extra cpuorder
1187   // membar
1188   //
1189   //   MemBarRelease
1190   //   MemBarCPUOrder
1191   //   StoreX[mo_release] {CardMark}-optional
1192   //   MemBarVolatile
1193   //
1194   // n.b. as an aside, the cpuorder membar is not itself subject to
1195   // matching and translation by adlc rules.  However, the rule
1196   // predicates need to detect its presence in order to correctly
1197   // select the desired adlc rules.
1198   //
1199   // Inlined unsafe volatile gets manifest as a somewhat different
1200   // node sequence to a normal volatile get
1201   //
1202   //   MemBarCPUOrder
1203   //        ||       \\
1204   //   MemBarAcquire LoadX[mo_acquire]
1205   //        ||
1206   //   MemBarCPUOrder
1207   //
1208   // In this case the acquire membar does not directly depend on the
1209   // load. However, we can be sure that the load is generated from an
1210   // inlined unsafe volatile get if we see it dependent on this unique
1211   // sequence of membar nodes. Similarly, given an acquire membar we
1212   // can know that it was added because of an inlined unsafe volatile
1213   // get if it is fed and feeds a cpuorder membar and if its feed
1214   // membar also feeds an acquiring load.
1215   //
1216   // Finally an inlined (Unsafe) CAS operation is translated to the
1217   // following ideal graph
1218   //
1219   //   MemBarRelease
1220   //   MemBarCPUOrder
1221   //   CompareAndSwapX {CardMark}-optional
1222   //   MemBarCPUOrder
1223   //   MemBarAcquire
1224   //
1225   // So, where we can identify these volatile read and write
1226   // signatures we can choose to plant either of the above two code
1227   // sequences. For a volatile read we can simply plant a normal
1228   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1229   // also choose to inhibit translation of the MemBarAcquire and
1230   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1231   //
1232   // When we recognise a volatile store signature we can choose to
1233   // plant at a dmb ish as a translation for the MemBarRelease, a
1234   // normal str<x> and then a dmb ish for the MemBarVolatile.
1235   // Alternatively, we can inhibit translation of the MemBarRelease
1236   // and MemBarVolatile and instead plant a simple stlr<x>
1237   // instruction.
1238   //
1239   // when we recognise a CAS signature we can choose to plant a dmb
1240   // ish as a translation for the MemBarRelease, the conventional
1241   // macro-instruction sequence for the CompareAndSwap node (which
1242   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1243   // Alternatively, we can elide generation of the dmb instructions
1244   // and plant the alternative CompareAndSwap macro-instruction
1245   // sequence (which uses ldaxr<x>).
1246   //
1247   // Of course, the above only applies when we see these signature
1248   // configurations. We still want to plant dmb instructions in any
1249   // other cases where we may see a MemBarAcquire, MemBarRelease or
1250   // MemBarVolatile. For example, at the end of a constructor which
1251   // writes final/volatile fields we will see a MemBarRelease
1252   // instruction and this needs a 'dmb ish' lest we risk the
1253   // constructed object being visible without making the
1254   // final/volatile field writes visible.
1255   //
1256   // n.b. the translation rules below which rely on detection of the
1257   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1258   // If we see anything other than the signature configurations we
1259   // always just translate the loads and stores to ldr<x> and str<x>
1260   // and translate acquire, release and volatile membars to the
1261   // relevant dmb instructions.
1262   //
1263 
1264   // graph traversal helpers used for volatile put/get and CAS
1265   // optimization
1266 
1267   // 1) general purpose helpers
1268 
1269   // if node n is linked to a parent MemBarNode by an intervening
1270   // Control and Memory ProjNode return the MemBarNode otherwise return
1271   // NULL.
1272   //
1273   // n may only be a Load or a MemBar.
1274 
1275   MemBarNode *parent_membar(const Node *n)
1276   {
1277     Node *ctl = NULL;
1278     Node *mem = NULL;
1279     Node *membar = NULL;
1280 
1281     if (n->is_Load()) {
1282       ctl = n->lookup(LoadNode::Control);
1283       mem = n->lookup(LoadNode::Memory);
1284     } else if (n->is_MemBar()) {
1285       ctl = n->lookup(TypeFunc::Control);
1286       mem = n->lookup(TypeFunc::Memory);
1287     } else {
1288         return NULL;
1289     }
1290 
1291     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
1292       return NULL;
1293     }
1294 
1295     membar = ctl->lookup(0);
1296 
1297     if (!membar || !membar->is_MemBar()) {
1298       return NULL;
1299     }
1300 
1301     if (mem->lookup(0) != membar) {
1302       return NULL;
1303     }
1304 
1305     return membar->as_MemBar();
1306   }
1307 
1308   // if n is linked to a child MemBarNode by intervening Control and
1309   // Memory ProjNodes return the MemBarNode otherwise return NULL.
1310 
1311   MemBarNode *child_membar(const MemBarNode *n)
1312   {
1313     ProjNode *ctl = n->proj_out(TypeFunc::Control);
1314     ProjNode *mem = n->proj_out(TypeFunc::Memory);
1315 
1316     // MemBar needs to have both a Ctl and Mem projection
1317     if (! ctl || ! mem)
1318       return NULL;
1319 
1320     MemBarNode *child = NULL;
1321     Node *x;
1322 
1323     for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1324       x = ctl->fast_out(i);
1325       // if we see a membar we keep hold of it. we may also see a new
1326       // arena copy of the original but it will appear later
1327       if (x->is_MemBar()) {
1328           child = x->as_MemBar();
1329           break;
1330       }
1331     }
1332 
1333     if (child == NULL) {
1334       return NULL;
1335     }
1336 
1337     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1338       x = mem->fast_out(i);
1339       // if we see a membar we keep hold of it. we may also see a new
1340       // arena copy of the original but it will appear later
1341       if (x == child) {
1342         return child;
1343       }
1344     }
1345     return NULL;
1346   }
1347 
1348   // helper predicate use to filter candidates for a leading memory
1349   // barrier
1350   //
1351   // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1352   // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1353 
1354   bool leading_membar(const MemBarNode *barrier)
1355   {
1356     int opcode = barrier->Opcode();
1357     // if this is a release membar we are ok
1358     if (opcode == Op_MemBarRelease) {
1359       return true;
1360     }
1361     // if its a cpuorder membar . . .
1362     if (opcode != Op_MemBarCPUOrder) {
1363       return false;
1364     }
1365     // then the parent has to be a release membar
1366     MemBarNode *parent = parent_membar(barrier);
1367     if (!parent) {
1368       return false;
1369     }
1370     opcode = parent->Opcode();
1371     return opcode == Op_MemBarRelease;
1372   }
1373 
1374   // 2) card mark detection helper
1375 
1376   // helper predicate which can be used to detect a volatile membar
1377   // introduced as part of a conditional card mark sequence either by
1378   // G1 or by CMS when UseCondCardMark is true.
1379   //
1380   // membar can be definitively determined to be part of a card mark
1381   // sequence if and only if all the following hold
1382   //
1383   // i) it is a MemBarVolatile
1384   //
1385   // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1386   // true
1387   //
1388   // iii) the node's Mem projection feeds a StoreCM node.
1389 
1390   bool is_card_mark_membar(const MemBarNode *barrier)
1391   {
1392     if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
1393       return false;
1394     }
1395 
1396     if (barrier->Opcode() != Op_MemBarVolatile) {
1397       return false;
1398     }
1399 
1400     ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1401 
1402     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1403       Node *y = mem->fast_out(i);
1404       if (y->Opcode() == Op_StoreCM) {
1405         return true;
1406       }
1407     }
1408 
1409     return false;
1410   }
1411 
1412 
1413   // 3) helper predicates to traverse volatile put or CAS graphs which
1414   // may contain GC barrier subgraphs
1415 
1416   // Preamble
1417   // --------
1418   //
1419   // for volatile writes we can omit generating barriers and employ a
  // releasing store when we see a node sequence with a
1421   // leading MemBarRelease and a trailing MemBarVolatile as follows
1422   //
1423   //   MemBarRelease
1424   //  {    ||        } -- optional
1425   //  {MemBarCPUOrder}
1426   //       ||       \\
1427   //       ||     StoreX[mo_release]
1428   //       | \ Bot    / ???
1429   //       | MergeMem
1430   //       | /
1431   //   MemBarVolatile
1432   //
1433   // where
1434   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1435   //  | \ and / indicate further routing of the Ctl and Mem feeds
1436   //
1437   // Note that the memory feed from the CPUOrder membar to the
1438   // MergeMem node is an AliasIdxBot slice while the feed from the
1439   // StoreX is for a slice determined by the type of value being
1440   // written.
1441   //
1442   // the diagram above shows the graph we see for non-object stores.
1443   // for a volatile Object store (StoreN/P) we may see other nodes
1444   // below the leading membar because of the need for a GC pre- or
1445   // post-write barrier.
1446   //
1447   // with most GC configurations we with see this simple variant which
1448   // includes a post-write barrier card mark.
1449   //
1450   //   MemBarRelease______________________________
1451   //         ||    \\               Ctl \        \\
1452   //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
1453   //         | \ Bot  / oop                 . . .  /
1454   //         | MergeMem
1455   //         | /
1456   //         ||      /
1457   //   MemBarVolatile
1458   //
1459   // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1460   // the object address to an int used to compute the card offset) and
1461   // Ctl+Mem to a StoreB node (which does the actual card mark).
1462   //
1463   // n.b. a StoreCM node is only ever used when CMS (with or without
1464   // CondCardMark) or G1 is configured. This abstract instruction
1465   // differs from a normal card mark write (StoreB) because it implies
1466   // a requirement to order visibility of the card mark (StoreCM)
1467   // after that of the object put (StoreP/N) using a StoreStore memory
1468   // barrier. Note that this is /not/ a requirement to order the
1469   // instructions in the generated code (that is already guaranteed by
1470   // the order of memory dependencies). Rather it is a requirement to
1471   // ensure visibility order which only applies on architectures like
1472   // AArch64 which do not implement TSO. This ordering is required for
1473   // both non-volatile and volatile puts.
1474   //
1475   // That implies that we need to translate a StoreCM using the
1476   // sequence
1477   //
1478   //   dmb ishst
1479   //   stlrb
1480   //
1481   // This dmb cannot be omitted even when the associated StoreX or
1482   // CompareAndSwapX is implemented using stlr. However, as described
1483   // below there are circumstances where a specific GC configuration
1484   // requires a stronger barrier in which case it can be omitted.
1485   // 
1486   // With the Serial or Parallel GC using +CondCardMark the card mark
1487   // is performed conditionally on it currently being unmarked in
1488   // which case the volatile put graph looks slightly different
1489   //
1490   //   MemBarRelease____________________________________________
1491   //         ||    \\               Ctl \     Ctl \     \\  Mem \
1492   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
1493   //         | \ Bot / oop                          \            |
1494   //         | MergeMem                            . . .      StoreB
1495   //         | /                                                /
1496   //         ||     /
1497   //   MemBarVolatile
1498   //
1499   // It is worth noting at this stage that all the above
1500   // configurations can be uniquely identified by checking that the
1501   // memory flow includes the following subgraph:
1502   //
1503   //   MemBarRelease
1504   //  {MemBarCPUOrder}
1505   //      |  \      . . .
1506   //      |  StoreX[mo_release]  . . .
1507   //  Bot |   / oop
1508   //     MergeMem
1509   //      |
1510   //   MemBarVolatile
1511   //
1512   // This is referred to as a *normal* volatile store subgraph. It can
1513   // easily be detected starting from any candidate MemBarRelease,
1514   // StoreX[mo_release] or MemBarVolatile node.
1515   //
1516   // A small variation on this normal case occurs for an unsafe CAS
1517   // operation. The basic memory flow subgraph for a non-object CAS is
1518   // as follows
1519   //
1520   //   MemBarRelease
1521   //         ||
1522   //   MemBarCPUOrder
1523   //          |     \\   . . .
1524   //          |     CompareAndSwapX
1525   //          |       |
1526   //      Bot |     SCMemProj
1527   //           \     / Bot
1528   //           MergeMem
1529   //           /
1530   //   MemBarCPUOrder
1531   //         ||
1532   //   MemBarAcquire
1533   //
1534   // The same basic variations on this arrangement (mutatis mutandis)
1535   // occur when a card mark is introduced. i.e. the CPUOrder MemBar
1536   // feeds the extra CastP2X, LoadB etc nodes but the above memory
1537   // flow subgraph is still present.
1538   // 
1539   // This is referred to as a *normal* CAS subgraph. It can easily be
1540   // detected starting from any candidate MemBarRelease,
1541   // StoreX[mo_release] or MemBarAcquire node.
1542   //
1543   // The code below uses two helper predicates, leading_to_trailing
1544   // and trailing_to_leading to identify these normal graphs, one
1545   // validating the layout starting from the top membar and searching
1546   // down and the other validating the layout starting from the lower
1547   // membar and searching up.
1548   //
1549   // There are two special case GC configurations when the simple
1550   // normal graphs above may not be generated: when using G1 (which
1551   // always employs a conditional card mark); and when using CMS with
1552   // conditional card marking (+CondCardMark) configured. These GCs
1553   // are both concurrent rather than stop-the world GCs. So they
1554   // introduce extra Ctl+Mem flow into the graph between the leading
1555   // and trailing membar nodes, in particular enforcing stronger
  // memory serialisation between the object put and the corresponding
1557   // conditional card mark. CMS employs a post-write GC barrier while
1558   // G1 employs both a pre- and post-write GC barrier.
1559   //
1560   // The post-write barrier subgraph for these configurations includes
1561   // a MemBarVolatile node -- referred to as a card mark membar --
1562   // which is needed to order the card write (StoreCM) operation in
1563   // the barrier, the preceding StoreX (or CompareAndSwapX) and Store
1564   // operations performed by GC threads i.e. a card mark membar
1565   // constitutes a StoreLoad barrier hence must be translated to a dmb
1566   // ish (whether or not it sits inside a volatile store sequence).
1567   //
1568   // Of course, the use of the dmb ish for the card mark membar also
  // implies that the StoreCM which follows can omit the dmb ishst
1570   // instruction. The necessary visibility ordering will already be
1571   // guaranteed by the dmb ish. In sum, the dmb ishst instruction only
  // needs to be generated as part of the StoreCM sequence with GC
1573   // configuration +CMS -CondCardMark.
1574   // 
1575   // Of course all these extra barrier nodes may well be absent --
1576   // they are only inserted for object puts. Their potential presence
1577   // significantly complicates the task of identifying whether a
1578   // MemBarRelease, StoreX[mo_release], MemBarVolatile or
1579   // MemBarAcquire forms part of a volatile put or CAS when using
1580   // these GC configurations (see below) and also complicates the
1581   // decision as to how to translate a MemBarVolatile and StoreCM.
1582   //
  // So, this means that a card mark MemBarVolatile occurring in the
  // post-barrier graph needs to be distinguished from a normal
1585   // trailing MemBarVolatile. Resolving this is straightforward: a
1586   // card mark MemBarVolatile always projects a Mem feed to a StoreCM
1587   // node and that is a unique marker
1588   //
1589   //      MemBarVolatile (card mark)
1590   //       C |    \     . . .
1591   //         |   StoreCM   . . .
1592   //       . . .
1593   //
1594   // Returning to the task of translating the object put and the
1595   // leading/trailing membar nodes: what do the node graphs look like
1596   // for these 2 special cases? and how can we determine the status of
1597   // a MemBarRelease, StoreX[mo_release] or MemBarVolatile in both
1598   // normal and non-normal cases?
1599   //
1600   // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
  // which selects conditional execution based on the value loaded
1602   // (LoadB) from the card. Ctl and Mem are fed to the If via an
1603   // intervening StoreLoad barrier (MemBarVolatile).
1604   //
1605   // So, with CMS we may see a node graph for a volatile object store
1606   // which looks like this
1607   //
1608   //   MemBarRelease
1609   //   MemBarCPUOrder_(leading)____________________
1610   //     C |  | M \       \\               M |   C \
1611   //       |  |    \    StoreN/P[mo_release] |  CastP2X
1612   //       |  | Bot \    / oop      \        |
1613   //       |  |    MergeMem          \      / 
1614   //       |  |      /                |    /
1615   //     MemBarVolatile (card mark)   |   /
1616   //     C |  ||    M |               |  /
1617   //       | LoadB    | Bot       oop | / Bot
1618   //       |   |      |              / /
1619   //       | Cmp      |\            / /
1620   //       | /        | \          / /
1621   //       If         |  \        / /
1622   //       | \        |   \      / /
1623   // IfFalse  IfTrue  |    \    / /
1624   //       \     / \  |    |   / /
1625   //        \   / StoreCM  |  / /
1626   //         \ /      \   /  / /
1627   //        Region     Phi  / /
1628   //          | \   Raw |  / /
1629   //          |  . . .  | / /
1630   //          |       MergeMem
1631   //          |           |
1632   //        MemBarVolatile (trailing)
1633   //
1634   // Notice that there are two MergeMem nodes below the leading
1635   // membar. The first MergeMem merges the AliasIdxBot Mem slice from
1636   // the leading membar and the oopptr Mem slice from the Store into
1637   // the card mark membar. The trailing MergeMem merges the
1638   // AliasIdxBot Mem slice from the leading membar, the AliasIdxRaw
1639   // slice from the StoreCM and an oop slice from the StoreN/P node
1640   // into the trailing membar (n.b. the raw slice proceeds via a Phi
1641   // associated with the If region).
1642   //
1643   // So, in the case of CMS + CondCardMark the volatile object store
1644   // graph still includes a normal volatile store subgraph from the
1645   // leading membar to the trailing membar. However, it also contains
1646   // the same shape memory flow to the card mark membar. The two flows
1647   // can be distinguished by testing whether or not the downstream
1648   // membar is a card mark membar.
1649   //
1650   // The graph for a CAS also varies with CMS + CondCardMark, in
1651   // particular employing a control feed from the CompareAndSwapX node
1652   // through a CmpI and If to the card mark membar and StoreCM which
1653   // updates the associated card. This avoids executing the card mark
1654   // if the CAS fails. However, it can be seen from the diagram below
1655   // that the presence of the barrier does not alter the normal CAS
1656   // memory subgraph where the leading membar feeds a CompareAndSwapX,
1657   // an SCMemProj, a MergeMem then a final trailing MemBarCPUOrder and
1658   // MemBarAcquire pair.
1659   //
1660   //   MemBarRelease
1661   //   MemBarCPUOrder__(leading)_______________________
1662   //   C /  M |                        \\            C \
1663   //  . . .   | Bot                CompareAndSwapN/P   CastP2X
1664   //          |                  C /  M |
1665   //          |                 CmpI    |
1666   //          |                  /      |
1667   //          |               . . .     |
1668   //          |              IfTrue     |
1669   //          |              /          |
1670   //       MemBarVolatile (card mark)   |
1671   //        C |  ||    M |              |
1672   //          | LoadB    | Bot   ______/|
1673   //          |   |      |      /       |
1674   //          | Cmp      |     /      SCMemProj
1675   //          | /        |    /         |
1676   //          If         |   /         /
1677   //          | \        |  /         / Bot
1678   //     IfFalse  IfTrue | /         /
1679   //          |   / \   / / prec    /
1680   //   . . .  |  /  StoreCM        /
1681   //        \ | /      | raw      /
1682   //        Region    . . .      /
1683   //           | \              /
1684   //           |   . . .   \    / Bot
1685   //           |        MergeMem
1686   //           |          /
1687   //         MemBarCPUOrder
1688   //         MemBarAcquire (trailing)
1689   //
1690   // This has a slightly different memory subgraph to the one seen
1691   // previously but the core of it has a similar memory flow to the
1692   // CAS normal subgraph:
1693   //
1694   //   MemBarRelease
1695   //   MemBarCPUOrder____
1696   //         |          \      . . .
1697   //         |       CompareAndSwapX  . . .
1698   //         |       C /  M |
1699   //         |      CmpI    |
1700   //         |       /      |
1701   //         |      . .    /
1702   //     Bot |   IfTrue   /
1703   //         |   /       /
1704   //    MemBarVolatile  /
1705   //         | ...     /
1706   //      StoreCM ... /
1707   //         |       / 
1708   //       . . .  SCMemProj
1709   //      Raw \    / Bot
1710   //        MergeMem
1711   //           |
1712   //   MemBarCPUOrder
1713   //   MemBarAcquire
1714   //
1715   // The G1 graph for a volatile object put is a lot more complicated.
1716   // Nodes inserted on behalf of G1 may comprise: a pre-write graph
1717   // which adds the old value to the SATB queue; the releasing store
1718   // itself; and, finally, a post-write graph which performs a card
1719   // mark.
1720   //
1721   // The pre-write graph may be omitted, but only when the put is
1722   // writing to a newly allocated (young gen) object and then only if
1723   // there is a direct memory chain to the Initialize node for the
1724   // object allocation. This will not happen for a volatile put since
1725   // any memory chain passes through the leading membar.
1726   //
1727   // The pre-write graph includes a series of 3 If tests. The outermost
1728   // If tests whether SATB is enabled (no else case). The next If tests
1729   // whether the old value is non-NULL (no else case). The third tests
1730   // whether the SATB queue index is > 0, if so updating the queue. The
1731   // else case for this third If calls out to the runtime to allocate a
1732   // new queue buffer.
1733   //
1734   // So with G1 the pre-write and releasing store subgraph looks like
1735   // this (the nested Ifs are omitted).
1736   //
1737   //  MemBarRelease (leading)____________
1738   //     C |  ||  M \   M \    M \  M \ . . .
1739   //       | LoadB   \  LoadL  LoadN   \
1740   //       | /        \                 \
1741   //       If         |\                 \
1742   //       | \        | \                 \
1743   //  IfFalse  IfTrue |  \                 \
1744   //       |     |    |   \                 |
1745   //       |     If   |   /\                |
1746   //       |     |          \               |
1747   //       |                 \              |
1748   //       |    . . .         \             |
1749   //       | /       | /       |            |
1750   //      Region  Phi[M]       |            |
1751   //       | \       |         |            |
1752   //       |  \_____ | ___     |            |
1753   //     C | C \     |   C \ M |            |
1754   //       | CastP2X | StoreN/P[mo_release] |
1755   //       |         |         |            |
1756   //     C |       M |       M |          M |
1757   //        \        | Raw     | oop       / Bot
1758   //                  . . .
1759   //          (post write subtree elided)
1760   //                    . . .
1761   //             C \         M /
1762   //         MemBarVolatile (trailing)
1763   //
1764   // Note that the three memory feeds into the post-write tree are an
1765   // AliasRawIdx slice associated with the writes in the pre-write
1766   // tree, an oop type slice from the StoreX specific to the type of
1767   // the volatile field and the AliasBotIdx slice emanating from the
1768   // leading membar.
1769   //
1770   // n.b. the LoadB in this subgraph is not the card read -- it's a
1771   // read of the SATB queue active flag.
1772   //
1773   // The CAS graph is once again a variant of the above with a
1774   // CompareAndSwapX node and SCMemProj in place of the StoreX.  The
1775   // value from the CompareAndSwapX node is fed into the post-write
  // graph along with the AliasIdxRaw feed from the pre-barrier and
1777   // the AliasIdxBot feeds from the leading membar and the ScMemProj.
1778   //
1779   //  MemBarRelease (leading)____________
1780   //     C |  ||  M \   M \    M \  M \ . . .
1781   //       | LoadB   \  LoadL  LoadN   \
1782   //       | /        \                 \
1783   //       If         |\                 \
1784   //       | \        | \                 \
1785   //  IfFalse  IfTrue |  \                 \
1786   //       |     |    |   \                 \
1787   //       |     If   |    \                 |
1788   //       |     |          \                |
1789   //       |                 \               |
1790   //       |    . . .         \              |
1791   //       | /       | /       \             |
1792   //      Region  Phi[M]        \            |
1793   //       | \       |           \           |
1794   //       |  \_____ |            |          |
1795   //     C | C \     |            |          |
1796   //       | CastP2X |     CompareAndSwapX   |
1797   //       |         |   res |     |         |
1798   //     C |       M |       |  SCMemProj  M |
1799   //        \        | Raw   |     | Bot    / Bot
1800   //                  . . .
1801   //          (post write subtree elided)
1802   //                    . . .
1803   //             C \         M /
1804   //         MemBarVolatile (trailing)
1805   //
1806   // The G1 post-write subtree is also optional, this time when the
1807   // new value being written is either null or can be identified as a
1808   // newly allocated (young gen) object with no intervening control
1809   // flow. The latter cannot happen but the former may, in which case
1810   // the card mark membar is omitted and the memory feeds from the
  // leading membar and the StoreN/P are merged direct into the
1812   // trailing membar as per the normal subgraph. So, the only special
1813   // case which arises is when the post-write subgraph is generated.
1814   //
1815   // The kernel of the post-write G1 subgraph is the card mark itself
1816   // which includes a card mark memory barrier (MemBarVolatile), a
1817   // card test (LoadB), and a conditional update (If feeding a
1818   // StoreCM). These nodes are surrounded by a series of nested Ifs
1819   // which try to avoid doing the card mark. The top level If skips if
1820   // the object reference does not cross regions (i.e. it tests if
1821   // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1822   // need not be recorded. The next If, which skips on a NULL value,
1823   // may be absent (it is not generated if the type of value is >=
1824   // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1825   // checking if card_val != young).  n.b. although this test requires
1826   // a pre-read of the card it can safely be done before the StoreLoad
1827   // barrier. However that does not bypass the need to reread the card
1828   // after the barrier.
1829   //
1830   //                (pre-write subtree elided)
1831   //        . . .                  . . .    . . .  . . .
1832   //        C |               M |    M |    M |
1833   //       Region            Phi[M] StoreN    |
1834   //          |            Raw  |  oop |  Bot |
1835   //         / \_______         |\     |\     |\
1836   //      C / C \      . . .    | \    | \    | \
1837   //       If   CastP2X . . .   |  \   |  \   |  \
1838   //       / \                  |   \  |   \  |   \
1839   //      /   \                 |    \ |    \ |    \
1840   // IfFalse IfTrue             |      |      |     \
1841   //   |       |                 \     |     /       |
1842   //   |       If                 \    | \  /   \    |
1843   //   |      / \                  \   |   /     \   |
1844   //   |     /   \                  \  |  / \     |  |
1845   //   | IfFalse IfTrue           MergeMem   \    |  |
1846   //   |  . . .    / \                 |      \   |  |
1847   //   |          /   \                |       |  |  |
1848   //   |     IfFalse IfTrue            |       |  |  |
1849   //   |      . . .    |               |       |  |  |
1850   //   |               If             /        |  |  |
1851   //   |               / \           /         |  |  |
1852   //   |              /   \         /          |  |  |
1853   //   |         IfFalse IfTrue    /           |  |  |
1854   //   |           . . .   |      /            |  |  |
1855   //   |                    \    /             |  |  |
1856   //   |                     \  /              |  |  |
1857   //   |         MemBarVolatile__(card mark  ) |  |  |
1858   //   |              ||   C |     \           |  |  |
1859   //   |             LoadB   If     |         /   |  |
1860   //   |                    / \ Raw |        /   /  /
1861   //   |                   . . .    |       /   /  /
1862   //   |                        \   |      /   /  /
1863   //   |                        StoreCM   /   /  /
1864   //   |                           |     /   /  /
1865   //   |                            . . .   /  /
1866   //   |                                   /  /
1867   //   |   . . .                          /  /
1868   //   |    |             | /            /  /
1869   //   |    |           Phi[M] /        /  /
1870   //   |    |             |   /        /  /
1871   //   |    |             |  /        /  /
1872   //   |  Region  . . .  Phi[M]      /  /
1873   //   |    |             |         /  /
1874   //    \   |             |        /  /
1875   //     \  | . . .       |       /  /
1876   //      \ |             |      /  /
1877   //      Region         Phi[M] /  /
1878   //        |               \  /  /
1879   //         \             MergeMem
1880   //          \            /
1881   //          MemBarVolatile
1882   //
1883   // As with CMS + CondCardMark the first MergeMem merges the
1884   // AliasIdxBot Mem slice from the leading membar and the oopptr Mem
1885   // slice from the Store into the card mark membar. However, in this
1886   // case it may also merge an AliasRawIdx mem slice from the pre
1887   // barrier write.
1888   //
1889   // The trailing MergeMem merges an AliasIdxBot Mem slice from the
1890   // leading membar with an oop slice from the StoreN and an
1891   // AliasRawIdx slice from the post barrier writes. In this case the
1892   // AliasIdxRaw Mem slice is merged through a series of Phi nodes
1893   // which combine feeds from the If regions in the post barrier
1894   // subgraph.
1895   //
1896   // So, for G1 the same characteristic subgraph arises as for CMS +
1897   // CondCardMark. There is a normal subgraph feeding the card mark
1898   // membar and a normal subgraph feeding the trailing membar.
1899   //
1900   // The CAS graph when using G1GC also includes an optional
1901   // post-write subgraph. It is very similar to the above graph except
1902   // for a few details.
1903   // 
  // - The control flow is gated by an additional If which tests the
1905   // result from the CompareAndSwapX node
1906   // 
1907   //  - The MergeMem which feeds the card mark membar only merges the
1908   // AliasIdxBot slice from the leading membar and the AliasIdxRaw
1909   // slice from the pre-barrier. It does not merge the SCMemProj
1910   // AliasIdxBot slice. So, this subgraph does not look like the
1911   // normal CAS subgraph.
1912   //
1913   // - The MergeMem which feeds the trailing membar merges the
1914   // AliasIdxBot slice from the leading membar, the AliasIdxRaw slice
1915   // from the post-barrier and the SCMemProj AliasIdxBot slice i.e. it
1916   // has two AliasIdxBot input slices. However, this subgraph does
1917   // still look like the normal CAS subgraph.
1918   //
1919   // So, the upshot is:
1920   //
  // In all cases a volatile put graph will include a *normal*
  // volatile store subgraph between the leading membar and the
  // trailing membar. It may also include a normal volatile store
  // subgraph between the leading membar and the card mark membar.
1925   //
1926   // In all cases a CAS graph will contain a unique normal CAS graph
1927   // feeding the trailing membar.
1928   //
1929   // In all cases where there is a card mark membar (either as part of
1930   // a volatile object put or CAS) it will be fed by a MergeMem whose
1931   // AliasIdxBot slice feed will be a leading membar.
1932   //
1933   // The predicates controlling generation of instructions for store
1934   // and barrier nodes employ a few simple helper functions (described
1935   // below) which identify the presence or absence of all these
1936   // subgraph configurations and provide a means of traversing from
1937   // one node in the subgraph to another.
1938 
1939   // is_CAS(int opcode)
1940   //
1941   // return true if opcode is one of the possible CompareAndSwapX
1942   // values otherwise false.
1943 
1944   bool is_CAS(int opcode)
1945   {
1946     switch(opcode) {
1947       // We handle these
1948     case Op_CompareAndSwapI:
1949     case Op_CompareAndSwapL:
1950     case Op_CompareAndSwapP:
1951     case Op_CompareAndSwapN:
1952  // case Op_CompareAndSwapB:
1953  // case Op_CompareAndSwapS:
1954       return true;
1955       // These are TBD
1956     case Op_WeakCompareAndSwapB:
1957     case Op_WeakCompareAndSwapS:
1958     case Op_WeakCompareAndSwapI:
1959     case Op_WeakCompareAndSwapL:
1960     case Op_WeakCompareAndSwapP:
1961     case Op_WeakCompareAndSwapN:
1962     case Op_CompareAndExchangeB:
1963     case Op_CompareAndExchangeS:
1964     case Op_CompareAndExchangeI:
1965     case Op_CompareAndExchangeL:
1966     case Op_CompareAndExchangeP:
1967     case Op_CompareAndExchangeN:
1968       return false;
1969     default:
1970       return false;
1971     }
1972   }
1973 
1974 
1975   // leading_to_trailing
1976   //
  // graph traversal helper which detects the normal case Mem feed from
1978   // a release membar (or, optionally, its cpuorder child) to a
1979   // dependent volatile membar i.e. it ensures that one or other of
1980   // the following Mem flow subgraph is present.
1981   //
1982   //   MemBarRelease {leading}
1983   //   {MemBarCPUOrder} {optional}
1984   //     Bot |  \      . . .
1985   //         |  StoreN/P[mo_release]  . . .
1986   //         |   /
1987   //        MergeMem
1988   //         |
1989   //   MemBarVolatile {not card mark}
1990   //
1991   //   MemBarRelease {leading}
1992   //   {MemBarCPUOrder} {optional}
1993   //      |       \      . . .
1994   //      |     CompareAndSwapX  . . .
1995   //               |
1996   //     . . .    SCMemProj
1997   //           \   |
1998   //      |    MergeMem
1999   //      |       /
2000   //    MemBarCPUOrder
2001   //    MemBarAcquire {trailing}
2002   //
2003   // the predicate needs to be capable of distinguishing the following
  // volatile put graph which may arise when a GC post barrier
2005   // inserts a card mark membar
2006   //
2007   //   MemBarRelease {leading}
2008   //   {MemBarCPUOrder}__
2009   //     Bot |   \       \
2010   //         |   StoreN/P \
2011   //         |    / \     |
2012   //        MergeMem \    |
2013   //         |        \   |
2014   //   MemBarVolatile  \  |
2015   //    {card mark}     \ |
2016   //                  MergeMem
2017   //                      |
2018   // {not card mark} MemBarVolatile
2019   //
2020   // if the correct configuration is present returns the trailing
2021   // membar otherwise NULL.
2022   //
2023   // the input membar is expected to be either a cpuorder membar or a
2024   // release membar. in the latter case it should not have a cpu membar
2025   // child.
2026   //
2027   // the returned value may be a card mark or trailing membar
2028   //
2029 
  MemBarNode *leading_to_trailing(MemBarNode *leading)
  {
    assert((leading->Opcode() == Op_MemBarRelease ||
            leading->Opcode() == Op_MemBarCPUOrder),
           "expecting a volatile or cpuroder membar!");

    // check the mem flow
    ProjNode *mem = leading->proj_out(TypeFunc::Memory);

    if (!mem) {
      return NULL;
    }

    Node *x = NULL;
    StoreNode * st = NULL;
    LoadStoreNode *cas = NULL;
    MergeMemNode *mm = NULL;
    MergeMemNode *mm2 = NULL;

    // walk the users of the leading membar's Mem projection looking
    // for the unique releasing store or CAS plus up to two MergeMems.
    // a second merge arises with CMS + CondCardMark or G1, where one
    // merge feeds the card mark membar and the other the trailing
    // membar (see the graphs in the commentary above).
    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
      x = mem->fast_out(i);
      if (x->is_MergeMem()) {
        if (mm != NULL) {
          if (mm2 != NULL) {
            // should not see more than 2 merge mems
            return NULL;
          } else {
            mm2 = x->as_MergeMem();
          }
        } else {
          mm = x->as_MergeMem();
        }
      } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
        // two releasing stores/CAS nodes is one too many
        if (st != NULL || cas != NULL) {
          return NULL;
        }
        st = x->as_Store();
      } else if (is_CAS(x->Opcode())) {
        if (st != NULL || cas != NULL) {
          return NULL;
        }
        cas = x->as_LoadStore();
      }
    }

    // must have a store or a cas
    if (!st && !cas) {
      return NULL;
    }

    // must have at least one merge if we also have st
    if (st && !mm) {
      return NULL;
    }

    if (cas) {
      Node *y = NULL;
      // look for an SCMemProj
      for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
        x = cas->fast_out(i);
        if (x->is_Proj()) {
          y = x;
          break;
        }
      }
      if (y == NULL) {
        return NULL;
      }
      // the proj must feed a MergeMem. n.b. this overwrites any merge
      // found hanging directly off the leading membar -- for a CAS the
      // trailing merge is the one reached via the SCMemProj.
      for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
        x = y->fast_out(i);
        if (x->is_MergeMem()) {
          mm = x->as_MergeMem();
          break;
        }
      }
      if (mm == NULL) {
        return NULL;
      }
      MemBarNode *mbar = NULL;
      // ensure the merge feeds a trailing membar cpuorder + acquire pair
      for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
        x = mm->fast_out(i);
        if (x->is_MemBar()) {
          int opcode = x->Opcode();
          if (opcode == Op_MemBarCPUOrder) {
            MemBarNode *z =  x->as_MemBar();
            z = child_membar(z);
            if (z != NULL && z->Opcode() == Op_MemBarAcquire) {
              mbar = z;
            }
          }
          break;
        }
      }
      // either the trailing MemBarAcquire or NULL if the expected
      // cpuorder + acquire pair was not found
      return mbar;
    } else {
      Node *y = NULL;
      // ensure the store feeds the first mergemem;
      for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
        if (st->fast_out(i) == mm) {
          y = st;
          break;
        }
      }
      if (y == NULL) {
        return NULL;
      }
      if (mm2 != NULL) {
        // ensure the store feeds the second mergemem;
        y = NULL;
        for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
          if (st->fast_out(i) == mm2) {
            y = st;
          }
        }
        if (y == NULL) {
          return NULL;
        }
      }

      MemBarNode *mbar = NULL;
      // ensure the first mergemem feeds a volatile membar
      for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
        x = mm->fast_out(i);
        if (x->is_MemBar()) {
          int opcode = x->Opcode();
          if (opcode == Op_MemBarVolatile) {
            mbar = x->as_MemBar();
          }
          break;
        }
      }
      if (mm2 == NULL) {
        // this is our only option for a trailing membar
        return mbar;
      }
      // ensure the second mergemem feeds a volatile membar
      MemBarNode *mbar2 = NULL;
      for (DUIterator_Fast imax, i = mm2->fast_outs(imax); i < imax; i++) {
        x = mm2->fast_out(i);
        if (x->is_MemBar()) {
          int opcode = x->Opcode();
          if (opcode == Op_MemBarVolatile) {
            mbar2 = x->as_MemBar();
          }
          break;
        }
      }
      // if we have two merge mems we must have two volatile membars
      if (mbar == NULL || mbar2 == NULL) {
        return NULL;
      }
      // return the trailing membar: whichever of the two volatile
      // membars is *not* the card mark membar must be the trailing one
      if (is_card_mark_membar(mbar2)) {
        return mbar;
      } else {
        if (is_card_mark_membar(mbar)) {
          return mbar2;
        } else {
          return NULL;
        }
      }
    }
  }
2196 
2197   // trailing_to_leading
2198   //
2199   // graph traversal helper which detects the normal case Mem feed
2200   // from a trailing membar to a preceding release membar (optionally
2201   // its cpuorder child) i.e. it ensures that one or other of the
2202   // following Mem flow subgraphs is present.
2203   //
2204   //   MemBarRelease {leading}
2205   //   MemBarCPUOrder {optional}
2206   //    | Bot |  \      . . .
2207   //    |     |  StoreN/P[mo_release]  . . .
2208   //    |     |   /
2209   //    |    MergeMem
2210   //    |     |
2211   //   MemBarVolatile {not card mark}
2212   //
2213   //   MemBarRelease {leading}
2214   //   MemBarCPUOrder {optional}
2215   //      |       \      . . .
2216   //      |     CompareAndSwapX  . . .
2217   //               |
2218   //     . . .    SCMemProj
2219   //           \   |
2220   //      |    MergeMem
2221   //      |       |
2222   //    MemBarCPUOrder
2223   //    MemBarAcquire {trailing}
2224   //
2225   // this predicate checks for the same flow as the previous predicate
2226   // but starting from the bottom rather than the top.
2227   //
  // if the configuration is present returns the cpuorder membar for
2229   // preference or when absent the release membar otherwise NULL.
2230   //
2231   // n.b. the input membar is expected to be a MemBarVolatile or
2232   // MemBarAcquire. if it is a MemBarVolatile it must *not* be a card
2233   // mark membar.
2234 
2235   MemBarNode *trailing_to_leading(const MemBarNode *barrier)
2236   {
2237     // input must be a volatile membar
2238     assert((barrier->Opcode() == Op_MemBarVolatile ||
2239             barrier->Opcode() == Op_MemBarAcquire),
2240            "expecting a volatile or an acquire membar");
2241 
2242     assert((barrier->Opcode() != Op_MemBarVolatile) ||
2243            !is_card_mark_membar(barrier),
2244            "not expecting a card mark membar");
2245     Node *x;
2246     bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
2247 
2248     // if we have an acquire membar then it must be fed via a CPUOrder
2249     // membar
2250 
2251     if (is_cas) {
2252       // skip to parent barrier which must be a cpuorder
2253       x = parent_membar(barrier);
2254       if (x->Opcode() != Op_MemBarCPUOrder)
2255         return NULL;
2256     } else {
2257       // start from the supplied barrier
2258       x = (Node *)barrier;
2259     }
2260 
2261     // the Mem feed to the membar should be a merge
2262     x = x ->in(TypeFunc::Memory);
2263     if (!x->is_MergeMem())
2264       return NULL;
2265 
2266     MergeMemNode *mm = x->as_MergeMem();
2267 
2268     if (is_cas) {
2269       // the merge should be fed from the CAS via an SCMemProj node
2270       x = NULL;
2271       for (uint idx = 1; idx < mm->req(); idx++) {
2272         if (mm->in(idx)->Opcode() == Op_SCMemProj) {
2273           x = mm->in(idx);
2274           break;
2275         }
2276       }
2277       if (x == NULL) {
2278         return NULL;
2279       }
2280       // check for a CAS feeding this proj
2281       x = x->in(0);
2282       int opcode = x->Opcode();
2283       if (!is_CAS(opcode)) {
2284         return NULL;
2285       }
2286       // the CAS should get its mem feed from the leading membar
2287       x = x->in(MemNode::Memory);
2288     } else {
2289       // the merge should get its Bottom mem feed from the leading membar
2290       x = mm->in(Compile::AliasIdxBot);
2291     }
2292 
2293     // ensure this is a non control projection
2294     if (!x->is_Proj() || x->is_CFG()) {
2295       return NULL;
2296     }
2297     // if it is fed by a membar that's the one we want
2298     x = x->in(0);
2299 
2300     if (!x->is_MemBar()) {
2301       return NULL;
2302     }
2303 
2304     MemBarNode *leading = x->as_MemBar();
2305     // reject invalid candidates
2306     if (!leading_membar(leading)) {
2307       return NULL;
2308     }
2309 
2310     // ok, we have a leading membar, now for the sanity clauses
2311 
2312     // the leading membar must feed Mem to a releasing store or CAS
2313     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2314     StoreNode *st = NULL;
2315     LoadStoreNode *cas = NULL;
2316     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2317       x = mem->fast_out(i);
2318       if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2319         // two stores or CASes is one too many
2320         if (st != NULL || cas != NULL) {
2321           return NULL;
2322         }
2323         st = x->as_Store();
2324       } else if (is_CAS(x->Opcode())) {
2325         if (st != NULL || cas != NULL) {
2326           return NULL;
2327         }
2328         cas = x->as_LoadStore();
2329       }
2330     }
2331 
2332     // we should not have both a store and a cas
2333     if (st == NULL & cas == NULL) {
2334       return NULL;
2335     }
2336 
2337     if (st == NULL) {
2338       // nothing more to check
2339       return leading;
2340     } else {
2341       // we should not have a store if we started from an acquire
2342       if (is_cas) {
2343         return NULL;
2344       }
2345 
2346       // the store should feed the merge we used to get here
2347       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2348         if (st->fast_out(i) == mm) {
2349           return leading;
2350         }
2351       }
2352     }
2353 
2354     return NULL;
2355   }
2356 
2357   // card_mark_to_leading
2358   //
2359   // graph traversal helper which traverses from a card mark volatile
2360   // membar to a leading membar i.e. it ensures that the following Mem
2361   // flow subgraph is present.
2362   //
2363   //    MemBarRelease {leading}
2364   //   {MemBarCPUOrder} {optional}
2365   //         |   . . .
2366   //     Bot |   /
2367   //      MergeMem
2368   //         |
2369   //     MemBarVolatile (card mark)
2370   //        |     \
2371   //      . . .   StoreCM
2372   //
  // if the configuration is present returns the cpuorder membar for
  // preference or when absent the release membar otherwise NULL.
  //
  // n.b. the input membar is expected to be a MemBarVolatile and must
  // be a card mark membar.
2378 
2379   MemBarNode *card_mark_to_leading(const MemBarNode *barrier)
2380   {
2381     // input must be a card mark volatile membar
2382     assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2383 
2384     // the Mem feed to the membar should be a merge
2385     Node *x = barrier->in(TypeFunc::Memory);
2386     if (!x->is_MergeMem()) {
2387       return NULL;
2388     }
2389 
2390     MergeMemNode *mm = x->as_MergeMem();
2391 
2392     x = mm->in(Compile::AliasIdxBot);
2393 
2394     if (!x->is_MemBar()) {
2395       return NULL;
2396     }
2397 
2398     MemBarNode *leading = x->as_MemBar();
2399 
2400     if (leading_membar(leading)) {
2401       return leading;
2402     }
2403 
2404     return NULL;
2405   }
2406 
// predicate controlling translation of MemBarAcquire
//
// returns true if the supplied acquire membar does not need a dmb to
// be planted for it i.e. when it is fed by an acquiring LoadX, forms
// part of an unsafe volatile get membar pair, or is the trailing
// membar of a CAS. always returns false under UseBarriersForVolatile.
bool unnecessary_acquire(const Node *barrier)
{
  assert(barrier->is_MemBar(), "expecting a membar");

  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  // a volatile read derived from bytecode (or also from an inlined
  // SHA field read via LibraryCallKit::load_field_from_object)
  // manifests as a LoadX[mo_acquire] followed by an acquire membar
  // with a bogus read dependency on its preceding load. so in those
  // cases we will find the load node at the PARMS offset of the
  // acquire membar.  n.b. there may be an intervening DecodeN node.
  //
  // a volatile load derived from an inlined unsafe field access
  // manifests as a cpuorder membar with Ctl and Mem projections
  // feeding both an acquire membar and a LoadX[mo_acquire]. The
  // acquire then feeds another cpuorder membar via Ctl and Mem
  // projections. The load has no output dependency on these trailing
  // membars because subsequent nodes inserted into the graph take
  // their control feed from the final membar cpuorder meaning they
  // are all ordered after the load.

  Node *x = barrier->lookup(TypeFunc::Parms);
  if (x) {
    // we are starting from an acquire and it has a fake dependency
    //
    // need to check for
    //
    //   LoadX[mo_acquire]
    //   {  |1   }
    //   {DecodeN}
    //      |Parms
    //   MemBarAcquire*
    //
    // where * tags node we were passed
    // and |k means input k
    if (x->is_DecodeNarrowPtr()) {
      x = x->in(1);
    }

    return (x->is_Load() && x->as_Load()->is_acquire());
  }

  // now check for an unsafe volatile get

  // need to check for
  //
  //   MemBarCPUOrder
  //        ||       \\
  //   MemBarAcquire* LoadX[mo_acquire]
  //        ||
  //   MemBarCPUOrder
  //
  // where * tags node we were passed
  // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes

  // check for a parent MemBarCPUOrder
  ProjNode *ctl;
  ProjNode *mem;
  MemBarNode *parent = parent_membar(barrier);
  if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
    return false;
  ctl = parent->proj_out(TypeFunc::Control);
  mem = parent->proj_out(TypeFunc::Memory);
  if (!ctl || !mem) {
    return false;
  }
  // ensure the proj nodes both feed a LoadX[mo_acquire]
  LoadNode *ld = NULL;
  for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
    x = ctl->fast_out(i);
    // if we see a load we keep hold of it and stop searching
    if (x->is_Load()) {
      ld = x->as_Load();
      break;
    }
  }
  // it must be an acquiring load
  if (ld && ld->is_acquire()) {

    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
      x = mem->fast_out(i);
      // if we see the same load we drop it and stop searching
      if (x == ld) {
        ld = NULL;
        break;
      }
    }
    // we must have dropped the load
    if (ld == NULL) {
      // check for a child cpuorder membar
      MemBarNode *child  = child_membar(barrier->as_MemBar());
      if (child && child->Opcode() == Op_MemBarCPUOrder)
        return true;
    }
  }

  // final option for unnecessary membar is that it is a trailing node
  // belonging to a CAS

  MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());

  return leading != NULL;
}
2514 
2515 bool needs_acquiring_load(const Node *n)
2516 {
2517   assert(n->is_Load(), "expecting a load");
2518   if (UseBarriersForVolatile) {
2519     // we use a normal load and a dmb
2520     return false;
2521   }
2522 
2523   LoadNode *ld = n->as_Load();
2524 
2525   if (!ld->is_acquire()) {
2526     return false;
2527   }
2528 
2529   // check if this load is feeding an acquire membar
2530   //
2531   //   LoadX[mo_acquire]
2532   //   {  |1   }
2533   //   {DecodeN}
2534   //      |Parms
2535   //   MemBarAcquire*
2536   //
2537   // where * tags node we were passed
2538   // and |k means input k
2539 
2540   Node *start = ld;
2541   Node *mbacq = NULL;
2542 
2543   // if we hit a DecodeNarrowPtr we reset the start node and restart
2544   // the search through the outputs
2545  restart:
2546 
2547   for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
2548     Node *x = start->fast_out(i);
2549     if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
2550       mbacq = x;
2551     } else if (!mbacq &&
2552                (x->is_DecodeNarrowPtr() ||
2553                 (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
2554       start = x;
2555       goto restart;
2556     }
2557   }
2558 
2559   if (mbacq) {
2560     return true;
2561   }
2562 
2563   // now check for an unsafe volatile get
2564 
2565   // check if Ctl and Proj feed comes from a MemBarCPUOrder
2566   //
2567   //     MemBarCPUOrder
2568   //        ||       \\
2569   //   MemBarAcquire* LoadX[mo_acquire]
2570   //        ||
2571   //   MemBarCPUOrder
2572 
2573   MemBarNode *membar;
2574 
2575   membar = parent_membar(ld);
2576 
2577   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2578     return false;
2579   }
2580 
2581   // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
2582 
2583   membar = child_membar(membar);
2584 
2585   if (!membar || !membar->Opcode() == Op_MemBarAcquire) {
2586     return false;
2587   }
2588 
2589   membar = child_membar(membar);
2590 
2591   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2592     return false;
2593   }
2594 
2595   return true;
2596 }
2597 
2598 bool unnecessary_release(const Node *n)
2599 {
2600   assert((n->is_MemBar() &&
2601           n->Opcode() == Op_MemBarRelease),
2602          "expecting a release membar");
2603 
2604   if (UseBarriersForVolatile) {
2605     // we need to plant a dmb
2606     return false;
2607   }
2608 
2609   // if there is a dependent CPUOrder barrier then use that as the
2610   // leading
2611 
2612   MemBarNode *barrier = n->as_MemBar();
2613   // check for an intervening cpuorder membar
2614   MemBarNode *b = child_membar(barrier);
2615   if (b && b->Opcode() == Op_MemBarCPUOrder) {
2616     // ok, so start the check from the dependent cpuorder barrier
2617     barrier = b;
2618   }
2619 
2620   // must start with a normal feed
2621   MemBarNode *trailing = leading_to_trailing(barrier);
2622 
2623   return (trailing != NULL);
2624 }
2625 
2626 bool unnecessary_volatile(const Node *n)
2627 {
2628   // assert n->is_MemBar();
2629   if (UseBarriersForVolatile) {
2630     // we need to plant a dmb
2631     return false;
2632   }
2633 
2634   MemBarNode *mbvol = n->as_MemBar();
2635 
2636   // first we check if this is part of a card mark. if so then we have
2637   // to generate a StoreLoad barrier
2638 
2639   if (is_card_mark_membar(mbvol)) {
2640       return false;
2641   }
2642 
2643   // ok, if it's not a card mark then we still need to check if it is
2644   // a trailing membar of a volatile put graph.
2645 
2646   return (trailing_to_leading(mbvol) != NULL);
2647 }
2648 
2649 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2650 
2651 bool needs_releasing_store(const Node *n)
2652 {
2653   // assert n->is_Store();
2654   if (UseBarriersForVolatile) {
2655     // we use a normal store and dmb combination
2656     return false;
2657   }
2658 
2659   StoreNode *st = n->as_Store();
2660 
2661   // the store must be marked as releasing
2662   if (!st->is_release()) {
2663     return false;
2664   }
2665 
2666   // the store must be fed by a membar
2667 
2668   Node *x = st->lookup(StoreNode::Memory);
2669 
2670   if (! x || !x->is_Proj()) {
2671     return false;
2672   }
2673 
2674   ProjNode *proj = x->as_Proj();
2675 
2676   x = proj->lookup(0);
2677 
2678   if (!x || !x->is_MemBar()) {
2679     return false;
2680   }
2681 
2682   MemBarNode *barrier = x->as_MemBar();
2683 
2684   // if the barrier is a release membar or a cpuorder mmebar fed by a
2685   // release membar then we need to check whether that forms part of a
2686   // volatile put graph.
2687 
2688   // reject invalid candidates
2689   if (!leading_membar(barrier)) {
2690     return false;
2691   }
2692 
2693   // does this lead a normal subgraph?
2694   MemBarNode *trailing = leading_to_trailing(barrier);
2695 
2696   return (trailing != NULL);
2697 }
2698 
2699 // predicate controlling translation of CAS
2700 //
2701 // returns true if CAS needs to use an acquiring load otherwise false
2702 
// returns true if this CAS should be translated using an acquiring
// (ldaxr) load; with stlr/ldar in use this is always the case, so the
// body only performs debug-mode graph-shape validation.
bool needs_acquiring_load_exclusive(const Node *n)
{
  assert(is_CAS(n->Opcode()), "expecting a compare and swap");
  if (UseBarriersForVolatile) {
    // explicit dmb barriers are planted instead
    return false;
  }

  // CAS nodes only ought to turn up in inlined unsafe CAS operations
#ifdef ASSERT
  // verify the expected shape around the CAS:
  //   MemBarRelease -> MemBarCPUOrder -> (memory Proj) -> CAS
  // with a matching trailing MemBarAcquire
  LoadStoreNode *st = n->as_LoadStore();

  // the store must be fed by a membar

  Node *x = st->lookup(StoreNode::Memory);

  assert (x && x->is_Proj(), "CAS not fed by memory proj!");

  ProjNode *proj = x->as_Proj();

  // step from the memory Proj to its defining node
  x = proj->lookup(0);

  assert (x && x->is_MemBar(), "CAS not fed by membar!");

  MemBarNode *barrier = x->as_MemBar();

  // the barrier must be a cpuorder membar fed by a release membar

  assert(barrier->Opcode() == Op_MemBarCPUOrder,
         "CAS not fed by cpuorder membar!");

  MemBarNode *b = parent_membar(barrier);
  assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
          "CAS not fed by cpuorder+release membar pair!");

  // does this lead a normal subgraph?
  MemBarNode *mbar = leading_to_trailing(barrier);

  assert(mbar != NULL, "CAS not embedded in normal graph!");

  assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
#endif // ASSERT
  // so we can just return true here
  return true;
}
2747 
2748 // predicate controlling translation of StoreCM
2749 //
2750 // returns true if a StoreStore must precede the card write otherwise
2751 // false
2752 
2753 bool unnecessary_storestore(const Node *storecm)
2754 {
2755   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
2756 
2757   // we only ever need to generate a dmb ishst between an object put
2758   // and the associated card mark when we are using CMS without
2759   // conditional card marking. Any other occurence will happen when
2760   // performing a card mark using CMS with conditional card marking or
2761   // G1. In those cases the preceding MamBarVolatile will be
2762   // translated to a dmb ish which guarantes visibility of the
2763   // preceding StoreN/P before this StoreCM
2764 
2765   if (!UseConcMarkSweepGC || UseCondCardMark) {
2766     return true;
2767   }
2768 
2769   // if we are implementing volatile puts using barriers then we must
2770   // insert the dmb ishst
2771 
2772   if (UseBarriersForVolatile) {
2773     return false;
2774   }
2775 
2776   // we must be using CMS with conditional card marking so we ahve to
2777   // generate the StoreStore
2778 
2779   return false;
2780 }
2781 
2782 
2783 #define __ _masm.
2784 
2785 // advance declarations for helper functions to convert register
2786 // indices to register objects
2787 
2788 // the ad file has to provide implementations of certain methods
2789 // expected by the generic code
2790 //
2791 // REQUIRED FUNCTIONALITY
2792 
2793 //=============================================================================
2794 
2795 // !!!!! Special hack to get all types of calls to specify the byte offset
2796 //       from the start of the call to the point where the return address
2797 //       will point.
2798 
2799 int MachCallStaticJavaNode::ret_addr_offset()
2800 {
2801   // call should be a simple bl
2802   int off = 4;
2803   return off;
2804 }
2805 
2806 int MachCallDynamicJavaNode::ret_addr_offset()
2807 {
2808   return 16; // movz, movk, movk, bl
2809 }
2810 
2811 int MachCallRuntimeNode::ret_addr_offset() {
2812   // for generated stubs the call will be
2813   //   far_call(addr)
2814   // for real runtime callouts it will be six instructions
2815   // see aarch64_enc_java_to_runtime
2816   //   adr(rscratch2, retaddr)
2817   //   lea(rscratch1, RuntimeAddress(addr)
2818   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
2819   //   blrt rscratch1
2820   CodeBlob *cb = CodeCache::find_blob(_entry_point);
2821   if (cb) {
2822     return MacroAssembler::far_branch_size();
2823   } else {
2824     return 6 * NativeInstruction::instruction_size;
2825   }
2826 }
2827 
2828 // Indicate if the safepoint node needs the polling page as an input
2829 
2830 // the shared code plants the oop data at the start of the generated
2831 // code for the safepoint node and that needs ot be at the load
2832 // instruction itself. so we cannot plant a mov of the safepoint poll
2833 // address followed by a load. setting this to true means the mov is
2834 // scheduled as a prior instruction. that's better for scheduling
2835 // anyway.
2836 
2837 bool SafePointNode::needs_polling_address_input()
2838 {
2839   return true;
2840 }
2841 
2842 //=============================================================================
2843 
2844 #ifndef PRODUCT
2845 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2846   st->print("BREAKPOINT");
2847 }
2848 #endif
2849 
2850 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2851   MacroAssembler _masm(&cbuf);
2852   __ brk(0);
2853 }
2854 
2855 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
2856   return MachNode::size(ra_);
2857 }
2858 
2859 //=============================================================================
2860 
2861 #ifndef PRODUCT
2862   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
2863     st->print("nop \t# %d bytes pad for loops and calls", _count);
2864   }
2865 #endif
2866 
2867   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2868     MacroAssembler _masm(&cbuf);
2869     for (int i = 0; i < _count; i++) {
2870       __ nop();
2871     }
2872   }
2873 
2874   uint MachNopNode::size(PhaseRegAlloc*) const {
2875     return _count * NativeInstruction::instruction_size;
2876   }
2877 
2878 //=============================================================================
2879 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
2880 
2881 int Compile::ConstantTable::calculate_table_base_offset() const {
2882   return 0;  // absolute addressing, no offset
2883 }
2884 
2885 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
2886 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
2887   ShouldNotReachHere();
2888 }
2889 
2890 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
2891   // Empty encoding
2892 }
2893 
2894 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
2895   return 0;
2896 }
2897 
2898 #ifndef PRODUCT
2899 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
2900   st->print("-- \t// MachConstantBaseNode (empty encoding)");
2901 }
2902 #endif
2903 
2904 #ifndef PRODUCT
2905 void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2906   Compile* C = ra_->C;
2907 
2908   int framesize = C->frame_slots() << LogBytesPerInt;
2909 
2910   if (C->need_stack_bang(framesize))
2911     st->print("# stack bang size=%d\n\t", framesize);
2912 
2913   if (framesize < ((1 << 9) + 2 * wordSize)) {
2914     st->print("sub  sp, sp, #%d\n\t", framesize);
2915     st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
2916     if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
2917   } else {
2918     st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
2919     if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
2920     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
2921     st->print("sub  sp, sp, rscratch1");
2922   }
2923 }
2924 #endif
2925 
// Emit the method prolog: a patchable nop, an optional stack-bang
// overflow check, the frame build, simulator notification, and
// constant-table base setup. The emission order here is fixed — the
// nop must be the first instruction and the frame-complete mark must
// follow the frame build.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  // bang the stack before building the frame when the frame is large
  // enough to risk overflowing the guard pages
  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  // record where the frame becomes walkable for stack traversal
  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
2961 
2962 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
2963 {
2964   return MachNode::size(ra_); // too many variables; just compute it
2965                               // the hard way
2966 }
2967 
2968 int MachPrologNode::reloc() const
2969 {
2970   return 0;
2971 }
2972 
2973 //=============================================================================
2974 
2975 #ifndef PRODUCT
2976 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2977   Compile* C = ra_->C;
2978   int framesize = C->frame_slots() << LogBytesPerInt;
2979 
2980   st->print("# pop frame %d\n\t",framesize);
2981 
2982   if (framesize == 0) {
2983     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
2984   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
2985     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
2986     st->print("add  sp, sp, #%d\n\t", framesize);
2987   } else {
2988     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
2989     st->print("add  sp, sp, rscratch1\n\t");
2990     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
2991   }
2992 
2993   if (do_polling() && C->is_method_compilation()) {
2994     st->print("# touch polling page\n\t");
2995     st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
2996     st->print("ldr zr, [rscratch1]");
2997   }
2998 }
2999 #endif
3000 
3001 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3002   Compile* C = ra_->C;
3003   MacroAssembler _masm(&cbuf);
3004   int framesize = C->frame_slots() << LogBytesPerInt;
3005 
3006   __ remove_frame(framesize);
3007 
3008   if (NotifySimulator) {
3009     __ notify(Assembler::method_reentry);
3010   }
3011 
3012   if (do_polling() && C->is_method_compilation()) {
3013     __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
3014   }
3015 }
3016 
3017 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
3018   // Variable size. Determine dynamically.
3019   return MachNode::size(ra_);
3020 }
3021 
3022 int MachEpilogNode::reloc() const {
3023   // Return number of relocatable values contained in this instruction.
3024   return 1; // 1 for polling page.
3025 }
3026 
3027 const Pipeline * MachEpilogNode::pipeline() const {
3028   return MachNode::pipeline_class();
3029 }
3030 
3031 // This method seems to be obsolete. It is declared in machnode.hpp
3032 // and defined in all *.ad files, but it is never called. Should we
3033 // get rid of it?
3034 int MachEpilogNode::safepoint_offset() const {
3035   assert(do_polling(), "no return for this epilog node");
3036   return 4;
3037 }
3038 
3039 //=============================================================================
3040 
3041 // Figure out which register class each belongs in: rc_int, rc_float or
3042 // rc_stack.
3043 enum RC { rc_bad, rc_int, rc_float, rc_stack };
3044 
3045 static enum RC rc_class(OptoReg::Name reg) {
3046 
3047   if (reg == OptoReg::Bad) {
3048     return rc_bad;
3049   }
3050 
3051   // we have 30 int registers * 2 halves
3052   // (rscratch1 and rscratch2 are omitted)
3053 
3054   if (reg < 60) {
3055     return rc_int;
3056   }
3057 
3058   // we have 32 float register * 2 halves
3059   if (reg < 60 + 128) {
3060     return rc_float;
3061   }
3062 
3063   // Between float regs & stack is the flags regs.
3064   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
3065 
3066   return rc_stack;
3067 }
3068 
3069 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
3070   Compile* C = ra_->C;
3071 
3072   // Get registers to move.
3073   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
3074   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
3075   OptoReg::Name dst_hi = ra_->get_reg_second(this);
3076   OptoReg::Name dst_lo = ra_->get_reg_first(this);
3077 
3078   enum RC src_hi_rc = rc_class(src_hi);
3079   enum RC src_lo_rc = rc_class(src_lo);
3080   enum RC dst_hi_rc = rc_class(dst_hi);
3081   enum RC dst_lo_rc = rc_class(dst_lo);
3082 
3083   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
3084 
3085   if (src_hi != OptoReg::Bad) {
3086     assert((src_lo&1)==0 && src_lo+1==src_hi &&
3087            (dst_lo&1)==0 && dst_lo+1==dst_hi,
3088            "expected aligned-adjacent pairs");
3089   }
3090 
3091   if (src_lo == dst_lo && src_hi == dst_hi) {
3092     return 0;            // Self copy, no move.
3093   }
3094 
3095   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
3096               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
3097   int src_offset = ra_->reg2offset(src_lo);
3098   int dst_offset = ra_->reg2offset(dst_lo);
3099 
3100   if (bottom_type()->isa_vect() != NULL) {
3101     uint ireg = ideal_reg();
3102     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
3103     if (cbuf) {
3104       MacroAssembler _masm(cbuf);
3105       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
3106       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
3107         // stack->stack
3108         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
3109         if (ireg == Op_VecD) {
3110           __ unspill(rscratch1, true, src_offset);
3111           __ spill(rscratch1, true, dst_offset);
3112         } else {
3113           __ spill_copy128(src_offset, dst_offset);
3114         }
3115       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
3116         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3117                ireg == Op_VecD ? __ T8B : __ T16B,
3118                as_FloatRegister(Matcher::_regEncode[src_lo]));
3119       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
3120         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3121                        ireg == Op_VecD ? __ D : __ Q,
3122                        ra_->reg2offset(dst_lo));
3123       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
3124         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3125                        ireg == Op_VecD ? __ D : __ Q,
3126                        ra_->reg2offset(src_lo));
3127       } else {
3128         ShouldNotReachHere();
3129       }
3130     }
3131   } else if (cbuf) {
3132     MacroAssembler _masm(cbuf);
3133     switch (src_lo_rc) {
3134     case rc_int:
3135       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
3136         if (is64) {
3137             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
3138                    as_Register(Matcher::_regEncode[src_lo]));
3139         } else {
3140             MacroAssembler _masm(cbuf);
3141             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
3142                     as_Register(Matcher::_regEncode[src_lo]));
3143         }
3144       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
3145         if (is64) {
3146             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3147                      as_Register(Matcher::_regEncode[src_lo]));
3148         } else {
3149             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3150                      as_Register(Matcher::_regEncode[src_lo]));
3151         }
3152       } else {                    // gpr --> stack spill
3153         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3154         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
3155       }
3156       break;
3157     case rc_float:
3158       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
3159         if (is64) {
3160             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
3161                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3162         } else {
3163             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
3164                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3165         }
3166       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
3167           if (cbuf) {
3168             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3169                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3170         } else {
3171             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3172                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3173         }
3174       } else {                    // fpr --> stack spill
3175         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3176         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3177                  is64 ? __ D : __ S, dst_offset);
3178       }
3179       break;
3180     case rc_stack:
3181       if (dst_lo_rc == rc_int) {  // stack --> gpr load
3182         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
3183       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
3184         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3185                    is64 ? __ D : __ S, src_offset);
3186       } else {                    // stack --> stack copy
3187         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3188         __ unspill(rscratch1, is64, src_offset);
3189         __ spill(rscratch1, is64, dst_offset);
3190       }
3191       break;
3192     default:
3193       assert(false, "bad rc_class for spill");
3194       ShouldNotReachHere();
3195     }
3196   }
3197 
3198   if (st) {
3199     st->print("spill ");
3200     if (src_lo_rc == rc_stack) {
3201       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
3202     } else {
3203       st->print("%s -> ", Matcher::regName[src_lo]);
3204     }
3205     if (dst_lo_rc == rc_stack) {
3206       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
3207     } else {
3208       st->print("%s", Matcher::regName[dst_lo]);
3209     }
3210     if (bottom_type()->isa_vect() != NULL) {
3211       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
3212     } else {
3213       st->print("\t# spill size = %d", is64 ? 64:32);
3214     }
3215   }
3216 
3217   return 0;
3218 
3219 }
3220 
3221 #ifndef PRODUCT
3222 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3223   if (!ra_)
3224     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
3225   else
3226     implementation(NULL, ra_, false, st);
3227 }
3228 #endif
3229 
3230 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3231   implementation(&cbuf, ra_, false, NULL);
3232 }
3233 
3234 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
3235   return MachNode::size(ra_);
3236 }
3237 
3238 //=============================================================================
3239 
3240 #ifndef PRODUCT
3241 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3242   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3243   int reg = ra_->get_reg_first(this);
3244   st->print("add %s, rsp, #%d]\t# box lock",
3245             Matcher::regName[reg], offset);
3246 }
3247 #endif
3248 
3249 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3250   MacroAssembler _masm(&cbuf);
3251 
3252   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3253   int reg    = ra_->get_encode(this);
3254 
3255   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
3256     __ add(as_Register(reg), sp, offset);
3257   } else {
3258     ShouldNotReachHere();
3259   }
3260 }
3261 
3262 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
3263   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
3264   return 4;
3265 }
3266 
3267 //=============================================================================
3268 
3269 #ifndef PRODUCT
3270 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
3271 {
3272   st->print_cr("# MachUEPNode");
3273   if (UseCompressedClassPointers) {
3274     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3275     if (Universe::narrow_klass_shift() != 0) {
3276       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
3277     }
3278   } else {
3279    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3280   }
3281   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
3282   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
3283 }
3284 #endif
3285 
3286 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
3287 {
3288   // This is the unverified entry point.
3289   MacroAssembler _masm(&cbuf);
3290 
3291   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
3292   Label skip;
3293   // TODO
3294   // can we avoid this skip and still use a reloc?
3295   __ br(Assembler::EQ, skip);
3296   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
3297   __ bind(skip);
3298 }
3299 
3300 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
3301 {
3302   return MachNode::size(ra_);
3303 }
3304 
3305 // REQUIRED EMIT CODE
3306 
3307 //=============================================================================
3308 
3309 // Emit exception handler code.
3310 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
3311 {
3312   // mov rscratch1 #exception_blob_entry_point
3313   // br rscratch1
3314   // Note that the code buffer's insts_mark is always relative to insts.
3315   // That's why we must use the macroassembler to generate a handler.
3316   MacroAssembler _masm(&cbuf);
3317   address base = __ start_a_stub(size_exception_handler());
3318   if (base == NULL) {
3319     ciEnv::current()->record_failure("CodeCache is full");
3320     return 0;  // CodeBuffer::expand failed
3321   }
3322   int offset = __ offset();
3323   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
3324   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
3325   __ end_a_stub();
3326   return offset;
3327 }
3328 
3329 // Emit deopt handler code.
3330 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
3331 {
3332   // Note that the code buffer's insts_mark is always relative to insts.
3333   // That's why we must use the macroassembler to generate a handler.
3334   MacroAssembler _masm(&cbuf);
3335   address base = __ start_a_stub(size_deopt_handler());
3336   if (base == NULL) {
3337     ciEnv::current()->record_failure("CodeCache is full");
3338     return 0;  // CodeBuffer::expand failed
3339   }
3340   int offset = __ offset();
3341 
3342   __ adr(lr, __ pc());
3343   __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
3344 
3345   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
3346   __ end_a_stub();
3347   return offset;
3348 }
3349 
3350 // REQUIRED MATCHER CODE
3351 
3352 //=============================================================================
3353 
3354 const bool Matcher::match_rule_supported(int opcode) {
3355 
3356   switch (opcode) {
3357   default:
3358     break;
3359   }
3360 
3361   if (!has_match_rule(opcode)) {
3362     return false;
3363   }
3364 
3365   return true;  // Per default match rules are supported.
3366 }
3367 
3368 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
3369 
3370   // TODO
3371   // identify extra cases that we might want to provide match rules for
3372   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
3373   bool ret_value = match_rule_supported(opcode);
3374   // Add rules here.
3375 
3376   return ret_value;  // Per default match rules are supported.
3377 }
3378 
3379 const bool Matcher::has_predicated_vectors(void) {
3380   return false;
3381 }
3382 
3383 const int Matcher::float_pressure(int default_pressure_threshold) {
3384   return default_pressure_threshold;
3385 }
3386 
3387 int Matcher::regnum_to_fpu_offset(int regnum)
3388 {
3389   Unimplemented();
3390   return 0;
3391 }
3392 
3393 // Is this branch offset short enough that a short branch can be used?
3394 //
3395 // NOTE: If the platform does not provide any short branch variants, then
3396 //       this method should return false for offset 0.
3397 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
3398   // The passed offset is relative to address of the branch.
3399 
3400   return (-32768 <= offset && offset < 32768);
3401 }
3402 
3403 const bool Matcher::isSimpleConstant64(jlong value) {
3404   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
3405   // Probably always true, even if a temp register is required.
3406   return true;
3407 }
3408 
3409 // true just means we have fast l2f conversion
3410 const bool Matcher::convL2FSupported(void) {
3411   return true;
3412 }
3413 
3414 // Vector width in bytes.
3415 const int Matcher::vector_width_in_bytes(BasicType bt) {
3416   int size = MIN2(16,(int)MaxVectorSize);
3417   // Minimum 2 values in vector
3418   if (size < 2*type2aelembytes(bt)) size = 0;
3419   // But never < 4
3420   if (size < 4) size = 0;
3421   return size;
3422 }
3423 
3424 // Limits on vector size (number of elements) loaded into vector.
3425 const int Matcher::max_vector_size(const BasicType bt) {
3426   return vector_width_in_bytes(bt)/type2aelembytes(bt);
3427 }
3428 const int Matcher::min_vector_size(const BasicType bt) {
3429 //  For the moment limit the vector size to 8 bytes
3430     int size = 8 / type2aelembytes(bt);
3431     if (size < 2) size = 2;
3432     return size;
3433 }
3434 
3435 // Vector ideal reg.
3436 const int Matcher::vector_ideal_reg(int len) {
3437   switch(len) {
3438     case  8: return Op_VecD;
3439     case 16: return Op_VecX;
3440   }
3441   ShouldNotReachHere();
3442   return 0;
3443 }
3444 
// Vector shift counts are always held in a full 128-bit register,
// regardless of the operand vector's size.
const int Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecX;
}
3448 
// AES support not yet implemented
// (x86 passes the original key so intrinsics can re-expand it; the
// AArch64 intrinsics use the expanded key directly).
const bool Matcher::pass_original_key_for_aes() {
  return false;
}
3453 
// AArch64 supports misaligned vector store/load (comment originally
// referred to x86; the behavior is the same here).
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}
3458 
// false => size gets scaled to BytesPerLong, ok.
// (The array-initialization count node carries longs, not bytes.)
const bool Matcher::init_array_count_is_in_bytes = false;
3461 
// Use conditional move (CMOVL)
// Extra cost of a long cmove over an int cmove; zero on AArch64
// because csel works identically on 32- and 64-bit registers.
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}
3467 
// Extra cost of a float cmove; zero because AArch64 has fcsel.
const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}
3472 
// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
// (AArch64 shift instructions use only the low 5/6 bits of the count.)
const bool Matcher::need_masked_shift_count = false;
3479 
// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Only profitable when decoding is a pure base-add (shift == 0).
  return Universe::narrow_oop_shift() == 0;
}
3493 
// Same question as above, but for narrow klass pointers.
bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}
3499 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.  AArch64 follows the RISC choice.
const bool Matcher::rematerialize_float_constants = false;
3506 
// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
// AArch64 handles misaligned doubles directly.
const bool Matcher::misaligned_doubles_ok = true;
3512 
// Platform hook for fixing up an implicit null check.  (The original
// "No-op on amd64" comment was stale: on AArch64 this path is not
// expected to be reached at all, hence Unimplemented().)
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}
3517 
// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.  AArch64 FP arithmetic is already
// IEEE-754 strict, so no extra rounding is needed.
const bool Matcher::strict_fp_requires_explicit_rounding = false;
3521 
// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return true; }
3525 
// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
3531 
// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers not be
// available to the callee.
bool Matcher::can_be_java_arg(int reg)
{
  // r0-r7 and v0-v7 are the AAPCS64 integer and FP argument registers;
  // the _H entries name the upper halves of the same registers in the
  // allocator's register file.
  return
    reg ==  R0_num || reg == R0_H_num ||
    reg ==  R1_num || reg == R1_H_num ||
    reg ==  R2_num || reg == R2_H_num ||
    reg ==  R3_num || reg == R3_H_num ||
    reg ==  R4_num || reg == R4_H_num ||
    reg ==  R5_num || reg == R5_H_num ||
    reg ==  R6_num || reg == R6_H_num ||
    reg ==  R7_num || reg == R7_H_num ||
    reg ==  V0_num || reg == V0_H_num ||
    reg ==  V1_num || reg == V1_H_num ||
    reg ==  V2_num || reg == V2_H_num ||
    reg ==  V3_num || reg == V3_H_num ||
    reg ==  V4_num || reg == V4_H_num ||
    reg ==  V5_num || reg == V5_H_num ||
    reg ==  V6_num || reg == V6_H_num ||
    reg ==  V7_num || reg == V7_H_num;
}
3557 
// Any register that may carry a Java argument may also be spilled.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}
3562 
// Should long division by a constant be expanded by hand-written
// assembly?  No: let the ideal-graph magic-number transform handle it.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}
3566 
// Register for DIVI projection of divmodI.
// AArch64 has no combined div/mod instruction, so this is never used.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}
3571 
// Register for MODI projection of divmodI.
// Unused on AArch64 (no combined div/mod instruction).
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}
3577 
// Register for DIVL projection of divmodL.
// Unused on AArch64 (no combined div/mod instruction).
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}
3583 
// Register for MODL projection of divmodL.
// Unused on AArch64 (no combined div/mod instruction).
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}
3589 
// Registers that must be saved around a method-handle invoke: the
// frame pointer, so the interpreter SP can be recovered afterwards.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
3593 
3594 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
3595   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
3596     Node* u = addp->fast_out(i);
3597     if (u->is_Mem()) {
3598       int opsize = u->as_Mem()->memory_size();
3599       assert(opsize > 0, "unexpected memory operand size");
3600       if (u->as_Mem()->memory_size() != (1<<shift)) {
3601         return false;
3602       }
3603     }
3604   }
3605   return true;
3606 }
3607 
const bool Matcher::convi2l_type_required = false;  // ConvI2L needs no explicit type node on AArch64
3609 
// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
//
// Returns true when the AddP's inputs were pushed for in-place matching
// as a complex addressing mode; false tells the caller to match the
// offset into a register instead.
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  // Simple base+offset form first.
  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
    return true;
  }

  Node *off = m->in(AddPNode::Offset);
  // Case 1: offset is (LShiftL x con) -- scaled-index addressing,
  // but only when every memory use accesses (1 << con) bytes.
  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
      // Are there other uses besides address expressions?
      !is_visited(off)) {
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(2), Visit);
    Node *conv = off->in(1);
    // Fold an inner ConvI2L into the addressing mode too (sxtw form).
    if (conv->Opcode() == Op_ConvI2L &&
        // Are there other uses besides address expressions?
        !is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Pre_Visit);
    } else {
      mstack.push(conv, Pre_Visit);
    }
    // NOTE(review): test_set here vs plain set() elsewhere -- presumably
    // equivalent since the return value is ignored; confirm.
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  // Case 2: offset is a bare ConvI2L -- sign-extended index register.
  } else if (off->Opcode() == Op_ConvI2L &&
             // Are there other uses besides address expressions?
             !is_visited(off)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(1), Pre_Visit);
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
3650 
// Transform:
// (AddP base (AddP base address (LShiftL index con)) offset)
// into:
// (AddP base (AddP base offset) (LShiftL index con))
// to take full advantage of ARM's addressing modes
void Compile::reshape_address(AddPNode* addp) {
  Node *addr = addp->in(AddPNode::Address);
  // Only reshape when both AddPs share the same base.
  if (addr->is_AddP() && addr->in(AddPNode::Base) == addp->in(AddPNode::Base)) {
    const AddPNode *addp2 = addr->as_AddP();
    // Inner offset must be a foldable scaled index (LShiftL by a constant
    // that fits every memory use) or a sign-extended index (ConvI2L).
    if ((addp2->in(AddPNode::Offset)->Opcode() == Op_LShiftL &&
         addp2->in(AddPNode::Offset)->in(2)->is_Con() &&
         size_fits_all_mem_uses(addp, addp2->in(AddPNode::Offset)->in(2)->get_int())) ||
        addp2->in(AddPNode::Offset)->Opcode() == Op_ConvI2L) {

      // Any use that can't embed the address computation?
      for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
        Node* u = addp->fast_out(i);
        if (!u->is_Mem() || u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) {
          return;
        }
      }

      Node* off = addp->in(AddPNode::Offset);
      Node* addr2 = addp2->in(AddPNode::Address);
      Node* base = addp->in(AddPNode::Base);

      Node* new_addr = NULL;
      // Check whether the graph already has the new AddP we need
      // before we create one (no GVN available here).
      for (DUIterator_Fast imax, i = addr2->fast_outs(imax); i < imax; i++) {
        Node* u = addr2->fast_out(i);
        if (u->is_AddP() &&
            u->in(AddPNode::Base) == base &&
            u->in(AddPNode::Address) == addr2 &&
            u->in(AddPNode::Offset) == off) {
          new_addr = u;
          break;
        }
      }

      if (new_addr == NULL) {
        new_addr = new AddPNode(base, addr2, off);
      }
      Node* new_off = addp2->in(AddPNode::Offset);
      // Swap the inner AddP and the outer offset; disconnect nodes
      // that have become dead so they do not keep inputs alive.
      addp->set_req(AddPNode::Address, new_addr);
      if (addr->outcnt() == 0) {
        addr->disconnect_inputs(NULL, this);
      }
      addp->set_req(AddPNode::Offset, new_off);
      if (off->outcnt() == 0) {
        off->disconnect_inputs(NULL, this);
      }
    }
  }
}
3706 
// helper for encoding java_to_runtime calls on sim
//
// this is needed to compute the extra arguments required when
// planting a call to the simulator blrt instruction. the TypeFunc
// can be queried to identify the counts for integral, and floating
// arguments and the return type

static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
{
  int gps = 0;
  int fps = 0;
  const TypeTuple *domain = tf->domain();
  int max = domain->cnt();
  for (int i = TypeFunc::Parms; i < max; i++) {
    const Type *t = domain->field_at(i);
    switch(t->basic_type()) {
    case T_FLOAT:
    case T_DOUBLE:
      fps++;
      // NOTE(review): no break here, so FP arguments fall through and
      // also increment gps, making gps the TOTAL argument count rather
      // than the integer-only count. Confirm this is what the simulator
      // blrt encoding expects before "fixing" it.
    default:
      gps++;
    }
  }
  gpcnt = gps;
  fpcnt = fps;
  BasicType rt = tf->return_type();
  // The default label precedes the FP cases on purpose: anything that
  // is not void/float/double is returned as an integral value.
  switch (rt) {
  case T_VOID:
    rtype = MacroAssembler::ret_type_void;
    break;
  default:
    rtype = MacroAssembler::ret_type_integral;
    break;
  case T_FLOAT:
    rtype = MacroAssembler::ret_type_float;
    break;
  case T_DOUBLE:
    rtype = MacroAssembler::ret_type_double;
    break;
  }
}
3748 
// Emit a volatile (acquire/release) load or store INSN of REG.
// Volatile accesses only support a bare base-register addressing mode,
// so any index/scale/displacement is rejected at code-emission time.
// Note: deliberately NOT wrapped in do/while(0) -- it introduces the
// local `_masm` that subsequent `__` statements in the enc_class use.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }
3757 
// Pointer-to-member types for the MacroAssembler load/store emitters,
// used by the loadStore() helpers below to dispatch the right insn.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
3762 
  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  //
  // reg/insn: the integer register and the member-function emitter.
  // opcode:   the matched operand's opcode, used only to select the
  //           index-extend mode (sxtw vs lsl).
  // base/index/size/disp: decoded memory-operand fields; index == -1
  //           means "no index register".
  static void loadStore(MacroAssembler masm, mem_insn insn,
                         Register reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
    case INDINDEXI2L:
    case INDINDEXI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    if (index == -1) {
      // Base + displacement form.
      (masm.*insn)(reg, Address(base, disp));
    } else {
      // Base + (possibly extended) index form; disp must be folded away.
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
3793 
  // Float-register variant of loadStore() above; same opcode kludge,
  // but only the scaled-I2L opcodes need the sign-extended index.
  static void loadStore(MacroAssembler masm, mem_float_insn insn,
                         FloatRegister reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

     if (index == -1) {
      // Base + displacement form.
      (masm.*insn)(reg, Address(base, disp));
    } else {
      // Base + index form; disp must be folded away.
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
3816 
  // SIMD-vector variant of loadStore(); T selects the register variant
  // (S/D/Q).  Vector accesses never use a sign-extended index.
  static void loadStore(MacroAssembler masm, mem_vector_insn insn,
                         FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
                         int opcode, Register base, int index, int size, int disp)
  {
    if (index == -1) {
      (masm.*insn)(reg, T, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode");
      (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
    }
  }
3828 
3829 %}
3830 
3831 
3832 
3833 //----------ENCODING BLOCK-----------------------------------------------------
3834 // This block specifies the encoding classes used by the compiler to
3835 // output byte streams.  Encoding classes are parameterized macros
3836 // used by Machine Instruction Nodes in order to generate the bit
3837 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  There are currently four
// supported interfaces: REG_INTER, CONST_INTER, MEMORY_INTER, and
3840 // COND_INTER.  REG_INTER causes an operand to generate a function
3841 // which returns its register number when queried.  CONST_INTER causes
3842 // an operand to generate a function which returns the value of the
3843 // constant when queried.  MEMORY_INTER causes an operand to generate
3844 // four functions which return the Base Register, the Index Register,
3845 // the Scale Value, and the Offset Value of the operand when queried.
3846 // COND_INTER causes an operand to generate six functions which return
3847 // the encoding code (ie - encoding bits for the instruction)
3848 // associated with each basic boolean condition for a conditional
3849 // instruction.
3850 //
3851 // Instructions specify two basic values for encoding.  Again, a
3852 // function is available to check if the constant displacement is an
3853 // oop. They use the ins_encode keyword to specify their encoding
3854 // classes (which must be a sequence of enc_class names, and their
3855 // parameters, specified in the encoding block), and they use the
3856 // opcode keyword to specify, in order, their primary, secondary, and
3857 // tertiary opcode.  Only the opcode sections which a particular
3858 // instruction needs for encoding need to be specified.
3859 encode %{
3860   // Build emit functions for each basic byte or larger field in the
3861   // intel encoding scheme (opcode, rm, sib, immediate), and call them
3862   // from C++ code in the enc_class source block.  Emit functions will
3863   // live in the main source block for now.  In future, we can
3864   // generalize this by adding a syntax that specifies the sizes of
3865   // fields in an order, so that the adlc can build the emit functions
3866   // automagically
3867 
3868   // catch all for unimplemented encodings
  // catch all for unimplemented encodings: emits a trap so any rule
  // that reaches here fails loudly at runtime rather than silently.
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}
3873 
  // BEGIN Non-volatile memory access

  // Each enc_class below emits one plain (non-acquire) load.  All of
  // them funnel through loadStore(), which decodes the operand's
  // base/index/scale/disp fields and, via $mem->opcode(), decides
  // whether the index register needs sign extension.
  // Naming: ldr{s}{b,h,w} = {sign-extending} byte/half/word load;
  // identically named enc_classes differ only in destination operand
  // type (iRegI vs iRegL), which is how ADLC overloads them.

  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Scalar FP loads (32- and 64-bit).
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // SIMD vector loads; the S/D/Q variant selects 32/64/128-bit width.
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
3977 
  // Plain (non-release) stores, mirroring the loads above.  The *0
  // variants store the zero register (zr) directly, avoiding the need
  // to materialize a zero constant.

  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Zero byte store preceded by a StoreStore barrier (used e.g. for
  // card marks that must be ordered after the preceding stores).
  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    // (r31 encodes either sp or zr depending on context), so copy
    // sp into a scratch register first.
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Scalar FP stores (32- and 64-bit).
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // SIMD vector stores; the S/D/Q variant selects 32/64/128-bit width.
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // END Non-volatile memory access
4072 
  // volatile loads and stores
  //
  // These use the acquire/release instructions (ldar*/stlr*), emitted
  // through MOV_VOLATILE, which only allows a bare base-register
  // addressing mode.  MOV_VOLATILE also declares the `_masm` used by
  // any `__` statements that follow it in an enc_class.

  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
  %}

  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
  %}

  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}


  // ldarb/ldarh zero-extend, so the signed variants sign-extend the
  // result explicitly after the acquiring load.
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}

  // There is no FP ldar: load into an integer scratch register with
  // acquire semantics, then move the bits into the FP register.
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
4165 
  // Volatile store of a long via store-release (stlr).
  // stlr cannot encode sp as a source, so a store of the stack pointer
  // (which happens when storing into the current thread) is routed through
  // rscratch2 first.
  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
        MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
4179 
  // Volatile store of a float: move the raw bits to rscratch2, then
  // store-release with stlrw. The inner scope keeps this _masm local so it
  // does not clash with the assembler MOV_VOLATILE sets up.
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}
4189 
  // Volatile store of a double: move the raw bits to rscratch2, then
  // store-release with stlr. See aarch64_enc_fstlrs for the scoping of _masm.
  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
4199 
4200   // synchronized read/update encodings
4201 
  // Load-exclusive with acquire semantics (ldaxr) from a general memory
  // operand. ldaxr only takes a bare base register, so any index/displacement
  // is first folded into rscratch1 with lea.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // Fold displacement first, then the scaled index.
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}
4230 
  // Store-exclusive with release semantics (stlxr). Address formation
  // mirrors aarch64_enc_ldaxr, using rscratch2 for the address since
  // rscratch1 receives the store-exclusive status.
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    // stlxr writes 0 to rscratch1 on success, so after this compare the
    // EQ flag indicates the store succeeded.
    __ cmpw(rscratch1, zr);
  %}
4260 
  // 64-bit compare-and-swap at [mem.base]. The matcher only presents a bare
  // base register here (no index/displacement), enforced by the guarantee.
  // Relaxed on the load side, releasing on the store side; the outcome is
  // consumed via the condition flags (see aarch64_enc_cset_eq).
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}
4268 
  // 32-bit compare-and-swap at [mem.base]; same contract as
  // aarch64_enc_cmpxchg but operating on a word.
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}
4276 
4277 
  // Shenandoah barrier-aware CAS of an oop. oldval is copied into tmp first
  // because the helper may clobber its expected-value register.
  // NOTE(review): '$res$$base$$Register' is an unusual accessor for a plain
  // register operand -- verify it resolves to the result register
  // ($res$$Register) as intended.
  // NOTE(review): this relaxed variant passes the same flags as the _acq
  // variant (acquire == true) -- confirm that is intentional.
  enc_class aarch64_enc_cmpxchg_oop_shenandoah(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval, iRegP tmp) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($res$$base$$Register, $mem$$base$$Register, tmp, $newval$$Register,
                              false, /*acquire*/ true, /*release*/ true);
  %}
4286 
  // The only difference between aarch64_enc_cmpxchg and
  // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
  // CompareAndSwap sequence to serve as a barrier on acquiring a
  // lock.
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}
4298 
  // 32-bit acquiring compare-and-swap; word-sized counterpart of
  // aarch64_enc_cmpxchg_acq.
  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}
4306 
4307 
  // Acquiring Shenandoah barrier-aware CAS of an oop; body is currently
  // identical to the relaxed variant above.
  // NOTE(review): see the notes on aarch64_enc_cmpxchg_oop_shenandoah about
  // the '$res$$base$$Register' accessor.
  enc_class aarch64_enc_cmpxchg_acq_oop_shenandoah(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval, iRegP tmp) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($res$$base$$Register, $mem$$base$$Register, tmp, $newval$$Register,
                              false, /*acquire*/ true, /*release*/ true);
  %}
4316 
4317   // auxiliary used for CompareAndSwapX to set result register
4318   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
4319     MacroAssembler _masm(&cbuf);
4320     Register res_reg = as_Register($res$$reg);
4321     __ cset(res_reg, Assembler::EQ);
4322   %}
4323 
4324   // prefetch encodings
4325 
4326   enc_class aarch64_enc_prefetchw(memory mem) %{
4327     MacroAssembler _masm(&cbuf);
4328     Register base = as_Register($mem$$base);
4329     int index = $mem$$index;
4330     int scale = $mem$$scale;
4331     int disp = $mem$$disp;
4332     if (index == -1) {
4333       __ prfm(Address(base, disp), PSTL1KEEP);
4334     } else {
4335       Register index_reg = as_Register(index);
4336       if (disp == 0) {
4337         __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
4338       } else {
4339         __ lea(rscratch1, Address(base, disp));
4340         __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
4341       }
4342     }
4343   %}
4344 
  /// mov encodings
4346 
4347   enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
4348     MacroAssembler _masm(&cbuf);
4349     u_int32_t con = (u_int32_t)$src$$constant;
4350     Register dst_reg = as_Register($dst$$reg);
4351     if (con == 0) {
4352       __ movw(dst_reg, zr);
4353     } else {
4354       __ movw(dst_reg, con);
4355     }
4356   %}
4357 
4358   enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
4359     MacroAssembler _masm(&cbuf);
4360     Register dst_reg = as_Register($dst$$reg);
4361     u_int64_t con = (u_int64_t)$src$$constant;
4362     if (con == 0) {
4363       __ mov(dst_reg, zr);
4364     } else {
4365       __ mov(dst_reg, con);
4366     }
4367   %}
4368 
  // Materialize a pointer constant. NULL and the magic value 1 are matched
  // by the dedicated immP0/immP_1 encodings, so reaching them here is a bug.
  // Oops and metadata go through relocation-aware moves; other addresses are
  // moved directly when below the first VM page, otherwise via adrp + add.
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          __ mov(dst_reg, con);
        } else {
          // adrp gives the enclosing 4K page; add in the in-page offset.
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}
4393 
4394   enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
4395     MacroAssembler _masm(&cbuf);
4396     Register dst_reg = as_Register($dst$$reg);
4397     __ mov(dst_reg, zr);
4398   %}
4399 
4400   enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
4401     MacroAssembler _masm(&cbuf);
4402     Register dst_reg = as_Register($dst$$reg);
4403     __ mov(dst_reg, (u_int64_t)1);
4404   %}
4405 
  // Load the safepoint polling page address with an adrp carrying a
  // poll-type relocation; the assert checks the in-page offset is zero
  // (the polling page address is page-aligned).
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}
4414 
  // Load the byte map (card table) base via the dedicated MacroAssembler
  // helper rather than a generic immediate move.
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}
4419 
  // Materialize a narrow (compressed) oop constant. Zero is matched by the
  // immN0 encoding, so NULL is unreachable here.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}
4432 
4433   enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
4434     MacroAssembler _masm(&cbuf);
4435     Register dst_reg = as_Register($dst$$reg);
4436     __ mov(dst_reg, zr);
4437   %}
4438 
  // Materialize a narrow (compressed) klass constant with a metadata
  // relocation; NULL is never a valid klass pointer here.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
4451 
4452   // arithmetic encodings
4453 
  // 32-bit add/subtract of an immediate. The instruct's primary opcode bit
  // selects add (0) or subtract (1); subtraction is handled by negating the
  // constant, and the final sign check picks addw/subw so the assembler is
  // always handed a non-negative immediate.
  // NOTE(review): the negation assumes con != INT_MIN; presumably the
  // immIAddSub operand range excludes it -- confirm.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}
4467 
  // 64-bit add/subtract of an immediate; same primary-bit scheme as the
  // 32-bit variant above.
  // NOTE(review): the long constant is truncated to int32_t; presumably the
  // immLAddSub operand guarantees it fits -- confirm.
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}
4481 
4482   enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
4483     MacroAssembler _masm(&cbuf);
4484    Register dst_reg = as_Register($dst$$reg);
4485    Register src1_reg = as_Register($src1$$reg);
4486    Register src2_reg = as_Register($src2$$reg);
4487     __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
4488   %}
4489 
4490   enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
4491     MacroAssembler _masm(&cbuf);
4492    Register dst_reg = as_Register($dst$$reg);
4493    Register src1_reg = as_Register($src1$$reg);
4494    Register src2_reg = as_Register($src2$$reg);
4495     __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
4496   %}
4497 
4498   enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
4499     MacroAssembler _masm(&cbuf);
4500    Register dst_reg = as_Register($dst$$reg);
4501    Register src1_reg = as_Register($src1$$reg);
4502    Register src2_reg = as_Register($src2$$reg);
4503     __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
4504   %}
4505 
4506   enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
4507     MacroAssembler _masm(&cbuf);
4508    Register dst_reg = as_Register($dst$$reg);
4509    Register src1_reg = as_Register($src1$$reg);
4510    Register src2_reg = as_Register($src2$$reg);
4511     __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
4512   %}
4513 
4514   // compare instruction encodings
4515 
4516   enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
4517     MacroAssembler _masm(&cbuf);
4518     Register reg1 = as_Register($src1$$reg);
4519     Register reg2 = as_Register($src2$$reg);
4520     __ cmpw(reg1, reg2);
4521   %}
4522 
4523   enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
4524     MacroAssembler _masm(&cbuf);
4525     Register reg = as_Register($src1$$reg);
4526     int32_t val = $src2$$constant;
4527     if (val >= 0) {
4528       __ subsw(zr, reg, val);
4529     } else {
4530       __ addsw(zr, reg, -val);
4531     }
4532   %}
4533 
4534   enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
4535     MacroAssembler _masm(&cbuf);
4536     Register reg1 = as_Register($src1$$reg);
4537     u_int32_t val = (u_int32_t)$src2$$constant;
4538     __ movw(rscratch1, val);
4539     __ cmpw(reg1, rscratch1);
4540   %}
4541 
4542   enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
4543     MacroAssembler _masm(&cbuf);
4544     Register reg1 = as_Register($src1$$reg);
4545     Register reg2 = as_Register($src2$$reg);
4546     __ cmp(reg1, reg2);
4547   %}
4548 
  // 64-bit compare with a 12-bit add/sub immediate. Negative values are
  // compared with a flag-setting add of the negation, except
  // Long.MIN_VALUE, whose negation is itself and must be materialized.
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
    // aargh, Long.MIN_VALUE is a special case
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}
4563 
4564   enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
4565     MacroAssembler _masm(&cbuf);
4566     Register reg1 = as_Register($src1$$reg);
4567     u_int64_t val = (u_int64_t)$src2$$constant;
4568     __ mov(rscratch1, val);
4569     __ cmp(reg1, rscratch1);
4570   %}
4571 
4572   enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
4573     MacroAssembler _masm(&cbuf);
4574     Register reg1 = as_Register($src1$$reg);
4575     Register reg2 = as_Register($src2$$reg);
4576     __ cmp(reg1, reg2);
4577   %}
4578 
4579   enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
4580     MacroAssembler _masm(&cbuf);
4581     Register reg1 = as_Register($src1$$reg);
4582     Register reg2 = as_Register($src2$$reg);
4583     __ cmpw(reg1, reg2);
4584   %}
4585 
4586   enc_class aarch64_enc_testp(iRegP src) %{
4587     MacroAssembler _masm(&cbuf);
4588     Register reg = as_Register($src$$reg);
4589     __ cmp(reg, zr);
4590   %}
4591 
4592   enc_class aarch64_enc_testn(iRegN src) %{
4593     MacroAssembler _masm(&cbuf);
4594     Register reg = as_Register($src$$reg);
4595     __ cmpw(reg, zr);
4596   %}
4597 
4598   enc_class aarch64_enc_b(label lbl) %{
4599     MacroAssembler _masm(&cbuf);
4600     Label *L = $lbl$$label;
4601     __ b(*L);
4602   %}
4603 
4604   enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
4605     MacroAssembler _masm(&cbuf);
4606     Label *L = $lbl$$label;
4607     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
4608   %}
4609 
4610   enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
4611     MacroAssembler _masm(&cbuf);
4612     Label *L = $lbl$$label;
4613     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
4614   %}
4615 
  // Slow-path partial subtype check: scans sub's secondary supers for super.
  // On a hit the helper falls through (L_success == NULL); on a miss it
  // branches to 'miss'. When the primary opcode bit is set, the result
  // register is zeroed on the fall-through (hit) path before 'miss' is bound.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
4633 
  // Emit a static/opt-virtual Java call, or a runtime-wrapper call when no
  // resolved method is attached. Java calls also emit the to-interpreter
  // stub. Either emission can exhaust the code cache, in which case the
  // compile is failed and we bail out.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      call = __ trampoline_call(Address(addr, rspec), &cbuf);

      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}
4660 
  // Emit a virtual (inline-cache) Java call; bail out the compile if the
  // code cache is full.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}
4670 
  // Post-call epilogue; the VerifyStackAtCalls check is not implemented on
  // AArch64 and traps if enabled.
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
4678 
  // Call from compiled Java code into the VM runtime.
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blrt
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      int gpcnt;
      int fpcnt;
      int rtype;
      getCallInfo(tf(), gpcnt, fpcnt, rtype);
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc():
      // push the return address (and a zero pad slot) so the stack walker
      // can find the last Java pc; popped again after the call.
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
      __ bind(retaddr);
      __ add(sp, sp, 2 * wordSize);
    }
  %}
4709 
  // Jump to the rethrow stub to re-dispatch the pending exception.
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}
4714 
  // Method return: branch to the link register.
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}
4719 
4720   enc_class aarch64_enc_tail_call(iRegP jump_target) %{
4721     MacroAssembler _masm(&cbuf);
4722     Register target_reg = as_Register($jump_target$$reg);
4723     __ br(target_reg);
4724   %}
4725 
4726   enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
4727     MacroAssembler _masm(&cbuf);
4728     Register target_reg = as_Register($jump_target$$reg);
4729     // exception oop should be in r0
4730     // ret addr has been popped into lr
4731     // callee expects it in r3
4732     __ mov(r3, lr);
4733     __ br(target_reg);
4734   %}
4735 
  // Fast-path monitor enter. Attempts (1) biased locking, (2) a stack-lock
  // CAS installing 'box' into the object's mark word, (3) the recursive
  // stack-lock check, and (4) CAS-ing the inflated monitor's owner field.
  // Exit condition: flags EQ on success, NE on failure (slow path needed).
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Load markOop from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      // oop is never null here, so this forces NE (failure -> slow path).
      __ cmp(oop, zr);
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
    }

    // Handle existing monitor
    if ((EmitSync & 0x02) == 0) {
      // we can use AArch64's bit test and branch here but
      // markOopDesc does not define a bit index just the bit value
      // so assert in case the bit pos changes
#     define __monitor_value_log2 1
      assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
      __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
#     undef __monitor_value_log2
    }

    // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
    __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);

    // Load Compare Value application register.

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markOop with mark and if equal exchange scratch1
    // with object markOop.
    if (UseLSE) {
      __ mov(tmp, disp_hdr);
      __ casal(Assembler::xword, tmp, box, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::EQ, cont);
    } else {
      Label retry_load;
      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldaxr(tmp, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, box, oop);
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    // Formerly:
    // __ cmpxchgptr(/*oldv=*/disp_hdr,
    //               /*newv=*/box,
    //               /*addr=*/oop,
    //               /*tmp=*/tmp,
    //               cont,
    //               /*fail*/NULL);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object, will have now locked it, and will continue at label cont

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markOop of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      // The object's monitor m is unlocked iff m->owner == NULL,
      // otherwise m->owner may contain a thread or a stack address.
      //
      // Try to CAS m->owner from NULL to current thread.
      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
      __ mov(disp_hdr, zr);

      if (UseLSE) {
        __ mov(rscratch1, disp_hdr);
        __ casal(Assembler::xword, rscratch1, rthread, tmp);
        __ cmp(rscratch1, disp_hdr);
      } else {
        Label retry_load, fail;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(tmp), PSTL1STRM);
        __ bind(retry_load);
        __ ldaxr(rscratch1, tmp);
        __ cmp(disp_hdr, rscratch1);
        __ br(Assembler::NE, fail);
        // use stlxr to ensure update is immediately visible
        __ stlxr(rscratch1, rthread, tmp);
        __ cbnzw(rscratch1, retry_load);
        __ bind(fail);
      }

      // Label next;
      // __ cmpxchgptr(/*oldv=*/disp_hdr,
      //               /*newv=*/rthread,
      //               /*addr=*/tmp,
      //               /*tmp=*/rscratch1,
      //               /*succeed*/next,
      //               /*fail*/NULL);
      // __ bind(next);

      // store a non-null value into the box.
      __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));

      // PPC port checks the following invariants
      // #ifdef ASSERT
      // bne(flag, cont);
      // We have acquired the monitor, check some invariants.
      // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
      // Invariant 1: _recursions should be 0.
      // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
      // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
      //                        "monitor->_recursions should be 0", -1);
      // Invariant 2: OwnerIsThread shouldn't be 0.
      // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
      //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
      //                           "monitor->OwnerIsThread shouldn't be 0", -1);
      // #endif
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure

  %}
4890 
  // TODO
  // reimplement this with custom cmpxchgptr code
  // which avoids some of the unnecessary branching
  //
  // Fast-path monitor exit: undoes biased locking, handles the recursive
  // stack-lock case (displaced header == 0), CASes the stack lock back out
  // of the mark word, and releases an inflated monitor when it is owned by
  // this thread with no recursions/waiters.
  // Exit condition: flags EQ on success, NE on failure (slow path needed).
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);


    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

      if (UseLSE) {
        __ mov(tmp, box);
        __ casl(Assembler::xword, tmp, disp_hdr, oop);
        __ cmp(tmp, box);
      } else {
        Label retry_load;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(oop), PSTL1STRM);
        __ bind(retry_load);
        __ ldxr(tmp, oop);
        __ cmp(box, tmp);
        __ br(Assembler::NE, cas_failed);
        // use stlxr to ensure update is immediately visible
        __ stlxr(tmp, disp_hdr, oop);
        __ cbzw(tmp, cont);
        __ b(retry_load);
      }

    // __ cmpxchgptr(/*compare_value=*/box,
    //               /*exchange_value=*/disp_hdr,
    //               /*where=*/oop,
    //               /*result=*/tmp,
    //               cont,
    //               /*cas_failed*/NULL);
    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    __ bind(cas_failed);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);

      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      __ cmp(rscratch1, zr);
      __ cbnz(rscratch1, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(rscratch1, tmp); // rscratch1 is zero
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
4989 
4990 %}
4991 
4992 //----------FRAME--------------------------------------------------------------
4993 // Definition of frame structure and management information.
4994 //
4995 //  S T A C K   L A Y O U T    Allocators stack-slot number
4996 //                             |   (to get allocators register number
4997 //  G  Owned by    |        |  v    add OptoReg::stack0())
4998 //  r   CALLER     |        |
4999 //  o     |        +--------+      pad to even-align allocators stack-slot
5000 //  w     V        |  pad0  |        numbers; owned by CALLER
5001 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
5002 //  h     ^        |   in   |  5
5003 //        |        |  args  |  4   Holes in incoming args owned by SELF
5004 //  |     |        |        |  3
5005 //  |     |        +--------+
5006 //  V     |        | old out|      Empty on Intel, window on Sparc
5007 //        |    old |preserve|      Must be even aligned.
5008 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
5009 //        |        |   in   |  3   area for Intel ret address
5010 //     Owned by    |preserve|      Empty on Sparc.
5011 //       SELF      +--------+
5012 //        |        |  pad2  |  2   pad to align old SP
5013 //        |        +--------+  1
5014 //        |        | locks  |  0
5015 //        |        +--------+----> OptoReg::stack0(), even aligned
5016 //        |        |  pad1  | 11   pad to align new SP
5017 //        |        +--------+
5018 //        |        |        | 10
5019 //        |        | spills |  9   spills
5020 //        V        |        |  8   (pad0 slot for callee)
5021 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
5022 //        ^        |  out   |  7
5023 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
5024 //     Owned by    +--------+
5025 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
5026 //        |    new |preserve|      Must be even-aligned.
5027 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
5028 //        |        |        |
5029 //
5030 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
5031 //         known from SELF's arguments and the Java calling convention.
5032 //         Region 6-7 is determined per call site.
5033 // Note 2: If the calling convention leaves holes in the incoming argument
5034 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
5036 //         incoming area, as the Java calling convention is completely under
5037 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
5039 //         varargs C calling conventions.
5040 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
5041 //         even aligned with pad0 as needed.
5042 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
5043 //           (the latter is true on Intel but is it false on AArch64?)
5044 //         region 6-11 is even aligned; it may be padded out more so that
5045 //         the region from SP to FP meets the minimum stack alignment.
5046 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
5047 //         alignment.  Region 11, pad1, may be dynamically extended so that
5048 //         SP meets the minimum alignment.
5049 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  // Must agree with the method_reg register class used by the
  // inline_cache_RegP operand defined later in this file.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  // NOTE(review): R31 here names the ADL register definition used as the
  // compiled-code frame pointer — presumably SP on AArch64; confirm
  // against the reg_def table at the top of this file.
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Tables indexed by the ideal register opcode.  Note the Op_RegN
    // rows are populated even though the assert above currently
    // restricts ideal_reg to the Op_RegI..Op_RegL range.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    // High-half registers; OptoReg::Bad marks single-slot values.
    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                       // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
5153 
//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(1);        // Required cost attribute; presumably the
                             // default for operands that omit op_cost —
                             // nearly all operand definitions below
                             // override it to 0.

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(INSN_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
5171 
5172 //----------OPERANDS-----------------------------------------------------------
5173 // Operand definitions must precede instruction definitions for correct parsing
5174 // in the ADLC because operands constitute user defined types which are used in
5175 // instruction definitions.
5176 
//----------Simple Operands----------------------------------------------------

// Integer operands 32 bit
// 32 bit immediate
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer no greater than 4
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 31
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 255 (0xff byte mask)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 65535 (0xffff halfword mask)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5332 
// Constant 63.  NOTE(review): despite the immL_ prefix this matches a
// 32 bit int constant (ConI / get_int) — presumably because long shift
// counts are ints in the ideal graph; confirm against the match rules
// that use it before renaming or changing it.
operand immL_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant 255.  NOTE(review): matches ConI / get_int despite the
// immL_ prefix — see the note on immL_63 above.
operand immL_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 0xffff (halfword mask)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 0xffffffff (low 32 bit mask)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask of the form 2^k - 1 with the top two bits clear,
// i.e. a contiguous low-order bit mask of at most 62 bits
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of the form 2^k - 1 with the top two bits clear,
// i.e. a contiguous low-order bit mask of at most 30 bits
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5394 
// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset, long variant of immIU12
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a 4 byte (2^2 scaled) access
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an 8 byte (2^3 scaled) access
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a 16 byte (2^4 scaled) access
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset for scaled or unscaled immediate loads and stores
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset valid for a 4 byte (2^2 scaled) access
operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset valid for an 8 byte (2^3 scaled) access
operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset valid for a 16 byte (2^4 scaled) access
operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5529 
// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer operands 64 bit
// 64 bit immediate
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor
// (byte offset of last_Java_pc within the JavaThread's frame anchor)

operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5638 
// Pointer operands
// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map
  predicate((jbyte*)n->get_ptr() ==
        ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
// NOTE(review): comment duplicated from immP_M1 — confirm the distinct
// purpose of the -2 sentinel at its use sites.
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5720 
// Float and Double operands
// Double Immediate
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double immediate representable as a packed (8-bit encodable) FP
// constant, i.e. valid as an fmov immediate.
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float immediate representable as a packed (8-bit encodable) FP
// constant, i.e. valid as an fmov immediate.
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow pointer operands
// Narrow Pointer Immediate
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow Klass Pointer Immediate
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5812 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5846 
// Integer 64 bit Register not Special
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  match(iRegL_R0);
  // Explicit zero cost for consistency with every other register
  // operand in this file; without it the op_attrib default (1) applies.
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5856 
// Pointer Register Operands
// Pointer Register
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// The following operands pin allocation to a single named register;
// they are used where a fixed register is required (e.g. runtime call
// calling conventions).

// Pointer 64 bit Register R0 only
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R5 only
operand iRegP_R5()
%{
  constraint(ALLOC_IN_RC(r5_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R10 only
operand iRegP_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5973 
// Fixed-register long and int operands; each constrains allocation to
// the single named register.

// Long 64 bit Register R0 only
operand iRegL_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R2 only
operand iRegL_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R3 only
operand iRegL_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R11 only
operand iRegL_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register FP only
operand iRegP_FP()
%{
  constraint(ALLOC_IN_RC(fp_reg));
  match(RegP);
  // match(iRegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R0 only
operand iRegI_R0()
%{
  constraint(ALLOC_IN_RC(int_r0_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R2 only
operand iRegI_R2()
%{
  constraint(ALLOC_IN_RC(int_r2_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R3 only
operand iRegI_R3()
%{
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Register R4 only
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6074 
// Pointer Register Operands
// Narrow Pointer Register
operand iRegN()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R0 only
operand iRegN_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R2 only
operand iRegN_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R3 only
operand iRegN_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register not Special
// (allocates from the 32 bit no_special class, like iRegINoSp)
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// heap base register -- used for encoding immN0

operand iRegIHeapbase()
%{
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6134 
// Float Register
// Float register operands
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 64 bit vector register operand
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 128 bit vector register operand
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double register pinned to V0
operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double register pinned to V1
operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double register pinned to V2
operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double register pinned to V3
operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6214 
6215 // Flags register, used as output of signed compare instructions
6216 
6217 // note that on AArch64 we also use this register as the output for
6218 // for floating point compare instructions (CmpF CmpD). this ensures
6219 // that ordered inequality tests use GT, GE, LT or LE none of which
6220 // pass through cases where the result is unordered i.e. one or both
6221 // inputs to the compare is a NaN. this means that the ideal code can
6222 // replace e.g. a GT with an LE and not end up capturing the NaN case
6223 // (where the comparison should always fail). EQ and NE tests are
6224 // always generated in ideal code so that unordered folds into the NE
6225 // case, matching the behaviour of AArch64 NE.
6226 //
6227 // This differs from x86 where the outputs of FP compares use a
6228 // special FP flags registers and where compares based on this
6229 // register are distinguished into ordered inequalities (cmpOpUCF) and
6230 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
6231 // to explicitly handle the unordered case in branches. x86 also has
6232 // to include extra CMoveX rules to accept a cmpOpUCF input.
6233 
// Condition flags (NZCV) operand for signed integer and FP compares.
operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of unsigned compare instructions
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}
6254 
6255 // Special Registers
6256 
6257 // Method Register
// Pointer operand pinned to the inline-cache register.
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer operand pinned to the interpreter method-oop register.
operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Thread Register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg (was mislabelled "link_reg")
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Link register operand.
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6296 
6297 //----------Memory Operands----------------------------------------------------
6298 
// Base-register-only addressing: [reg].
// index(0xffffffff) appears to be this file's "no index register"
// sentinel — used by all base+disp memory operands below.
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}
6312 
// Base + sign-extended 32-bit index shifted by a constant scale:
// [reg, wN sxtw #scale]. The predicate restricts this to cases where
// the shift amount is legal for every memory use of the address.
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Base + 64-bit index shifted by a constant scale: [reg, xN lsl #scale].
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}
6342 
// Base + sign-extended 32-bit index, unscaled: [reg, wN sxtw].
operand indIndexI2L(iRegP reg, iRegI ireg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// Base + 64-bit index, unscaled: [reg, xN].
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
6370 
// Base + immediate-offset addressing operands: [reg, #off].
// The I/L suffix gives the offset's ideal type; the 4/8/16 suffix
// restricts the offset to multiples suitable for that access size
// (via the corresponding immIOffsetN/immLoffsetN immediate operands).

operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI4(iRegP reg, immIOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI8(iRegP reg, immIOffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI16(iRegP reg, immIOffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL4(iRegP reg, immLoffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL8(iRegP reg, immLoffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL16(iRegP reg, immLoffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6482 
// Narrow-oop (compressed pointer) variants of the memory operands above.
// Each one matches an address built from (DecodeN reg) and is only legal
// when narrow_oop_shift() == 0, i.e. when the compressed oop is the raw
// address and the decode can be folded into the addressing mode.

operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indIndexI2LN(iRegN reg, iRegI ireg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6587 
6588 
6589 
6590 // AArch64 opto stubs need to write to the pc slot in the thread anchor
// AArch64 opto stubs need to write to the pc slot in the thread anchor
// Address of the last-Java-pc slot: [thread, #pc_offset].
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6604 
6605 //----------Special Memory Operands--------------------------------------------
6606 // Stack Slot Operand - This operand is used for loading and storing temporary
6607 //                      values on the stack where a match requires a value to
6608 //                      flow through memory.
// Stack-slot operands (one per ideal type). Generated only by the
// matcher when a value must flow through memory; never matched from
// user rules. base(0x1e) with disp($reg) encodes an SP-relative slot
// — NOTE(review): the "RSP" comment is an x86 holdover; on AArch64
// this is sp. TODO confirm 0x1e is sp's encoding in this file's
// register order.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
6679 
6680 // Operands for expressing Control Flow
6681 // NOTE: Label is a predefined operand which should not be redefined in
6682 //       the AD file. It is generically handled within the ADLC.
6683 
6684 //----------Conditional Branch Operands----------------------------------------
6685 // Comparison Op  - This is the operation of the comparison, and is limited to
6686 //                  the following set of codes:
6687 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6688 //
6689 // Other attributes of the comparison, such as unsignedness, are specified
6690 // by the comparison instruction that sets a condition code flags register.
6691 // That result is represented by a flags operand whose subtype is appropriate
6692 // to the unsignedness (etc.) of the comparison.
6693 //
6694 // Later, the instruction which matches both the Comparison Op (a Bool) and
6695 // the flags (produced by the Cmp) specifies the coding of the comparison op
6696 // by matching a specific subtype of Bool operand below, such as cmpOpU.
6697 
6698 // used for signed integral comparisons and fp comparisons
6699 
// Signed/FP comparison op. The hex values are the AArch64 condition
// code encodings (eq=0, ne=1, lt=0xb, ge=0xa, le=0xd, gt=0xc,
// vs=6, vc=7).
operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// Unsigned comparison op: uses the unsigned condition codes
// lo/hs/ls/hi in place of lt/ge/le/gt.
operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6735 
6736 // used for certain integral comparisons which can be
6737 // converted to cbxx or tbxx instructions
6738 
// Restricted comparison ops. Each predicate limits the Bool test to
// the subset of conditions that can be lowered to compare-and-branch
// (cbz/cbnz) or test-bit-and-branch (tbz/tbnz) instructions.

// eq/ne only — candidates for cbz/cbnz.
operand cmpOpEqNe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);
  predicate(n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::eq);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// lt/ge only — candidates for tbz/tbnz on the sign bit.
operand cmpOpLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// eq/ne/lt/ge — the unsigned variants convertible to cbxx/tbxx.
operand cmpOpUEqNeLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::eq
            || n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6811 
6812 // Special operand allowing long args to int ops to be truncated for free
6813 
// Special operand allowing long args to int ops to be truncated for
// free: matches (ConvL2I reg) so the 32-bit consumer can read the low
// half of the 64-bit register directly and the explicit l2i (movw)
// can be elided.
operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  // terminate with ';' for consistency with every other REG_INTER
  // operand in this file
  interface(REG_INTER);
%}
6824 
// Memory operand classes for vector loads/stores, one per access size
// (4/8/16 bytes); the offset operands are aligned to the access size.
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
6828 
6829 //----------OPERAND CLASSES----------------------------------------------------
6830 // Operand Classes are groups of operands that are used as to simplify
6831 // instruction definitions by not requiring the AD writer to specify
6832 // separate instructions for every form of operand when the
6833 // instruction accepts multiple operand types with the same basic
6834 // encoding and format. The classic case of this is memory operands.
6835 
6836 // memory is used to define read/write location for load/store
6837 // instruction defs. we can turn a memory op into an Address
6838 
// General memory operand class: every plain and narrow-oop addressing
// mode defined above, usable by scalar load/store rules.
opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
6841 
6842 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
6843 // operations. it allows the src to be either an iRegI or a (ConvL2I
6844 // iRegL). in the latter case the l2i normally planted for a ConvL2I
6845 // can be elided because the 32-bit instruction will just employ the
6846 // lower 32 bits anyway.
6847 //
6848 // n.b. this does not elide all L2I conversions. if the truncated
6849 // value is consumed by more than one operation then the ConvL2I
6850 // cannot be bundled into the consuming nodes so an l2i gets planted
6851 // (actually a movw $dst $src) and the downstream instructions consume
6852 // the result of the l2i as an iRegI input. That's a shame since the
6853 // movw is actually redundant but its not too costly.
6854 
6855 opclass iRegIorL2I(iRegI, iRegL2I);
6856 
6857 //----------PIPELINE-----------------------------------------------------------
6858 // Rules which define the behavior of the target architectures pipeline.
6859 
// For specific pipelines, eg A53, define the stages of that pipeline
//pipe_desc(ISS, EX1, EX2, WR);
// Map symbolic A53-style stage names onto the generic S0..S5 stages
// declared in pipe_desc below.
#define ISS S0
#define EX1 S1
#define EX2 S2
#define WR  S3
6866 
6867 // Integer ALU reg operation
6868 pipeline %{
6869 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // Fixed size instructions
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
6882 
6883 // We don't use an actual pipeline model so don't care about resources
6884 // or description. we do use pipeline classes to introduce fixed
6885 // latencies
6886 
6887 //----------RESOURCES----------------------------------------------------------
6888 // Resources are the functional units available to the machine
6889 
// Functional units: two issue slots (INS0/INS1, INS01 = either),
// two ALUs, and single multiply-accumulate, divide, branch,
// load/store and NEON/FP units.
resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);
6897 
6898 //----------PIPELINE DESCRIPTION-----------------------------------------------
6899 // Pipeline Description specifies the stages in the machine's pipeline
6900 
6901 // Define the pipeline as a generic 6 stage pipeline
6902 pipe_desc(S0, S1, S2, S3, S4, S5);
6903 
6904 //----------PIPELINE CLASSES---------------------------------------------------
6905 // Pipeline Classes describe the stages in which input and output are
6906 // referenced by the hardware pipeline.
6907 
// FP dyadic op, single precision: result available at S5.
pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP dyadic op, double precision.
pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP unary op, single precision.
pipe_class fp_uop_s(vRegF dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP unary op, double precision.
pipe_class fp_uop_d(vRegD dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP double -> float conversion.
pipe_class fp_d2f(vRegF dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP float -> double conversion.
pipe_class fp_f2d(vRegD dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
6963 
// FP <-> integer conversion pipe classes. All share the same shape:
// source read at S1, destination written at S5, dual-issue capable
// (INS01), NEON/FP unit busy until S5.

pipe_class fp_f2i(iRegINoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_l2f(vRegF dst, iRegL src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_d2i(iRegINoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
7035 
// FP divide: single-issue only (INS0), NEON/FP unit busy to S5.
pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

// FP conditional select: also reads the flags register at S1.
pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}
7077 
// FP immediate move (fmov #imm): no source operands.
pipe_class fp_imm_s(vRegF dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class fp_imm_d(vRegD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP constant load from the constant pool: result at S4.
pipe_class fp_load_constant_s(vRegF dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}

pipe_class fp_load_constant_d(vRegD dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}
7109 
// Vector multiply, 64-bit vectors: dual-issue capable.
pipe_class vmul64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector multiply, 128-bit vectors: single-issue only (INS0).
pipe_class vmul128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector multiply-accumulate: dst is also read (the accumulator).
pipe_class vmla64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class vmla128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
7151 
// Vector integer dyadic op (add/sub etc), 64-bit vectors.
pipe_class vdop64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S4;
%}

pipe_class vdop128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S4;
%}

// Vector logical op (and/orr/eor), 64-bit vectors.
pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
7191 
// Vector shift by a shift-count held in a vector register.
pipe_class vshift64(vecD dst, vecD src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vshift128(vecX dst, vecX src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// Vector shift by immediate: the shift operand is encoded, not read.
pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
7229 
// Vector FP dyadic op, 64-bit vectors.
pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP multiply/divide: single-issue only.
pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP square root, 128-bit.
pipe_class vsqrt_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
7278 
// Vector FP unary op.
pipe_class vunop_fp64(vecD dst, vecD src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class vunop_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Duplicate a GP or FP register value across all vector lanes.
pipe_class vdup_reg_reg64(vecD dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vdup_reg_reg128(vecX dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vdup_reg_freg64(vecD dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vdup_reg_freg128(vecX dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7341 
// Vector immediate move (movi): no register sources.
pipe_class vmovi_reg_imm64(vecD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vmovi_reg_imm128(vecX dst)
%{
  single_instruction;
  dst    : S3(write);
  INS0   : ISS;
  NEON_FP : S3;
%}

// Vector load from memory: address consumed at issue.
pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7375 
// Vector store to memory, 64-bit vector source.
pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7384 
// Vector store to memory, 128-bit vector source.
// Fix: the source was declared vecD (copy-paste from the 64-bit
// class); a 128-bit store takes a vecX, matching vload_reg_mem128.
pipe_class vstore_reg_mem128(vecX src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7393 
7394 //------- Integer ALU operations --------------------------
7395 
7396 // Integer ALU reg-reg operation
7397 // Operands needed in EX1, result generated in EX2
7398 // Eg.  ADD     x0, x1, x2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);  // result produced in EX2
  src1   : EX1(read);   // both operands needed in EX1
  src2   : EX1(read);
  INS01  : ISS; // Dual issue as instruction 0 or 1
  ALU    : EX2;
%}
7408 
7409 // Integer ALU reg-reg operation with constant shift
7410 // Shifted register must be available in LATE_ISS instead of EX1
7411 // Eg.  ADD     x0, x1, x2, LSL #2
pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : ISS(read);   // shifted operand needed early (late ISS)
  INS01  : ISS;
  ALU    : EX2;
%}
7421 
7422 // Integer ALU reg operation with constant shift
7423 // Eg.  LSL     x0, x1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);   // shifted source needed at late issue
  INS01  : ISS;
  ALU    : EX2;
%}
7432 
7433 // Integer ALU reg-reg operation with variable shift
7434 // Both operands must be available in LATE_ISS instead of EX1
7435 // Result is available in EX1 instead of EX2
7436 // Eg.  LSLV    x0, x1, x2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX1(write);  // variable shifts complete a stage earlier (EX1)
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX1;
%}
7446 
7447 // Integer ALU reg-reg operation with extract
7448 // As for _vshift above, but result generated in EX2
7449 // Eg.  EXTR    x0, x1, x2, #N
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  // NOTE(review): header says result generated in EX2 and dst is EX2(write),
  // yet the ALU is booked at EX1 — confirm which stage is intended.
  ALU    : EX1;
%}
7459 
7460 // Integer ALU reg operation
7461 // Eg.  NEG     x0, x1
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX2(write);
  src    : EX1(read);
  INS01  : ISS;         // can dual issue as instruction 0 or 1
  ALU    : EX2;
%}
7470 
// Integer ALU reg-immediate operation
7472 // Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;         // can dual issue as instruction 0 or 1
  ALU    : EX2;
%}
7481 
7482 // Integer ALU immediate operation (no source operands)
7483 // Eg.  MOV     x0, #N
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX1(write);  // no source operands, so completes in EX1
  INS01  : ISS;
  ALU    : EX1;
%}
7491 
7492 //------- Compare operation -------------------------------
7493 
7494 // Compare reg-reg
7495 // Eg.  CMP     x0, x1
pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  single_instruction;
//  fixed_latency(16);  // (left disabled in the original description)
  cr     : EX2(write);  // flags produced in EX2
  op1    : EX1(read);
  op2    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}
7506 
// Compare reg-imm
7508 // Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);  // (left disabled in the original description)
  cr     : EX2(write);  // flags produced in EX2
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}
7518 
7519 //------- Conditional instructions ------------------------
7520 
7521 // Conditional no operands
7522 // Eg.  CSINC   x0, zr, zr, <cond>
pipe_class icond_none(iRegI dst, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);   // flags consumed in EX1
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
7531 
7532 // Conditional 2 operand
7533 // EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);   // flags and both sources consumed in EX1
  src1   : EX1(read);
  src2   : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
7544 
// Conditional 1 operand
// EG.  CSEL    X0, X1, X1, <cond>
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);   // single source operand variant
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
7556 
7557 //------- Multiply pipeline operations --------------------
7558 
7559 // Multiply reg-reg
7560 // Eg.  MUL     w0, w1, w2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);   // result at writeback
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;          // uses the multiply/accumulate unit
%}
7570 
7571 // Multiply accumulate
7572 // Eg.  MADD    w0, w1, w2, w3
pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);   // accumulator operand
  INS01  : ISS;
  MAC    : WR;
%}
7583 
// Long multiply reg-reg
// Eg.  MUL     x0, x1, x2
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}
7595 
// Long multiply accumulate
// Eg.  MADD    x0, x1, x2, x3
pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);   // accumulator operand
  INS01  : ISS;
  MAC    : WR;
%}
7609 
7610 //------- Divide pipeline operations --------------------
7611 
7612 // Eg.  SDIV    w0, w1, w2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;          // occupies the divide unit until writeback
%}
7623 
7624 // Eg.  SDIV    x0, x1, x2
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}
7635 
7636 //------- Load pipeline operations ------------------------
7637 
7638 // Load - prefetch
7639 // Eg.  PFRM    <mem>
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ISS(read);   // only address operands are consumed
  INS01  : ISS;
  LDST   : WR;          // occupies the load/store unit
%}
7647 
7648 // Load - reg, mem
7649 // Eg.  LDR     x0, <mem>
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);   // loaded value available at writeback
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}
7658 
7659 // Load - reg, reg
7660 // Eg.  LDR     x0, [sp, x1]
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ISS(read);   // index/address register needed at issue
  INS01  : ISS;
  LDST   : WR;
%}
7669 
7670 //------- Store pipeline operations -----------------------
7671 
7672 // Store - zr, mem
7673 // Eg.  STR     zr, <mem>
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ISS(read);   // store of zr: only address operands consumed
  INS01  : ISS;
  LDST   : WR;
%}
7681 
7682 // Store - reg, mem
7683 // Eg.  STR     x0, <mem>
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : EX2(read);   // store data can arrive as late as EX2
  INS01  : ISS;
  LDST   : WR;
%}
7692 
7693 // Store - reg, reg
7694 // Eg. STR      x0, [sp, x1]
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read);   // despite the name, dst is read here as the address register
  src    : EX2(read);   // store data
  INS01  : ISS;
  LDST   : WR;
%}
7703 
//------- Branch pipeline operations ----------------------
7705 
7706 // Branch
pipe_class pipe_branch()
%{
  single_instruction;
  INS01  : ISS;
  BRANCH : EX1;         // branch resolves in EX1
%}
7713 
7714 // Conditional branch
pipe_class pipe_branch_cond(rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);   // flags consumed when the branch resolves
  INS01  : ISS;
  BRANCH : EX1;
%}
7722 
7723 // Compare & Branch
7724 // EG.  CBZ/CBNZ
pipe_class pipe_cmp_branch(iRegI op1)
%{
  single_instruction;
  op1    : EX1(read);   // compared register read at EX1
  INS01  : ISS;
  BRANCH : EX1;
%}
7732 
7733 //------- Synchronisation operations ----------------------
7734 
7735 // Any operation requiring serialization.
7736 // EG.  DMB/Atomic Ops/Load Acquire/Str Release
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;  // must not be reordered with neighbours
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}
7745 
7746 // Generic big/slow expanded idiom - also serialized
pipe_class pipe_slow()
%{
  instruction_count(10);  // modelled as a long multi-instruction expansion
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}
7756 
7757 // Empty pipeline class
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);     // consumes no time; used for MachNop below
%}
7763 
7764 // Default pipeline class.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);     // generic fallback latency
%}
7770 
7771 // Pipeline class for compares.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);    // pessimistic coarse model (no per-stage detail)
%}
7777 
7778 // Pipeline class for memory operations.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);    // pessimistic coarse model (no per-stage detail)
%}
7784 
7785 // Pipeline class for call.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);   // calls dominate; exact latency is irrelevant
%}
7791 
7792 // Define the class for the Nop node.
define %{
   MachNop = pipe_class_empty;  // nops use the zero-latency empty class above
%}
7796 
7797 %}
7798 //----------INSTRUCTIONS-------------------------------------------------------
7799 //
7800 // match      -- States which machine-independent subtree may be replaced
7801 //               by this instruction.
7802 // ins_cost   -- The estimated cost of this instruction is used by instruction
7803 //               selection to identify a minimum cost tree of machine
7804 //               instructions that matches a tree of machine-independent
7805 //               instructions.
7806 // format     -- A string providing the disassembly for this instruction.
7807 //               The value of an instruction's operand may be inserted
7808 //               by referring to it with a '$' prefix.
7809 // opcode     -- Three instruction opcodes may be provided.  These are referred
7810 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
7812 //               indicate the type of machine instruction, while secondary
7813 //               and tertiary are often used for prefix options or addressing
7814 //               modes.
7815 // ins_encode -- A list of encode classes with parameters. The encode class
7816 //               name must have been defined in an 'enc_class' specification
7817 //               in the encode section of the architecture description.
7818 
7819 // ============================================================================
7820 // Memory (Load/Store) Instructions
7821 
7822 // Load Instructions
7823 
7824 // Load Byte (8 bit signed)
instruct loadB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadB mem));
  // Plain loads only; acquiring loads are matched by the ldar* variants below.
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsbw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7837 
7838 // Load Byte (8 bit signed) into long
instruct loadB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));
  // Predicate looks through the ConvI2L to the LoadB itself.
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsb(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7851 
7852 // Load Byte (8 bit unsigned)
instruct loadUB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUB mem));
  // Plain loads only; acquiring loads use the ldar* variants below.
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7865 
7866 // Load Byte (8 bit unsigned) into long
instruct loadUB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));
  // Predicate looks through the ConvI2L to the LoadUB itself.
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7879 
7880 // Load Short (16 bit signed)
instruct loadS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadS mem));
  // Plain loads only; acquiring loads use the ldar* variants below.
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrshw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7893 
7894 // Load Short (16 bit signed) into long
instruct loadS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));
  // Predicate looks through the ConvI2L to the LoadS itself.
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrsh(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7907 
7908 // Load Char (16 bit unsigned)
instruct loadUS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUS mem));
  // Plain loads only; acquiring loads use the ldar* variants below.
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7921 
7922 // Load Short/Char (16 bit unsigned) into long
instruct loadUS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));
  // Predicate looks through the ConvI2L to the LoadUS itself.
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7935 
7936 // Load Integer (32 bit signed)
instruct loadI(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadI mem));
  // Plain loads only; acquiring loads use the ldar* variants below.
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7949 
7950 // Load Integer (32 bit signed) into long
instruct loadI2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));
  // Predicate looks through the ConvI2L to the LoadI itself.
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrsw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7963 
7964 // Load Integer (32 bit unsigned) into long
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  // AndL with a 32-bit mask on a ConvI2L of a LoadI == zero-extending load.
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  // Predicate digs through AndL and ConvI2L to the underlying LoadI.
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7977 
7978 // Load Long (64 bit signed)
// Load Long (64-bit signed): plain loads only; acquiring loads use ldar.
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  // Fix the disassembly comment: this loads a long, not an int.
  format %{ "ldr  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7991 
7992 // Load Range
instruct loadRange(iRegINoSp dst, memory mem)
%{
  // Array-length loads never need acquire semantics, hence no predicate.
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8004 
8005 // Load Pointer
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  // Plain loads only; acquiring loads use the ldar* variants.
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8018 
8019 // Load Compressed Pointer
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));
  // Plain loads only; acquiring loads use the ldar* variants.
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8032 
8033 // Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  // Plain loads only; acquiring loads use the ldar* variants.
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8046 
8047 // Load Narrow Klass Pointer
instruct loadNKlass(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));
  // Plain loads only; acquiring loads use the ldar* variants.
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8060 
8061 // Load Float
instruct loadF(vRegF dst, memory mem)
%{
  match(Set dst (LoadF mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrs  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_ldrs(dst, mem) );

  // FP loads use the coarse memory class, not iload_reg_mem.
  ins_pipe(pipe_class_memory);
%}
8074 
8075 // Load Double
instruct loadD(vRegD dst, memory mem)
%{
  match(Set dst (LoadD mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrd  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_ldrd(dst, mem) );

  // FP loads use the coarse memory class, not iload_reg_mem.
  ins_pipe(pipe_class_memory);
%}
8088 
8089 
8090 // Load Int Constant
instruct loadConI(iRegINoSp dst, immI src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# int" %}

  // 32-bit immediate materialization.
  ins_encode( aarch64_enc_movw_imm(dst, src) );

  ins_pipe(ialu_imm);
%}
8102 
8103 // Load Long Constant
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  // 64-bit immediate materialization.
  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}
8115 
8116 // Load Pointer Constant
8117 
instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  // Cost reflects the multi-instruction sequence needed for a full pointer.
  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr\n\t"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}
8131 
8132 // Load Null Pointer Constant
8133 
instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# NULL ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
8145 
8146 // Load Pointer Constant One
8147 
// Load Pointer Constant One: materializes the constant 1 (immP_1).
instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  // Fixed disassembly comment: the old text said "NULL ptr", a copy-paste
  // from loadConP0; this instruction loads the pointer constant one.
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
8159 
8160 // Load Poll Page Constant
8161 
instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Poll Page Ptr" %}

  ins_encode(aarch64_enc_mov_poll_page(dst, con));

  ins_pipe(ialu_imm);
%}
8173 
8174 // Load Byte Map Base Constant
8175 
instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Byte Map Base" %}

  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));

  ins_pipe(ialu_imm);
%}
8187 
8188 // Load Narrow Pointer Constant
8189 
instruct loadConN(iRegNNoSp dst, immN con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{ "mov  $dst, $con\t# compressed ptr" %}

  ins_encode(aarch64_enc_mov_n(dst, con));

  ins_pipe(ialu_imm);
%}
8201 
8202 // Load Narrow Null Pointer Constant
8203 
instruct loadConN0(iRegNNoSp dst, immN0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed NULL ptr" %}

  ins_encode(aarch64_enc_mov_n0(dst, con));

  ins_pipe(ialu_imm);
%}
8215 
8216 // Load Narrow Klass Constant
8217 
instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_mov_nk(dst, con));

  ins_pipe(ialu_imm);
%}
8229 
8230 // Load Packed Float Constant
8231 
instruct loadConF_packed(vRegF dst, immFPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST * 4);
  format %{ "fmovs  $dst, $con"%}
  ins_encode %{
    // The (double) cast matches fmovs' immediate parameter type; immFPacked
    // presumably restricts the constant to FP-immediate-encodable values —
    // its operand definition is outside this chunk.
    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
  %}

  ins_pipe(fp_imm_s);
%}
8242 
8243 // Load Float Constant
8244 
instruct loadConF(vRegF dst, immF con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 4);

  format %{
    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
  %}

  ins_encode %{
    // General float constants come from the constant table.
    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_s);
%}
8260 
8261 // Load Packed Double Constant
8262 
instruct loadConD_packed(vRegD dst, immDPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST);
  format %{ "fmovd  $dst, $con"%}
  ins_encode %{
    // Single-instruction materialization of an FP-immediate-encodable double.
    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
  %}

  ins_pipe(fp_imm_d);
%}
8273 
8274 // Load Double Constant
8275 
// Load Double Constant from the constant table.
instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  format %{
    // Fixed the disassembly comment: this is a double constant, not a float
    // (the old text was copy-pasted from loadConF).
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
8290 
8291 // Store Instructions
8292 
8293 // Store CMS card-mark Immediate
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  // Only when the preceding StoreStore barrier is provably redundant;
  // otherwise the _ordered variant below is used.
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
8306 
8307 // Store CMS card-mark Immediate with intervening StoreStore
8308 // needed when using CMS with no conditional card marking
instruct storeimmCM0_ordered(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  // Extra cost for the dmb ishst emitted before the byte store.
  ins_cost(INSN_COST * 2);
  format %{ "dmb ishst"
      "\n\tstrb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0_ordered(mem));

  ins_pipe(istore_mem);
%}
8321 
8322 // Store Byte
instruct storeB(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreB mem src));
  // Plain stores only; releasing stores are handled elsewhere.
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb(src, mem));

  ins_pipe(istore_reg_mem);
%}
8335 
8336 
// Store zero byte: aarch64_enc_strb0 stores the zero register directly.
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed disassembly text: the encoding stores zr (cf. storeimmCM0 and
  // storeimmC0); the old text named a misspelled scratch register
  // ("rscractch2") that is not what is emitted.
  format %{ "strb  zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
8349 
8350 // Store Char/Short
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));
  // Plain stores only; releasing stores are handled elsewhere.
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}
8363 
instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  zr, $mem\t# short" %}

  // Stores the zero register; no source register operand needed.
  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}
8376 
8377 // Store Integer
8378 
instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem(StoreI mem src));
  // Plain stores only; releasing stores are handled elsewhere.
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
8391 
instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem(StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  zr, $mem\t# int" %}

  // Stores the zero register; no source register operand needed.
  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}
8404 
8405 // Store Long (64 bit signed)
// Store Long (64-bit signed); plain stores only.
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed the disassembly comment: this stores a long, not an int.
  format %{ "str  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}
8418 
8419 // Store Long (64 bit signed)
// Store zero long: the encoding stores the zero register.
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed the disassembly comment: this stores a long, not an int.
  format %{ "str  zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
8432 
8433 // Store Pointer
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  // Plain stores only; releasing stores are handled elsewhere.
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  // Open-coded (instead of using aarch64_enc_str) so a Shenandoah store
  // check can be inserted before the actual str.
  ins_encode %{
    int opcode = $mem->opcode();
    Register base = as_Register($mem$$base);
    int index = $mem$$index;     // -1 when there is no index register
    int size = $mem$$scale;
    int disp = $mem$$disp;
    Register reg = as_Register($src$$reg);

    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      reg = rscratch2;
    }
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
    case INDINDEXI2L:
    case INDINDEXI2LN:
      scale = Address::sxtw(size);   // 32-bit index: sign-extend it
      break;
    default:
      scale = Address::lsl(size);    // 64-bit index: plain shift
    }

    Address adr;
    if (index == -1) {
      adr = Address(base, disp);
    } else {
      if (disp == 0) {
        adr = Address(base, as_Register(index), scale);
      } else {
        // base + disp + scaled index cannot be encoded in one address;
        // fold base + disp into rscratch1 first.
        __ lea(rscratch1, Address(base, disp));
        adr = Address(rscratch1, as_Register(index), scale);
      }
    }

    // Shenandoah store check; skipped for the rscratch2 sp-copy made above.
    if (reg != rscratch2)
      __ shenandoah_store_check(reg, adr);

    __ str(reg, adr);
  %}

  ins_pipe(istore_reg_mem);
%}
8494 
8495 // Store Pointer
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  // Stores the zero register; no store check needed for NULL.
  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
8508 
8509 // Store Compressed Pointer
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  // Plain stores only; releasing stores are handled elsewhere.
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
8522 
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  // Only valid when the heap-base register holds zero, i.e. both narrow-oop
  // and narrow-klass decoding use a NULL base.
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}
8537 
8538 // Store Float
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  // FP stores use the coarse memory class, not istore_reg_mem.
  ins_pipe(pipe_class_memory);
%}
8551 
8552 // TODO
8553 // implement storeImmF0 and storeFImmPacked
8554 
8555 // Store Double
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  // FP stores use the coarse memory class, not istore_reg_mem.
  ins_pipe(pipe_class_memory);
%}
8568 
8569 // Store Compressed Klass Pointer
instruct storeNKlass(iRegN src, memory mem)
%{
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
8582 
8583 // TODO
8584 // implement storeImmD0 and storeDImmPacked
8585 
8586 // prefetch instructions
8587 // Must be safe to execute with invalid address (cannot fault).
8588 
instruct prefetchalloc( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}
8599 
8600 //  ---------------- volatile loads and stores ----------------
8601 
8602 // Load Byte (8 bit signed)
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  // Acquiring counterpart of loadB; no predicate, selected on ins_cost.
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  // Acquiring loads are serialized in the pipeline.
  ins_pipe(pipe_serial);
%}
8614 
8615 // Load Byte (8 bit signed) into long
8616 instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8617 %{
8618   match(Set dst (ConvI2L (LoadB mem)));
8619 
8620   ins_cost(VOLATILE_REF_COST);
8621   format %{ "ldarsb  $dst, $mem\t# byte" %}
8622 
8623   ins_encode(aarch64_enc_ldarsb(dst, mem));
8624 
8625   ins_pipe(pipe_serial);
8626 %}
8627 
8628 // Load Byte (8 bit unsigned)
8629 instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8630 %{
8631   match(Set dst (LoadUB mem));
8632 
8633   ins_cost(VOLATILE_REF_COST);
8634   format %{ "ldarb  $dst, $mem\t# byte" %}
8635 
8636   ins_encode(aarch64_enc_ldarb(dst, mem));
8637 
8638   ins_pipe(pipe_serial);
8639 %}
8640 
8641 // Load Byte (8 bit unsigned) into long
8642 instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8643 %{
8644   match(Set dst (ConvI2L (LoadUB mem)));
8645 
8646   ins_cost(VOLATILE_REF_COST);
8647   format %{ "ldarb  $dst, $mem\t# byte" %}
8648 
8649   ins_encode(aarch64_enc_ldarb(dst, mem));
8650 
8651   ins_pipe(pipe_serial);
8652 %}
8653 
8654 // Load Short (16 bit signed)
8655 instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8656 %{
8657   match(Set dst (LoadS mem));
8658 
8659   ins_cost(VOLATILE_REF_COST);
8660   format %{ "ldarshw  $dst, $mem\t# short" %}
8661 
8662   ins_encode(aarch64_enc_ldarshw(dst, mem));
8663 
8664   ins_pipe(pipe_serial);
8665 %}
8666 
8667 instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8668 %{
8669   match(Set dst (LoadUS mem));
8670 
8671   ins_cost(VOLATILE_REF_COST);
8672   format %{ "ldarhw  $dst, $mem\t# short" %}
8673 
8674   ins_encode(aarch64_enc_ldarhw(dst, mem));
8675 
8676   ins_pipe(pipe_serial);
8677 %}
8678 
8679 // Load Short/Char (16 bit unsigned) into long
8680 instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8681 %{
8682   match(Set dst (ConvI2L (LoadUS mem)));
8683 
8684   ins_cost(VOLATILE_REF_COST);
8685   format %{ "ldarh  $dst, $mem\t# short" %}
8686 
8687   ins_encode(aarch64_enc_ldarh(dst, mem));
8688 
8689   ins_pipe(pipe_serial);
8690 %}
8691 
// Load Short (16 bit signed) into long
// (A Java char is unsigned; only short is signed, so "Char" is dropped
// from the comment. The unsigned case is loadUS2L_volatile above.)
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  // Fixed format to match the encoding: aarch64_enc_ldarsh emits the
  // sign-extending load-acquire halfword LDARSH, not plain LDARH.
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
8704 
// Load Integer (32 bit signed)
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit unsigned) into long
// Matches the (AndL (ConvI2L (LoadI ...)) 0xFFFFFFFF) idiom; ldarw
// zero-extends into the 64-bit register, so no explicit mask is needed.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
// Load Long (64 bit signed)
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  // Fixed format comment: this is a 64-bit long load (ldar), not an int.
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
8743 
// Load Pointer
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Compressed Pointer
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Float
// FP load-acquire: the encoding loads via an integer register and moves
// into the FP register (see aarch64_enc_fldars).
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Load Double
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}
8795 
// Store Byte
// Store-release (stlr*) forms; like the acquire loads above these only
// accept base-register (indirect) addressing.
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}
// Store Integer

// Store-release of a 32-bit int (stlrw).
instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  // Added the conventional space after "mem" for consistency with every
  // other match rule in this file (no semantic change).
  match(Set mem (StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
8835 
// Store Long (64 bit signed)
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  // Fixed format comment: this is a 64-bit long store (stlr), not an int.
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
8848 
// Store Pointer
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Pointer
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Float
// FP store-release: the encoding moves the FP value through an integer
// register before the stlr (see aarch64_enc_fstlrs).
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
8903 
8904 //  ---------------- end of volatile loads and stores ----------------
8905 
8906 // ============================================================================
8907 // BSWAP Instructions
8908 
8909 instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
8910   match(Set dst (ReverseBytesI src));
8911 
8912   ins_cost(INSN_COST);
8913   format %{ "revw  $dst, $src" %}
8914 
8915   ins_encode %{
8916     __ revw(as_Register($dst$$reg), as_Register($src$$reg));
8917   %}
8918 
8919   ins_pipe(ialu_reg);
8920 %}
8921 
8922 instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
8923   match(Set dst (ReverseBytesL src));
8924 
8925   ins_cost(INSN_COST);
8926   format %{ "rev  $dst, $src" %}
8927 
8928   ins_encode %{
8929     __ rev(as_Register($dst$$reg), as_Register($src$$reg));
8930   %}
8931 
8932   ins_pipe(ialu_reg);
8933 %}
8934 
8935 instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
8936   match(Set dst (ReverseBytesUS src));
8937 
8938   ins_cost(INSN_COST);
8939   format %{ "rev16w  $dst, $src" %}
8940 
8941   ins_encode %{
8942     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
8943   %}
8944 
8945   ins_pipe(ialu_reg);
8946 %}
8947 
8948 instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
8949   match(Set dst (ReverseBytesS src));
8950 
8951   ins_cost(INSN_COST);
8952   format %{ "rev16w  $dst, $src\n\t"
8953             "sbfmw $dst, $dst, #0, #15" %}
8954 
8955   ins_encode %{
8956     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
8957     __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
8958   %}
8959 
8960   ins_pipe(ialu_reg);
8961 %}
8962 
8963 // ============================================================================
8964 // Zero Count Instructions
8965 
8966 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
8967   match(Set dst (CountLeadingZerosI src));
8968 
8969   ins_cost(INSN_COST);
8970   format %{ "clzw  $dst, $src" %}
8971   ins_encode %{
8972     __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
8973   %}
8974 
8975   ins_pipe(ialu_reg);
8976 %}
8977 
8978 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
8979   match(Set dst (CountLeadingZerosL src));
8980 
8981   ins_cost(INSN_COST);
8982   format %{ "clz   $dst, $src" %}
8983   ins_encode %{
8984     __ clz(as_Register($dst$$reg), as_Register($src$$reg));
8985   %}
8986 
8987   ins_pipe(ialu_reg);
8988 %}
8989 
8990 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
8991   match(Set dst (CountTrailingZerosI src));
8992 
8993   ins_cost(INSN_COST * 2);
8994   format %{ "rbitw  $dst, $src\n\t"
8995             "clzw   $dst, $dst" %}
8996   ins_encode %{
8997     __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
8998     __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
8999   %}
9000 
9001   ins_pipe(ialu_reg);
9002 %}
9003 
9004 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
9005   match(Set dst (CountTrailingZerosL src));
9006 
9007   ins_cost(INSN_COST * 2);
9008   format %{ "rbit   $dst, $src\n\t"
9009             "clz    $dst, $dst" %}
9010   ins_encode %{
9011     __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
9012     __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
9013   %}
9014 
9015   ins_pipe(ialu_reg);
9016 %}
9017 
//---------- Population Count Instructions -------------------------------------
//
// Population count is done via the NEON CNT instruction: move the value
// into a vector register, count bits per byte (cnt 8B), sum the byte
// counts (addv), and move the result back.

instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // movw of src onto itself only clears the upper 32 bits; the int
    // value is unchanged, so writing back into src is safe here.
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Memory form: load the 32-bit value straight into the FP/SIMD register
// (ldrs) and count there, avoiding the integer-to-vector move.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
9107 
9108 // ============================================================================
9109 // MemBar Instruction
9110 
9111 instruct load_fence() %{
9112   match(LoadFence);
9113   ins_cost(VOLATILE_REF_COST);
9114 
9115   format %{ "load_fence" %}
9116 
9117   ins_encode %{
9118     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
9119   %}
9120   ins_pipe(pipe_serial);
9121 %}
9122 
9123 instruct unnecessary_membar_acquire() %{
9124   predicate(unnecessary_acquire(n));
9125   match(MemBarAcquire);
9126   ins_cost(0);
9127 
9128   format %{ "membar_acquire (elided)" %}
9129 
9130   ins_encode %{
9131     __ block_comment("membar_acquire (elided)");
9132   %}
9133 
9134   ins_pipe(pipe_class_empty);
9135 %}
9136 
9137 instruct membar_acquire() %{
9138   match(MemBarAcquire);
9139   ins_cost(VOLATILE_REF_COST);
9140 
9141   format %{ "membar_acquire" %}
9142 
9143   ins_encode %{
9144     __ block_comment("membar_acquire");
9145     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
9146   %}
9147 
9148   ins_pipe(pipe_serial);
9149 %}
9150 
9151 
9152 instruct membar_acquire_lock() %{
9153   match(MemBarAcquireLock);
9154   ins_cost(VOLATILE_REF_COST);
9155 
9156   format %{ "membar_acquire_lock (elided)" %}
9157 
9158   ins_encode %{
9159     __ block_comment("membar_acquire_lock (elided)");
9160   %}
9161 
9162   ins_pipe(pipe_serial);
9163 %}
9164 
9165 instruct store_fence() %{
9166   match(StoreFence);
9167   ins_cost(VOLATILE_REF_COST);
9168 
9169   format %{ "store_fence" %}
9170 
9171   ins_encode %{
9172     __ membar(Assembler::LoadStore|Assembler::StoreStore);
9173   %}
9174   ins_pipe(pipe_serial);
9175 %}
9176 
9177 instruct unnecessary_membar_release() %{
9178   predicate(unnecessary_release(n));
9179   match(MemBarRelease);
9180   ins_cost(0);
9181 
9182   format %{ "membar_release (elided)" %}
9183 
9184   ins_encode %{
9185     __ block_comment("membar_release (elided)");
9186   %}
9187   ins_pipe(pipe_serial);
9188 %}
9189 
9190 instruct membar_release() %{
9191   match(MemBarRelease);
9192   ins_cost(VOLATILE_REF_COST);
9193 
9194   format %{ "membar_release" %}
9195 
9196   ins_encode %{
9197     __ block_comment("membar_release");
9198     __ membar(Assembler::LoadStore|Assembler::StoreStore);
9199   %}
9200   ins_pipe(pipe_serial);
9201 %}
9202 
9203 instruct membar_storestore() %{
9204   match(MemBarStoreStore);
9205   ins_cost(VOLATILE_REF_COST);
9206 
9207   format %{ "MEMBAR-store-store" %}
9208 
9209   ins_encode %{
9210     __ membar(Assembler::StoreStore);
9211   %}
9212   ins_pipe(pipe_serial);
9213 %}
9214 
9215 instruct membar_release_lock() %{
9216   match(MemBarReleaseLock);
9217   ins_cost(VOLATILE_REF_COST);
9218 
9219   format %{ "membar_release_lock (elided)" %}
9220 
9221   ins_encode %{
9222     __ block_comment("membar_release_lock (elided)");
9223   %}
9224 
9225   ins_pipe(pipe_serial);
9226 %}
9227 
9228 instruct unnecessary_membar_volatile() %{
9229   predicate(unnecessary_volatile(n));
9230   match(MemBarVolatile);
9231   ins_cost(0);
9232 
9233   format %{ "membar_volatile (elided)" %}
9234 
9235   ins_encode %{
9236     __ block_comment("membar_volatile (elided)");
9237   %}
9238 
9239   ins_pipe(pipe_serial);
9240 %}
9241 
9242 instruct membar_volatile() %{
9243   match(MemBarVolatile);
9244   ins_cost(VOLATILE_REF_COST*100);
9245 
9246   format %{ "membar_volatile" %}
9247 
9248   ins_encode %{
9249     __ block_comment("membar_volatile");
9250     __ membar(Assembler::StoreLoad);
9251   %}
9252 
9253   ins_pipe(pipe_serial);
9254 %}
9255 
9256 // ============================================================================
9257 // Cast/Convert Instructions
9258 
9259 instruct castX2P(iRegPNoSp dst, iRegL src) %{
9260   match(Set dst (CastX2P src));
9261 
9262   ins_cost(INSN_COST);
9263   format %{ "mov $dst, $src\t# long -> ptr" %}
9264 
9265   ins_encode %{
9266     if ($dst$$reg != $src$$reg) {
9267       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
9268     }
9269   %}
9270 
9271   ins_pipe(ialu_reg);
9272 %}
9273 
9274 instruct castP2X(iRegLNoSp dst, iRegP src) %{
9275   match(Set dst (CastP2X src));
9276 
9277   ins_cost(INSN_COST);
9278   format %{ "mov $dst, $src\t# ptr -> long" %}
9279 
9280   ins_encode %{
9281     if ($dst$$reg != $src$$reg) {
9282       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
9283     }
9284   %}
9285 
9286   ins_pipe(ialu_reg);
9287 %}
9288 
9289 // Convert oop into int for vectors alignment masking
9290 instruct convP2I(iRegINoSp dst, iRegP src) %{
9291   match(Set dst (ConvL2I (CastP2X src)));
9292 
9293   ins_cost(INSN_COST);
9294   format %{ "movw $dst, $src\t# ptr -> int" %}
9295   ins_encode %{
9296     __ movw($dst$$Register, $src$$Register);
9297   %}
9298 
9299   ins_pipe(ialu_reg);
9300 %}
9301 
// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  // Fixed format: "$dst" was written as literal "dst" (missing '$'), and
  // the mnemonic now matches the movw actually emitted by the encoder.
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
9317 
// Shenandoah read barrier: resolve the object through its Brooks
// forwarding pointer (stored just before the object).
instruct shenandoahRB(iRegPNoSp dst, iRegP src, rFlagsReg cr) %{
  match(Set dst (ShenandoahReadBarrier src));
  format %{ "shenandoah_rb $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ ldr(d, Address(s, BrooksPointer::byte_offset()));
  %}
  ins_pipe(pipe_class_memory);
%}

// Shenandoah write barrier: resolve through the Brooks pointer, then if
// evacuation is in progress call the write-barrier stub (which expects
// the object in / returns the result in r0).
instruct shenandoahWB(iRegP_R0 dst, iRegP src, rFlagsReg cr) %{
  match(Set dst (ShenandoahWriteBarrier src));
  effect(KILL cr);

  format %{ "shenandoah_wb $dst,$src" %}
  ins_encode %{
    Label done;
    Register s = $src$$Register;
    Register d = $dst$$Register;
    assert(d == r0, "result in r0");
    Address evacuation_in_progress = Address(rthread, in_bytes(JavaThread::evacuation_in_progress_offset()));
    __ block_comment("Shenandoah write barrier {");
    // NOTE(review): d is loaded from the Brooks pointer twice, before and
    // after the evacuation-flag load + LoadLoad membar. The re-load after
    // the membar orders the forwarding-pointer read against the flag read;
    // confirm the first load is intentional rather than dead code.
    __ ldr(d, Address(s, BrooksPointer::byte_offset()));
    __ ldrb(rscratch1, evacuation_in_progress);
    __ membar(Assembler::LoadLoad);
    __ ldr(d, Address(s, BrooksPointer::byte_offset()));
    __ cbzw(rscratch1, done);
    __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::aarch64::shenandoah_wb())), NULL, lr);
    __ bind(done);
    __ block_comment("} Shenandoah write barrier");
  %}
  ins_pipe(pipe_slow);
%}
9352 
9353 
// Compressed-oop encode: the two rules are split on nullability so the
// not-null form can skip the null check inside the macro assembler.
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// n.b. AArch64 implementations of encode_klass_not_null and
// decode_klass_not_null do not modify the flags register so, unlike
// Intel, we don't kill CR as a side effect here

instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}

instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    // The in-place (single-register) macro variant is used when the
    // allocator assigned dst and src to the same register.
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}
9444 
// Pure type-system casts: no code is emitted (size(0)); the rules exist
// only so the matcher has a rule for these ideal nodes.
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
9475 
9476 // ============================================================================
9477 // Atomic operation instructions
9478 //
9479 // Intel and SPARC both implement Ideal Node LoadPLocked and
9480 // Store{PIL}Conditional instructions using a normal load for the
9481 // LoadPLocked and a CAS for the Store{PIL}Conditional.
9482 //
9483 // The ideal code appears only to use LoadPLocked/StorePLocked as a
9484 // pair to lock object allocations from Eden space when not using
9485 // TLABs.
9486 //
9487 // There does not appear to be a Load{IL}Locked Ideal Node and the
9488 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
9489 // and to use StoreIConditional only for 32-bit and StoreLConditional
9490 // only for 64-bit.
9491 //
9492 // We implement LoadPLocked and StorePLocked instructions using,
9493 // respectively the AArch64 hw load-exclusive and store-conditional
9494 // instructions. Whereas we must implement each of
9495 // Store{IL}Conditional using a CAS which employs a pair of
9496 // instructions comprising a load-exclusive followed by a
9497 // store-conditional.
9498 
9499 
9500 // Locked-load (linked load) of the current heap-top
9501 // used when updating the eden heap top
9502 // implemented using ldaxr on AArch64
9503 
9504 instruct loadPLocked(iRegPNoSp dst, indirect mem)
9505 %{
9506   match(Set dst (LoadPLocked mem));
9507 
9508   ins_cost(VOLATILE_REF_COST);
9509 
9510   format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}
9511 
9512   ins_encode(aarch64_enc_ldaxr(dst, mem));
9513 
9514   ins_pipe(pipe_serial);
9515 %}
9516 
9517 // Conditional-store of the updated heap-top.
9518 // Used during allocation of the shared heap.
9519 // Sets flag (EQ) on success.
9520 // implemented using stlxr on AArch64.
9521 
9522 instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
9523 %{
9524   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
9525 
9526   ins_cost(VOLATILE_REF_COST);
9527 
9528  // TODO
9529  // do we need to do a store-conditional release or can we just use a
9530  // plain store-conditional?
9531 
9532   format %{
9533     "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
9534     "cmpw rscratch1, zr\t# EQ on successful write"
9535   %}
9536 
9537   ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));
9538 
9539   ins_pipe(pipe_serial);
9540 %}
9541 
9542 
// storeLConditional is used by PhaseMacroExpand::expand_lock_node
// when attempting to rebias a lock towards the current thread.  We
// must use the acquire form of cmpxchg in order to guarantee acquire
// semantics in this case.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}

// storeIConditional also has acquire semantics, for no better reason
// than matching storeLConditional.  At the time of writing this
// comment storeIConditional was not used anywhere by AArch64.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
9581 
// standard CompareAndSwapX when we are using barriers
// these have higher priority than the rules selected by a predicate

// XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
// can't match them

instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Plain pointer CAS; with Shenandoah it is only legal when the compared
// value is known null (no false negative from a from-space copy),
// otherwise the _shenandoah variant below is selected.
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(!UseShenandoahGC || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegP tmp, rFlagsReg cr) %{

  predicate(UseShenandoahGC);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

  format %{
    "cmpxchg_oop_shenandoah $res, $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
  %}

  ins_encode(aarch64_enc_cmpxchg_oop_shenandoah(res, mem, oldval, newval, tmp));

  ins_pipe(pipe_slow);
%}
9658 
9659 instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
9660 
9661   predicate(!UseShenandoahGC);
9662   match(Set res (CompareAndSwapN mem (Binary oldval newval)));
9663   ins_cost(2 * VOLATILE_REF_COST);
9664 
9665   effect(KILL cr);
9666 
9667  format %{
9668     "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
9669     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9670  %}
9671 
9672   ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
9673              aarch64_enc_cset_eq(res));
9674 
9675   ins_pipe(pipe_slow);
9676 %}
9677 
9678 instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, iRegP tmp, rFlagsReg cr) %{
9679 
9680   predicate(UseShenandoahGC);
9681   match(Set res (CompareAndSwapN mem (Binary oldval newval)));
9682   ins_cost(2 * VOLATILE_REF_COST);
9683 
9684   effect(TEMP tmp, KILL cr);
9685 
9686  format %{
9687     "cmpxchg_narrow_oop_shenandoah $res, $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
9688  %}
9689 
9690   ins_encode %{
9691     Register tmp = $tmp$$Register;
9692     __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
9693     __ cmpxchg_oop_shenandoah($res$$base$$Register, $mem$$base$$Register, tmp, $newval$$Register, true, /*acquire*/ true, /*release*/ true);
9694   %}
9695 
9696   ins_pipe(pipe_slow);
9697 %}
9698 
// alternative CompareAndSwapX when we are eliding barriers: these
// rules are selected (via needs_acquiring_load_exclusive) when the CAS
// node can use the acquiring "_acq" encodings instead of explicit
// barriers, at a lower cost (VOLATILE_REF_COST instead of 2x).

// Acquiring int CAS.
instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring long CAS.
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring pointer CAS; as with the non-acquiring rule, Shenandoah
// only takes this path when the expected value is null.
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && (!UseShenandoahGC || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring Shenandoah pointer CAS (TEMP register as in the
// non-acquiring Shenandoah rule).
instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegP tmp, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && UseShenandoahGC);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

  format %{
    "cmpxchg_acq_oop_shenandoah $res,$mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq_oop_shenandoah(res, mem, oldval, newval, tmp));

  ins_pipe(pipe_slow);
%}

// Acquiring narrow-oop CAS for non-Shenandoah collectors.
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && ! UseShenandoahGC);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring Shenandoah narrow-oop CAS. The inline encoding mirrors the
// non-acquiring twin (both pass /*acquire*/ true).
instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, iRegP tmp, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && UseShenandoahGC);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

 format %{
    "cmpxchg_narrow_oop_shenandoah $res, $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
 %}

  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    // NOTE(review): same $res$$base$$Register accessor question as the
    // non-acquiring rule — confirm against cmpxchg_oop_shenandoah().
    __ cmpxchg_oop_shenandoah($res$$base$$Register, $mem$$base$$Register, tmp, $newval$$Register, true, /*acquire*/ true, /*release*/ true);
  %}

  ins_pipe(pipe_slow);
%}
9814 
9815 // ---------------------------------------------------------------------
9816 // Sundry CAS operations.  Note that release is always true,
9817 // regardless of the memory ordering of the CAS.  This is because we
9818 // need the volatile case to be sequentially consistent but there is
9819 // no trailing StoreLoad barrier emitted by C2.  Unfortunately we
9820 // can't check the type of memory ordering here, so we always emit a
9821 // STLXR.
9822 
9823 // This section is generated from aarch64_ad_cas.m4
9824 
9825 
9826 instruct compareAndExchangeB(iRegI_R0 res, indirect mem, iRegI_R2 oldval, iRegI_R3 newval, rFlagsReg cr) %{
9827   match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
9828   ins_cost(2 * VOLATILE_REF_COST);
9829   effect(KILL cr);
9830   format %{
9831     "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
9832   %}
9833   ins_encode %{
9834     __ uxtbw(rscratch2, $oldval$$Register);
9835     __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
9836                Assembler::byte, /*acquire*/ false, /*release*/ true,
9837                /*weak*/ false, $res$$Register);
9838     __ sxtbw($res$$Register, $res$$Register);
9839   %}
9840   ins_pipe(pipe_slow);
9841 %}
9842 
9843 instruct compareAndExchangeS(iRegI_R0 res, indirect mem, iRegI_R2 oldval, iRegI_R3 newval, rFlagsReg cr) %{
9844   match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
9845   ins_cost(2 * VOLATILE_REF_COST);
9846   effect(KILL cr);
9847   format %{
9848     "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
9849   %}
9850   ins_encode %{
9851     __ uxthw(rscratch2, $oldval$$Register);
9852     __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
9853                Assembler::halfword, /*acquire*/ false, /*release*/ true,
9854                /*weak*/ false, $res$$Register);
9855     __ sxthw($res$$Register, $res$$Register);
9856   %}
9857   ins_pipe(pipe_slow);
9858 %}
9859 
9860 instruct compareAndExchangeI(iRegI_R0 res, indirect mem, iRegI_R2 oldval, iRegI_R3 newval, rFlagsReg cr) %{
9861   match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
9862   ins_cost(2 * VOLATILE_REF_COST);
9863   effect(KILL cr);
9864   format %{
9865     "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
9866   %}
9867   ins_encode %{
9868     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
9869                Assembler::word, /*acquire*/ false, /*release*/ true,
9870                /*weak*/ false, $res$$Register);
9871   %}
9872   ins_pipe(pipe_slow);
9873 %}
9874 
9875 instruct compareAndExchangeL(iRegL_R0 res, indirect mem, iRegL_R2 oldval, iRegL_R3 newval, rFlagsReg cr) %{
9876   match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
9877   ins_cost(2 * VOLATILE_REF_COST);
9878   effect(KILL cr);
9879   format %{
9880     "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
9881   %}
9882   ins_encode %{
9883     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
9884                Assembler::xword, /*acquire*/ false, /*release*/ true,
9885                /*weak*/ false, $res$$Register);
9886   %}
9887   ins_pipe(pipe_slow);
9888 %}
9889 
9890 instruct compareAndExchangeN(iRegN_R0 res, indirect mem, iRegN_R2 oldval, iRegN_R3 newval, rFlagsReg cr) %{
9891   match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
9892   ins_cost(2 * VOLATILE_REF_COST);
9893   effect(KILL cr);
9894   format %{
9895     "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
9896   %}
9897   ins_encode %{
9898     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
9899                Assembler::word, /*acquire*/ false, /*release*/ true,
9900                /*weak*/ false, $res$$Register);
9901   %}
9902   ins_pipe(pipe_slow);
9903 %}
9904 
9905 instruct compareAndExchangeP(iRegP_R0 res, indirect mem, iRegP_R2 oldval, iRegP_R3 newval, rFlagsReg cr) %{
9906   match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
9907   ins_cost(2 * VOLATILE_REF_COST);
9908   effect(KILL cr);
9909   format %{
9910     "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
9911   %}
9912   ins_encode %{
9913     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
9914                Assembler::xword, /*acquire*/ false, /*release*/ true,
9915                /*weak*/ false, $res$$Register);
9916   %}
9917   ins_pipe(pipe_slow);
9918 %}
9919 
// WeakCompareAndSwapX: /*weak*/ true is passed to cmpxchg (a weak CAS
// is permitted to fail spuriously).  No result register is supplied
// (noreg); success is taken from the flags with csetw (EQ -> 1, 0
// otherwise).

// Byte weak CAS: oldval zero-extended into rscratch2 for comparison.
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ uxtbw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Short weak CAS.
instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ uxthw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Int weak CAS.
instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Long weak CAS.
instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Narrow-oop weak CAS.
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Pointer weak CAS.
instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
// ---------------------------------------------------------------------
10024 
// GetAndSetX (atomic exchange): store $newv at [mem] and return the
// previous contents in $prev.  Word-sized variants use atomic_xchgw,
// xword-sized use atomic_xchg.

instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
  match(Set prev (GetAndSetI mem newv));
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegL prev) %{
  match(Set prev (GetAndSetL mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Narrow oops are word-sized, hence atomic_xchgw.
instruct get_and_setN(indirect mem, iRegNNoSp newv, iRegI prev) %{
  match(Set prev (GetAndSetN mem newv));
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setP(indirect mem, iRegPNoSp newv, iRegP prev) %{
  match(Set prev (GetAndSetP mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10060 
10061 
// GetAndAddX (atomic fetch-and-add): add $incr to [mem] and return the
// old value in $newval.  Four axes of variation: long vs int width,
// register vs immediate increment (imm*AddSub operands use the
// $$constant accessor), and "_no_res" variants — matched only when the
// LoadStore result is unused (result_not_used), slightly cheaper
// (cost 9 vs 10) and writing the old value to noreg.

instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10145 
// Manifest a CmpL result in an integer register.
// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
// Sequence: cmp sets flags; csetw gives 0 on equal, 1 otherwise;
// cnegw negates that 1 to -1 when the comparison was "less than".
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  // format %{ "CmpL3 $dst, $src1, $src2" %}
  ins_encode %{
    __ cmp($src1$$Register, $src2$$Register);
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}

// Same as above with an add/sub-encodable immediate for src2.
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    // A negative immediate is compared via adds with the negated
    // constant; otherwise subs.  Either way only the flags are needed
    // (destination zr).
    // NOTE(review): relies on immLAddSub never producing a constant
    // whose int32_t negation overflows (e.g. INT32_MIN) — confirm the
    // operand's value range.
    int32_t con = (int32_t)$src2$$constant;
     if (con < 0) {
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
10193 
10194 // ============================================================================
10195 // Conditional Move Instructions
10196 
// n.b. we have identical rules for both a signed compare op (cmpOp)
// and an unsigned compare op (cmpOpU). It would be nice if we could
// define an op class which merged both inputs and use it to type the
// argument to a single rule. Unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
// of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
10206 
// Int conditional moves.  Note the operand order in the emitted csel:
// $src2 is selected when the condition holds, $src1 otherwise —
// swapped relative to their order in the CMoveI match rule.
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned-compare flavour (see the note above on cmpOp vs cmpOpU).
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

// n.b. this is selected in preference to the rule above because it
// avoids loading constant 0 into a source register

// TODO
// we ought only to be able to cull one of these variants as the ideal
// transforms ought always to order the zero consistently (to left/right?)

// Zero on the left: use zr as the "else" operand of the cselw.
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Zero on the right: zr becomes the selected-on-condition operand.
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// special case for creating a boolean 0 or 1

// n.b. this is selected in preference to the rule above because it
// avoids loading constants 0 and 1 into a source register

// csincw zr, zr yields 0 or 1 directly from the flags.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}

instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
10354 
// Long conditional moves — same structure as the int rules above but
// using the 64-bit csel.
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10452 
// Pointer conditional moves — identical shape to the long rules (both
// use the 64-bit csel).
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10550 
// Conditional move of compressed (narrow) oops, signed comparison:
// dst = cmp ? src2 : src1. Uses the 32-bit CSEL form since narrow
// oops occupy the low word.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    // CSEL (32-bit): dst = cond ? src2 : src1.
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10566 
// Conditional move of compressed (narrow) oops under an UNSIGNED
// comparison: dst = cmp ? src2 : src1.
// Fix: the format string said "signed", but this rule matches
// cmpOpU/rFlagsRegU, i.e. an unsigned comparison (the zero-operand
// variant cmovUN_reg_zero already says "unsigned").
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    // CSEL (32-bit): dst = cond ? src2 : src1.
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10582 
10583 // special cases where one arg is zero
10584 
// Narrow-oop cmov where the second CMove operand is null: dst = cmp ? 0 : src.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    // CSEL (32-bit): dst = cond ? zr : src.
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned comparison.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Narrow-oop cmov where the first CMove operand is null: dst = cmp ? src : 0.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    // CSEL (32-bit): dst = cond ? src : zr.
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned comparison.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10648 
// Conditional move of single-precision floats, signed comparison.
// FCSEL selects between two FP registers: dst = cmp ? src2 : src1.
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // FCSEL (single): dst = cond ? src2 : src1.
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}

// As above, unsigned comparison (cmpOpU/rFlagsRegU).
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
10684 
// Conditional move of double-precision floats, signed comparison:
// dst = cmp ? src2 : src1.
// Fix: the format text said "cmove float"; this rule matches CMoveD and
// emits the double-precision FCSEL (fcseld).
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // FCSEL (double): dst = cond ? src2 : src1.
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10702 
// Conditional move of double-precision floats, unsigned comparison:
// dst = cmp ? src2 : src1.
// Fix: the format text said "cmove float"; this rule matches CMoveD and
// emits the double-precision FCSEL (fcseld).
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // FCSEL (double): dst = cond ? src2 : src1.
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10720 
10721 // ============================================================================
10722 // Arithmetic Instructions
10723 //
10724 
10725 // Integer Addition
10726 
10727 // TODO
10728 // these currently employ operations which do not set CR and hence are
10729 // not flagged as killing CR but we would like to isolate the cases
10730 // where we want to set flags from those where we don't. need to work
10731 // out how to do that.
10732 
// Integer (32-bit) addition, register + register.
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Integer addition, register + add/sub-encodable immediate.
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

// Integer addition of an immediate to the low word of a long
// (AddI of a ConvL2I); the 32-bit addw ignores the high bits.
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10775 
10776 // Pointer Addition
// Pointer addition, base + 64-bit offset register.
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer addition with an int offset, folding the I2L conversion
// into the add's sxtw extend.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer addition folding a left-shifted (scaled) long offset into
// a single address-generation op.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Pointer addition folding both the I2L conversion and the scale shift
// of an int index (sxtw extend + shift in the addressing mode).
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10836 
// Combined ConvI2L + left shift implemented as a single SBFIZ
// (sign-extend the int source into the shifted bit-field).
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    // Field width is capped at 32 because the source is a 32-bit int;
    // lsb position is the shift amount modulo 64.
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
10851 
10852 // Pointer Immediate Addition
10853 // n.b. this needs to be more expensive than using an indirect memory
10854 // operand
// Pointer + add/sub-encodable immediate.
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10868 
10869 // Long Addition
// Long (64-bit) addition, register + register.
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10885 
// Long Immediate Addition.
// No constant pool entries required.
// Long addition, register + add/sub-encodable immediate.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10900 
10901 // Integer Subtraction
// Integer (32-bit) subtraction, register - register.
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Immediate Subtraction
// Integer subtraction, register - add/sub-encodable immediate.
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10931 
10932 // Long Subtraction
// Long (64-bit) subtraction, register - register.
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10948 
// Long Immediate Subtraction.
// No constant pool entries required.
// Long subtraction, register - add/sub-encodable immediate.
// Fix: the format string was "sub$dst" (missing separator), producing
// malformed disassembly-style debug output such as "subR0, ...".
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10963 
10964 // Integer Negation (special case for sub)
10965 
// Integer negation: dst = 0 - src, matched from SubI with a zero
// left operand.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
10979 
10980 // Long Negation
10981 
// Long negation: dst = 0 - src, matched from SubL with a zero left
// operand.
// Fix: the source operand was declared iRegIorL2I, an int operand
// class; the operand of a 64-bit SubL is a long, so it must be iRegL
// (consistent with subL_reg_reg above).
instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
10995 
10996 // Integer Multiply
10997 
// Integer (32-bit) multiply.
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// Widening 32x32 -> 64-bit signed multiply: matches a long multiply of
// two int operands and emits a single SMULL instead of two converts + mul.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
11027 
11028 // Long Multiply
11029 
// Long (64-bit) multiply, low 64 bits of the product.
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}

// High 64 bits of a signed 64x64-bit multiply (MulHiL), via SMULH.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  format %{ "smulh   $dst, $src1, $src2, \t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
11060 
11061 // Combined Integer Multiply & Add/Sub
11062 
// Fused integer multiply-add: dst = src3 + src1 * src2.
// Fix: the format string said "madd" but the encoding emits the 32-bit
// form maddw; keep the debug output consistent with the instruction.
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
11078 
// Fused integer multiply-subtract: dst = src3 - src1 * src2.
// Fix: the format string said "msub" but the encoding emits the 32-bit
// form msubw; keep the debug output consistent with the instruction.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
11094 
11095 // Combined Long Multiply & Add/Sub
11096 
// Fused long multiply-add: dst = src3 + src1 * src2.
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

// Fused long multiply-subtract: dst = src3 - src1 * src2.
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
11128 
11129 // Integer Divide
11130 
// Integer (32-bit) division via SDIVW.
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}

// (src1 >> 31) >>> 31 collapses to a single logical shift extracting
// the sign bit (0 or 1) -- part of the power-of-two division idiom.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// src + (sign bit of src): folds the sign-extract add into one shifted
// addw -- the rounding adjustment for signed divide-by-2.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
11164 
11165 // Long Divide
11166 
// Long (64-bit) division via SDIV.
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}

// (src1 >> 63) >>> 63 collapses to a single logical shift extracting
// the sign bit -- long counterpart of signExtract above.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
11186 
// src + (sign bit of src) via a single shifted add -- long counterpart
// of div2Round.
// Fix: the format string omitted the LSR shift ("add $dst, $src, $div1"),
// misrepresenting the emitted shifted-register add; mirror the int
// twin's "addw $dst, $src, LSR $div1" form.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "add $dst, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
11200 
11201 // Integer Remainder
11202 
// Integer remainder: sdivw into a scratch register, then
// msubw dst = src1 - rscratch1 * src2.
// Fix: the format string contained an unbalanced "msubw(" -- render the
// second emitted instruction in plain assembly form like the first.
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
11213 
11214 // Long Remainder
11215 
// Long remainder: sdiv into a scratch register, then
// msub dst = src1 - rscratch1 * src2.
// Fix: the format string contained an unbalanced "msub(" and lacked the
// "\t" after "\n" that the other two-line formats (e.g. modI) use.
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
11226 
11227 // Integer Shifts
11228 
11229 // Shift Left Register
// Shift Left Register
// Integer shift left by a register amount (variable shift).
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// Shift amount is masked to 0..31, matching Java shift semantics
// for 32-bit values.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11324 
11325 // Combined Int Mask and Right Shift (using UBFM)
11326 // TODO
11327 
11328 // Long Shifts
11329 
11330 // Shift Left Register
// Shift Left Register
// Long shift left by a register amount (variable shift).
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// Shift amount is masked to 0..63, matching Java shift semantics
// for 64-bit values.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// A special-case pattern for card table stores.
// Logical right shift of a pointer reinterpreted as a long (CastP2X).
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11441 
11442 // BEGIN This section of the file is automatically generated. Do not edit --------------
11443 
// NOTE(review): this rule is inside the auto-generated section -- any
// comment edits here will be lost on regeneration.
// Bitwise NOT of a long (XorL with -1) via EON with the zero register.
instruct regL_not_reg(iRegLNoSp dst,
                         iRegL src1, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
// Bitwise NOT of an int (XorI with -1) via EONW with the zero register.
instruct regI_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
11476 
// dst = src1 & ~src2 (int): AndI of an inverted operand folds to BICW.
// NOTE(review): inside the auto-generated section; comments here will
// be lost on regeneration.
instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// dst = src1 & ~src2 (long): AndL of an inverted operand folds to BIC.
instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11510 
// dst = src1 | ~src2 (int): OrI of an inverted operand folds to ORNW.
// NOTE(review): inside the auto-generated section; comments here will
// be lost on regeneration.
instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// dst = src1 | ~src2 (long): OrL of an inverted operand folds to ORN.
instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11544 
// Xor-not fusion: (XorI -1 (XorI src2 src1)) == ~(src1 ^ src2), which
// maps to a single AArch64 "eonw" (exclusive-or-not) instruction.
// The -1 sits at the outer level here because the ideal graph canonicalizes
// the NOT as an outer XorI with constant -1.
instruct XorI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// 64-bit variant: ~(src1 ^ src2) -> "eon".
instruct XorL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11578 
// And with a shifted, complemented operand:
//   (AndX src1 (XorX (shift src2 src3) -1)) == src1 & ~(src2 shift src3)
// folds into a single bic(w) using AArch64's shifted-register form.
// The shift count is masked (& 0x1f for 32-bit, & 0x3f for 64-bit) to
// match Java's shift semantics before being encoded as the immediate
// shift amount. Three shift kinds are covered: LSR, ASR, LSL, each in a
// 32-bit (I) and a 64-bit (L) variant.
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: src1 & ~(src2 >>> src3) -> bic with LSR.
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: src1 & ~(src2 >> src3) -> bicw with ASR.
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: src1 & ~(src2 >> src3) -> bic with ASR.
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: src1 & ~(src2 << src3) -> bicw with LSL.
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: src1 & ~(src2 << src3) -> bic with LSL.
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11686 
// Xor with a shifted, complemented operand:
//   (XorX -1 (XorX (shift src2 src3) src1)) == ~(src1 ^ (src2 shift src3))
// folds into a single eon(w) using the shifted-register form. Shift
// counts are masked to 0x1f (32-bit) / 0x3f (64-bit) per Java semantics.
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: ~(src1 ^ (src2 >>> src3)) -> eon with LSR.
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: ~(src1 ^ (src2 >> src3)) -> eonw with ASR.
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: ~(src1 ^ (src2 >> src3)) -> eon with ASR.
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: ~(src1 ^ (src2 << src3)) -> eonw with LSL.
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: ~(src1 ^ (src2 << src3)) -> eon with LSL.
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11794 
// Or with a shifted, complemented operand:
//   (OrX src1 (XorX (shift src2 src3) -1)) == src1 | ~(src2 shift src3)
// folds into a single orn(w) using the shifted-register form. Shift
// counts are masked to 0x1f (32-bit) / 0x3f (64-bit) per Java semantics.
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: src1 | ~(src2 >>> src3) -> orn with LSR.
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: src1 | ~(src2 >> src3) -> ornw with ASR.
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: src1 | ~(src2 >> src3) -> orn with ASR.
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: src1 | ~(src2 << src3) -> ornw with LSL.
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: src1 | ~(src2 << src3) -> orn with LSL.
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11902 
// And with a shifted register operand: (AndX src1 (shift src2 src3))
// folds the shift into the and(w)/andr shifted-register form, saving a
// separate shift instruction. Shift counts masked per Java semantics.
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: src1 & (src2 >>> src3). "andr" is the macro-assembler's name
// for the 64-bit AND (to avoid clashing with C++'s "and" keyword).
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: src1 & (src2 >> src3) -> andw with ASR.
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: src1 & (src2 >> src3) -> andr with ASR.
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: src1 & (src2 << src3) -> andw with LSL.
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: src1 & (src2 << src3) -> andr with LSL.
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12016 
// Xor with a shifted register operand: (XorX src1 (shift src2 src3))
// folds the shift into the eor(w) shifted-register form. Shift counts
// masked to 0x1f / 0x3f per Java semantics.
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: src1 ^ (src2 >>> src3) -> eor with LSR.
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: src1 ^ (src2 >> src3) -> eorw with ASR.
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: src1 ^ (src2 >> src3) -> eor with ASR.
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: src1 ^ (src2 << src3) -> eorw with LSL.
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: src1 ^ (src2 << src3) -> eor with LSL.
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12130 
// Or with a shifted register operand: (OrX src1 (shift src2 src3))
// folds the shift into the orr(w) shifted-register form. Shift counts
// masked to 0x1f / 0x3f per Java semantics.
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: src1 | (src2 >>> src3) -> orr with LSR.
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: src1 | (src2 >> src3) -> orrw with ASR.
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: src1 | (src2 >> src3) -> orr with ASR.
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: src1 | (src2 << src3) -> orrw with LSL.
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: src1 | (src2 << src3) -> orr with LSL.
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12244 
// Add with a shifted register operand: (AddX src1 (shift src2 src3))
// folds the shift into the add(w) shifted-register form. Shift counts
// masked to 0x1f / 0x3f per Java semantics.
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: src1 + (src2 >>> src3) -> add with LSR.
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: src1 + (src2 >> src3) -> addw with ASR.
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: src1 + (src2 >> src3) -> add with ASR.
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: src1 + (src2 << src3) -> addw with LSL.
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: src1 + (src2 << src3) -> add with LSL.
instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12358 
// Subtract with a shifted register operand: (SubX src1 (shift src2 src3))
// folds the shift into the sub(w) shifted-register form. Shift counts
// masked to 0x1f / 0x3f per Java semantics.
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: src1 - (src2 >>> src3) -> sub with LSR.
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: src1 - (src2 >> src3) -> subw with ASR.
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: src1 - (src2 >> src3) -> sub with ASR.
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: src1 - (src2 << src3) -> subw with LSL.
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: src1 - (src2 << src3) -> sub with LSL.
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12472 
12473 
12474 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// The pair (src << lshift) >> rshift is collapsed into one bit-field
// move.  Per the A64 SBFM/UBFM encoding, immr = (rshift - lshift) & N
// and imms = N - lshift (N = 63 for the 64-bit form, 31 for the
// 32-bit form) reproduce the two-shift result in a single instruction.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  // n->in(2) is rshift_count, n->in(1)->in(2) is lshift_count.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of sbfmL; arithmetic (signed) right shift.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Unsigned (logical) right shift after a left shift -> UBFM.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of ubfmL.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
// Bitfield extract with shift & mask
// (src >>> rshift) & mask, where mask is 2^k - 1 (guaranteed by
// immI_bitmask/immL_bitmask), is a UBFX of width k starting at rshift.

instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  // NOTE(review): format omits the $rshift operand; display-only,
  // the emitted instruction below does use it.
  format %{ "ubfxw $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    // mask is 2^width - 1, so width = log2(mask + 1).
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// 64-bit variant of ubfxwI.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}

// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// The 64-bit ubfx zero-fills the upper bits, which is exactly what the
// ConvI2L of a non-negative value requires.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12616 
// Rotations
// (src1 << lshift) | (src2 >>> rshift) with lshift + rshift == word
// size is the EXTR (extract from register pair) idiom; when
// src1 == src2 it degenerates to a rotate.  The predicate checks
// (lshift + rshift) & (N-1) == 0 via the matched node's inputs.

instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// 32-bit variant.
// NOTE(review): format prints "extr" but the emitted instruction is
// extrw; display-only mismatch.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// Same pattern with Add instead of Or: the shifted fields cannot
// overlap when the shift counts sum to the word size, so Add == Or.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// 32-bit variant of extrAddL (emits extrw; see note on extrOrI).
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12678 
12679 
// rol expander
// AArch64 has no rotate-left-by-register instruction, so rotate left
// by n is implemented as rotate right by -n: negate the shift count
// into rscratch1 (subw from zr), then RORV.

instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol expander
// 32-bit variant of rolL_rReg.

instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Match the variable-rotate-left idiom
//   (x << s) | (x >>> (64 - s))
// and expand to the rol expander above.  A c0 (zero) variant is
// needed too because (0 - s) and (64 - s) shift by the same amount
// once masked to 6 bits.
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-left idioms (constant 32 or 0, masked to 5 bits).
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// ror expander
// Rotate right has a direct instruction (RORV), so this is a single
// insn and cheaper than rol.

instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander
// 32-bit variant of rorL_rReg.

instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Variable-rotate-right idioms, mirroring the rol matchers above:
//   (x >>> s) | (x << (64 - s))  and the equivalent (0 - s) form.
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
12813 
// Add/subtract (extended)
// AArch64 ADD/SUB accept an extended-register operand (sxtb/sxth/
// sxtw/uxtb/uxth/uxtw).  The rules below recognize the ideal-graph
// idioms that express those extensions -- ConvI2L, a matched
// left/right shift pair, or an AndI/AndL with an all-ones mask --
// and fold them into a single extended add/subtract.

// long add of a sign-extended int: ConvI2L == sxtw.
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};

// long subtract of a sign-extended int.
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};


// (src2 << 16) >> 16 sign-extends the low half-word: add with sxth.
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// (src2 << 24) >> 24 sign-extends the low byte: add with sxtb.
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// (src2 << 24) >>> 24 zero-extends the low byte: add with uxtb.
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit shift-pair extensions: shift counts 48/32/56 select the
// low half-word, word and byte respectively.
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}


// And-with-mask zero extensions: masks 0xff / 0xffff / 0xffffffff
// select uxtb / uxth / uxtw.
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Subtract counterparts of the And-with-mask rules above.
instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
13064 
13065 // END This section of the file is automatically generated. Do not edit --------------
13066 
13067 // ============================================================================
13068 // Floating Point Arithmetic Instructions
13069 
13070 instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
13071   match(Set dst (AddF src1 src2));
13072 
13073   ins_cost(INSN_COST * 5);
13074   format %{ "fadds   $dst, $src1, $src2" %}
13075 
13076   ins_encode %{
13077     __ fadds(as_FloatRegister($dst$$reg),
13078              as_FloatRegister($src1$$reg),
13079              as_FloatRegister($src2$$reg));
13080   %}
13081 
13082   ins_pipe(fp_dop_reg_reg_s);
13083 %}
13084 
13085 instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
13086   match(Set dst (AddD src1 src2));
13087 
13088   ins_cost(INSN_COST * 5);
13089   format %{ "faddd   $dst, $src1, $src2" %}
13090 
13091   ins_encode %{
13092     __ faddd(as_FloatRegister($dst$$reg),
13093              as_FloatRegister($src1$$reg),
13094              as_FloatRegister($src2$$reg));
13095   %}
13096 
13097   ins_pipe(fp_dop_reg_reg_d);
13098 %}
13099 
13100 instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
13101   match(Set dst (SubF src1 src2));
13102 
13103   ins_cost(INSN_COST * 5);
13104   format %{ "fsubs   $dst, $src1, $src2" %}
13105 
13106   ins_encode %{
13107     __ fsubs(as_FloatRegister($dst$$reg),
13108              as_FloatRegister($src1$$reg),
13109              as_FloatRegister($src2$$reg));
13110   %}
13111 
13112   ins_pipe(fp_dop_reg_reg_s);
13113 %}
13114 
13115 instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
13116   match(Set dst (SubD src1 src2));
13117 
13118   ins_cost(INSN_COST * 5);
13119   format %{ "fsubd   $dst, $src1, $src2" %}
13120 
13121   ins_encode %{
13122     __ fsubd(as_FloatRegister($dst$$reg),
13123              as_FloatRegister($src1$$reg),
13124              as_FloatRegister($src2$$reg));
13125   %}
13126 
13127   ins_pipe(fp_dop_reg_reg_d);
13128 %}
13129 
13130 instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
13131   match(Set dst (MulF src1 src2));
13132 
13133   ins_cost(INSN_COST * 6);
13134   format %{ "fmuls   $dst, $src1, $src2" %}
13135 
13136   ins_encode %{
13137     __ fmuls(as_FloatRegister($dst$$reg),
13138              as_FloatRegister($src1$$reg),
13139              as_FloatRegister($src2$$reg));
13140   %}
13141 
13142   ins_pipe(fp_dop_reg_reg_s);
13143 %}
13144 
13145 instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
13146   match(Set dst (MulD src1 src2));
13147 
13148   ins_cost(INSN_COST * 6);
13149   format %{ "fmuld   $dst, $src1, $src2" %}
13150 
13151   ins_encode %{
13152     __ fmuld(as_FloatRegister($dst$$reg),
13153              as_FloatRegister($src1$$reg),
13154              as_FloatRegister($src2$$reg));
13155   %}
13156 
13157   ins_pipe(fp_dop_reg_reg_d);
13158 %}
13159 
// We cannot use these fused mul w add/sub ops because they don't
// produce the same result as the equivalent separated ops
// (essentially they don't round the intermediate result). That's a
// shame. Leaving them here in case we can identify cases where it is
// legitimate to use them.
13165 
13166 
13167 // instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
13168 //   match(Set dst (AddF (MulF src1 src2) src3));
13169 
13170 //   format %{ "fmadds   $dst, $src1, $src2, $src3" %}
13171 
13172 //   ins_encode %{
13173 //     __ fmadds(as_FloatRegister($dst$$reg),
13174 //              as_FloatRegister($src1$$reg),
13175 //              as_FloatRegister($src2$$reg),
13176 //              as_FloatRegister($src3$$reg));
13177 //   %}
13178 
13179 //   ins_pipe(pipe_class_default);
13180 // %}
13181 
13182 // instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
13183 //   match(Set dst (AddD (MulD src1 src2) src3));
13184 
13185 //   format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
13186 
13187 //   ins_encode %{
13188 //     __ fmaddd(as_FloatRegister($dst$$reg),
13189 //              as_FloatRegister($src1$$reg),
13190 //              as_FloatRegister($src2$$reg),
13191 //              as_FloatRegister($src3$$reg));
13192 //   %}
13193 
13194 //   ins_pipe(pipe_class_default);
13195 // %}
13196 
13197 // instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
13198 //   match(Set dst (AddF (MulF (NegF src1) src2) src3));
13199 //   match(Set dst (AddF (NegF (MulF src1 src2)) src3));
13200 
13201 //   format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
13202 
13203 //   ins_encode %{
13204 //     __ fmsubs(as_FloatRegister($dst$$reg),
13205 //               as_FloatRegister($src1$$reg),
13206 //               as_FloatRegister($src2$$reg),
13207 //              as_FloatRegister($src3$$reg));
13208 //   %}
13209 
13210 //   ins_pipe(pipe_class_default);
13211 // %}
13212 
13213 // instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
13214 //   match(Set dst (AddD (MulD (NegD src1) src2) src3));
13215 //   match(Set dst (AddD (NegD (MulD src1 src2)) src3));
13216 
13217 //   format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
13218 
13219 //   ins_encode %{
13220 //     __ fmsubd(as_FloatRegister($dst$$reg),
13221 //               as_FloatRegister($src1$$reg),
13222 //               as_FloatRegister($src2$$reg),
13223 //               as_FloatRegister($src3$$reg));
13224 //   %}
13225 
13226 //   ins_pipe(pipe_class_default);
13227 // %}
13228 
13229 // instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
13230 //   match(Set dst (SubF (MulF (NegF src1) src2) src3));
13231 //   match(Set dst (SubF (NegF (MulF src1 src2)) src3));
13232 
13233 //   format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
13234 
13235 //   ins_encode %{
13236 //     __ fnmadds(as_FloatRegister($dst$$reg),
13237 //                as_FloatRegister($src1$$reg),
13238 //                as_FloatRegister($src2$$reg),
13239 //                as_FloatRegister($src3$$reg));
13240 //   %}
13241 
13242 //   ins_pipe(pipe_class_default);
13243 // %}
13244 
13245 // instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
13246 //   match(Set dst (SubD (MulD (NegD src1) src2) src3));
13247 //   match(Set dst (SubD (NegD (MulD src1 src2)) src3));
13248 
13249 //   format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
13250 
13251 //   ins_encode %{
13252 //     __ fnmaddd(as_FloatRegister($dst$$reg),
13253 //                as_FloatRegister($src1$$reg),
13254 //                as_FloatRegister($src2$$reg),
13255 //                as_FloatRegister($src3$$reg));
13256 //   %}
13257 
13258 //   ins_pipe(pipe_class_default);
13259 // %}
13260 
13261 // instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
13262 //   match(Set dst (SubF (MulF src1 src2) src3));
13263 
13264 //   format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
13265 
13266 //   ins_encode %{
13267 //     __ fnmsubs(as_FloatRegister($dst$$reg),
13268 //                as_FloatRegister($src1$$reg),
13269 //                as_FloatRegister($src2$$reg),
13270 //                as_FloatRegister($src3$$reg));
13271 //   %}
13272 
13273 //   ins_pipe(pipe_class_default);
13274 // %}
13275 
13276 // instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
13277 //   match(Set dst (SubD (MulD src1 src2) src3));
13278 
13279 //   format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
13280 
13281 //   ins_encode %{
13282 //   // n.b. insn name should be fnmsubd
13283 //     __ fnmsub(as_FloatRegister($dst$$reg),
13284 //                as_FloatRegister($src1$$reg),
13285 //                as_FloatRegister($src2$$reg),
13286 //                as_FloatRegister($src3$$reg));
13287 //   %}
13288 
13289 //   ins_pipe(pipe_class_default);
13290 // %}
13291 
13292 
// FP division; costed much higher than add/mul (18x / 32x INSN_COST)
// to reflect the long-latency divide unit.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}

instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
13322 
// FP negate.
// NOTE(review): format prints "fneg" while the emitted instruction is
// fnegs; display-only mismatch (negD correctly prints "fnegd").
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  format %{ "fneg   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}

instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}

// FP absolute value.
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}

instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
13376 
// Double-precision square root: fsqrtd dst, src.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  // sqrt shares the (slow) divide unit, hence the high cost.
  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  // Fixed: this is a double-precision operation; it was previously scheduled
  // in the single-precision divide pipeline class (fp_div_s) by mistake.
  ins_pipe(fp_div_d);
%}
13389 
// Single-precision square root, matched via the double-precision sqrt node:
// (float)sqrt((double)src) == sqrtf(src), so a single fsqrts suffices.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  // sqrt shares the (slow) divide unit, hence the high cost.
  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  // Fixed: this is a single-precision operation; it was previously scheduled
  // in the double-precision divide pipeline class (fp_div_d) by mistake.
  ins_pipe(fp_div_s);
%}
13402 
13403 // ============================================================================
13404 // Logical Instructions
13405 
13406 // Integer Logical Instructions
13407 
13408 // And Instructions
13409 
13410 
// Logical AND of two int registers: andw dst, src1, src2.
// NOTE(review): cr appears in the operand list without an effect() clause and
// the emitted andw does not set flags — presumably a leftover; confirm intent.
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13425 
// Logical AND of an int register with an immediate encodable as an AArch64
// logical immediate (immILog): andw dst, src1, #imm.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  // Fixed format string: the emitted instruction is the non-flag-setting
  // "andw", not "andsw".
  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13440 
13441 // Or Instructions
13442 
// Logical OR of two int registers: orrw dst, src1, src2.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Logical OR of an int register with a logical immediate: orrw dst, src1, #imm.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// Logical XOR of two int registers: eorw dst, src1, src2.
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Logical XOR of an int register with a logical immediate: eorw dst, src1, #imm.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13504 
13505 // Long Logical Instructions
13506 // TODO
13507 
// Logical AND of two long registers: and dst, src1, src2.
// NOTE(review): cr appears without an effect() clause and andr does not set
// flags — presumably a leftover; confirm intent.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  // Fixed format annotation: these are long (64-bit) ops, not int.
  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Logical AND of a long register with a logical immediate: and dst, src1, #imm.
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Or Instructions

// Logical OR of two long registers: orr dst, src1, src2.
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Logical OR of a long register with a logical immediate: orr dst, src1, #imm.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// Logical XOR of two long registers: eor dst, src1, src2.
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Logical XOR of a long register with a logical immediate: eor dst, src1, #imm.
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13601 
// Sign-extending int->long conversion: sbfm (sxtw) picks up bits 0..31 and
// sign-extends into the 64-bit destination.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// this pattern occurs in bigmath arithmetic
// Unsigned int->long: (ConvI2L src) & 0xFFFFFFFF collapses to a single
// zero-extension (ubfm of bits 0..31).
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Long->int truncation: a 32-bit register move keeps the low word and zeroes
// the high word.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Int->boolean: dst = (src != 0) ? 1 : 0, via cmpw against zr plus cset.
// Clobbers the flags register (KILL cr).
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Pointer->boolean: dst = (src != NULL) ? 1 : 0, via 64-bit cmp plus cset.
// Clobbers the flags register (KILL cr).
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
13676 
// Double->float narrowing conversion: fcvtd dst, src.
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2f);
%}

// Float->double widening conversion: fcvts dst, src.
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}

// Float->int conversion, round toward zero: fcvtzsw dst, src.
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}

// Float->long conversion, round toward zero: fcvtzs dst, src.
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}

// Signed int->float conversion: scvtfws dst, src.
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}

// Signed long->float conversion: scvtfs dst, src.
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}

// Double->int conversion, round toward zero: fcvtzdw dst, src.
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}

// Double->long conversion, round toward zero: fcvtzd dst, src.
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}

// Signed int->double conversion: scvtfwd dst, src.
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}

// Signed long->double conversion: scvtfd dst, src.
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
13806 
13807 // stack <-> reg and reg <-> reg shuffles with no conversion
13808 
// Raw bit move, float stack slot -> int register (no value conversion):
// 32-bit load from the stack slot.
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Raw bit move, int stack slot -> float register: 32-bit FP load.
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Raw bit move, double stack slot -> long register: 64-bit load.
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Raw bit move, long stack slot -> double register: 64-bit FP load.
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Raw bit move, float register -> int stack slot: 32-bit FP store.
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Raw bit move, int register -> float stack slot: 32-bit store.
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
13916 
// Raw bit move, double register -> long stack slot: 64-bit FP store.
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  // Fixed format string: operands were reversed ("strd $dst, $src"); the
  // emitted instruction stores $src to $dst, matching the sibling
  // reg->stack formats.
  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
13934 
// Raw bit move, long register -> double stack slot: 64-bit store.
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
13952 
// Raw bit move, float register -> int register, via fmov (no conversion).
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}

// Raw bit move, int register -> float register, via fmov.
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}

// Raw bit move, double register -> long register, via fmov.
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}

// Raw bit move, long register -> double register, via fmov.
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
14024 
14025 // ============================================================================
14026 // clearing of an array
14027 
// Clear an array whose word count is in a register. Word count must be in
// r11 and the base address in r10; both are clobbered (USE_KILL).
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, $cnt$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}

// Clear an array whose word count is a compile-time constant.
// NOTE(review): tmp (r11) is declared TEMP but not referenced in the encode
// body — presumably clobbered inside zero_words; confirm against the
// MacroAssembler implementation.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 tmp, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL base, TEMP tmp);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
  %}

  ins_pipe(pipe_class_memory);
%}
14057 
14058 // ============================================================================
14059 // Overflow Math Instructions
14060 
// Int add overflow check: cmnw (add-and-set-flags, result discarded) leaves
// the V flag set on signed overflow.
instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Int add overflow check with an add/sub-encodable immediate.
instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long add overflow check: 64-bit cmn sets V on signed overflow.
instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Long add overflow check with an add/sub-encodable immediate.
instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
14112 
// Int subtract overflow check: cmpw (subtract-and-set-flags, result
// discarded) leaves the V flag set on signed overflow.
instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Int subtract overflow check with an add/sub-encodable immediate.
instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long subtract overflow check.
instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Long subtract overflow check with an add/sub-encodable immediate.
instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Int negate overflow check: 0 - op1 overflows only for INT_MIN.
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  format %{ "cmpw  zr, $op1\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long negate overflow check: 0 - op1 overflows only for LONG_MIN.
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
14190 
// Int multiply overflow check. smull produces the exact 64-bit product; the
// product overflows int iff it differs from the sign-extension of its low
// 32 bits. The cselw/cmpw tail then re-encodes that NE/EQ outcome as the V
// flag (0x80000000 - 1 sets V), which is what OverflowMulI consumers test.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Int multiply overflow check fused with a branch: when the If tests only
// overflow/no_overflow we can branch directly on the NE/EQ outcome of the
// sign-extension compare, skipping the V-flag materialization above.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    // VS (overflow) maps to NE of the compare above; VC maps to EQ.
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}

// Long multiply overflow check. mul gives the low 64 bits, smulh the high
// 64; the product overflows long iff the high half is not the sign
// extension (ASR #31 here applied to the smulh/mul pair) of the low half.
// The cselw/cmpw tail re-encodes NE/EQ as the V flag, as in the int case.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #31\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 31);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Long multiply overflow check fused with a branch; see the int branch form
// above for the VS/VC -> NE/EQ mapping.
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #31\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 31);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
14280 
14281 // ============================================================================
14282 // Compare Instructions
14283 
// Signed int compare, register-register: cmpw op1, op2.
instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed int compare against the constant zero.
instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, 0" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed int compare against an add/sub-encodable immediate (single insn).
instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set dst (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed int compare against an arbitrary immediate; costs more because the
// constant may need to be materialized first.
instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14339 
14340 // Unsigned compare Instructions; really, same as signed compare
14341 // except it should only be used to feed an If or a CMovI which takes a
14342 // cmpOpU.
14343 
// Unsigned int compare, register-register; same cmpw, but the result feeds
// a cmpOpU via rFlagsRegU (see header comment above).
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned int compare against the constant zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an arbitrary immediate (may need a
// materializing move, hence the doubled cost).
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14399 
// Signed long compare, register-register: cmp op1, op2.
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed long compare against the constant zero.
// NOTE(review): the format says "tst" but the encoding class is the add/sub
// immediate compare — presumably prints a stale mnemonic; confirm against
// aarch64_enc_cmp_imm_addsub.
instruct compL_reg_immI0(rFlagsReg cr, iRegL op1, immI0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an arbitrary immediate (may need a
// materializing move, hence the doubled cost).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14455 
// Pointer compare, register-register (unsigned flags).
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compressed-pointer (narrow oop) compare, register-register.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Pointer null test: compare op1 against zero.
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Compressed-pointer null test: compare op1 against zero.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
14511 
14512 // FP comparisons
14513 //
14514 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
14515 // using normal cmpOp. See declaration of rFlagsReg for details.
14516 
// Float compare: fcmps sets NZCV from the single-precision comparison;
// the result feeds ordinary conditional ops via rFlagsReg (see the
// file-level note above on CmpF setting a normal flags reg).
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Float compare against the constant 0.0, using the fcmp-with-zero
// form (no second register needed).
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    // 0.0D literal: the macro assembler overload taking a double
    // selects the compare-against-zero encoding.
    __ fcmps(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}
14544 // FROM HERE
14545 
// Double compare: fcmpd sets NZCV from the double-precision comparison.
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Double compare against the constant 0.0, using the fcmp-with-zero
// form (no second register needed).
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}
14573 
// Three-way float compare (CmpF3): dst = -1 if src1 < src2 or the
// operands are unordered (NaN), 0 if equal, +1 if greater.
// Fixes: balanced the missing ')' in the csinvw line of the format
// string, and removed the unused 'done' label (declared and bound but
// never branched to).
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
14601 
// Three-way double compare (CmpD3): dst = -1 if src1 < src2 or the
// operands are unordered (NaN), 0 if equal, +1 if greater.
// Fixes: balanced the missing ')' in the csinvw line of the format
// string, and removed the unused 'done' label (declared and bound but
// never branched to).
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
14628 
// Three-way float compare against constant 0.0: dst = -1 if src1 < 0.0
// or unordered (NaN), 0 if equal, +1 if greater.
// Fixes: balanced the missing ')' in the csinvw line of the format
// string, and removed the unused 'done' label (declared and bound but
// never branched to).
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0D);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
14655 
// Three-way double compare against constant 0.0: dst = -1 if src1 < 0.0
// or unordered (NaN), 0 if equal, +1 if greater.
// Fixes: balanced the missing ')' in the csinvw line of the format
// string, and removed the unused 'done' label (declared and bound but
// never branched to).
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0D);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
14681 
// CmpLTMask: dst = (p < q) ? -1 : 0.  csetw produces 0/1, then
// subtracting from zr turns 1 into -1 (all-ones mask).
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// CmpLTMask against zero: (src < 0) ? -1 : 0 is just an arithmetic
// shift right by 31, replicating the sign bit -- one instruction, no
// flags needed despite the KILL cr effect.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
14718 
14719 // ============================================================================
14720 // Max and Min
14721 
// Signed int minimum: compare then conditional-select src1 when LT,
// src2 otherwise.
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 lt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
// FROM HERE

// Signed int maximum: compare then conditional-select src1 when GT,
// src2 otherwise.
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 gt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
14772 
14773 // ============================================================================
14774 // Branch Instructions
14775 
14776 // Direct Branch.
// Unconditional direct branch (Goto).
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}

// Conditional Near Branch
// Signed conditional branch on the flags produced by an integer/FP
// compare.
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}

// Conditional Near Branch Unsigned
// Same as branchCon but for unsigned conditions (pointer and unsigned
// integer compares, which set rFlagsRegU).
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
14832 
14833 // Make use of CBZ and CBNZ.  These instructions, as well as being
14834 // shorter than (cmp; branch), have the additional benefit of not
14835 // killing the flags.
14836 
// Branch on int ==/!= 0 using cbzw/cbnzw (32-bit compare-and-branch);
// the flags register is untouched, hence no KILL cr.
instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Branch on long ==/!= 0 using the 64-bit cbz/cbnz.
instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Branch on pointer ==/!= null using the 64-bit cbz/cbnz.
instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Branch on narrow oop ==/!= narrow null: compressed pointers are
// 32 bits, so the w-form compare-and-branch suffices.
instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Null check of a decoded narrow oop: DecodeN maps narrow null to null,
// so the test can be done directly on the 32-bit compressed value,
// skipping the decode.
instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14921 
// Unsigned int branch against zero.  With a zero operand, unsigned
// "lower or same" (LS) is equivalent to "== 0" and "higher" to "!= 0",
// so EQ/LS select cbzw and the remaining conditions cbnzw.
instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Unsigned long branch against zero; same LS/EQ reasoning as above but
// with the 64-bit cbz/cbnz.
// NOTE(review): this rule matches CmpU (an int compare node) with an
// iRegL operand -- confirm whether a long-typed unsigned compare node
// was intended here.
instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14955 
14956 // Test bit and Branch
14957 
14958 // Patterns for short (< 32KiB) variants
// Branch on the sign of a long: "x < 0" is exactly "bit 63 set", so a
// single tbnz/tbz (via tbr) replaces cmp+branch.  LT maps to NE
// (bit set), GE to EQ (bit clear).  Short (+/-32KiB) variant.
instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Branch on the sign of an int: test bit 31.  Short variant.
instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Branch on a single bit of a long: (x & (1 << k)) ==/!= 0 becomes
// tbz/tbnz on bit k.  The predicate restricts the mask to a power of
// two so exact_log2 yields the bit index.  Short variant.
instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Branch on a single bit of an int; int analogue of the rule above.
// Short variant.
instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
15024 
15025 // And far variants
// Far variants of the four test-bit-and-branch rules above: identical
// matching, but tbr is told the target may be out of tbz/tbnz range
// (/*far*/true) so it can emit an inverted test around an
// unconditional branch.  No ins_short_branch(1) here.
instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far branch on the sign bit of an int (bit 31).
instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far branch on a single (power-of-two mask) bit of a long.
instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far branch on a single (power-of-two mask) bit of an int.
instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15087 
15088 // Test bits
15089 
// Set flags from (op1 & imm) for a long; the predicate requires the
// mask to be encodable as a 64-bit logical immediate so a single tst
// suffices.
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Set flags from (op1 & imm) for an int, mask encodable as a 32-bit
// logical immediate.
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Set flags from (op1 & op2) for a long, register-register form.
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Set flags from (op1 & op2) for an int, register-register form.
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
15137 
15138 
15139 // Conditional Far Branch
15140 // Conditional Far Branch Unsigned
15141 // TODO: fixme
15142 
15143 // counted loop end branch near
// Conditional branch closing a counted loop (signed condition); same
// encoding as branchCon but matched on CountedLoopEnd.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}

// counted loop end branch near Unsigned
// Unsigned-condition variant of branchLoopEnd.
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
15176 
15177 // counted loop end branch far
15178 // counted loop end branch far unsigned
15179 // TODO: fixme
15180 
15181 // ============================================================================
15182 // inlined locking and unlocking
15183 
// Inlined monitor enter: flags indicate lock success/failure for the
// following branch; tmp and tmp2 are scratch registers for the
// encoding.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}

// Inlined monitor exit; mirror of cmpFastLock above.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
15211 
15212 
15213 // ============================================================================
15214 // Safepoint Instructions
15215 
15216 // TODO
15217 // provide a near and far version of this code
15218 
// Safepoint poll: load from the polling page; the VM protects the page
// to trap threads at a safepoint.  The loaded value is discarded (zr).
instruct safePoint(iRegP poll)
%{
  match(SafePoint poll);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
15231 
15232 
15233 // ============================================================================
15234 // Procedure Call/Return Instructions
15235 
15236 // Call Java Static Instruction
15237 
// Static (monomorphic) Java call; the epilog encoding handles
// post-call bookkeeping.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}

// TO HERE

// Call Java Dynamic Instruction
// Dynamic (virtual/interface, via inline cache) Java call.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction
// Call into the VM runtime (full Java-to-runtime transition).
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction
// Leaf runtime call (no safepoint/oop-map bookkeeping at the callee);
// uses the same transition encoding as CallRuntimeDirect.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction
// Leaf runtime call that does not use/preserve FP state; same
// encoding as the other runtime calls on this port.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15323 
15324 // Tail Call; Jump from runtime stub to Java code.
15325 // Also known as an 'interprocedural jump'.
15326 // Target of jump will eventually return to caller.
15327 // TailJump below removes the return address.
// Interprocedural jump from a runtime stub into Java code; the callee
// eventually returns to this frame's caller.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}

// Tail jump carrying an exception oop (in r0); unlike TailCall it
// discards the return address (see comment above TailCalljmpInd).
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
15353 
15354 // Create exception oop: created by stack-crawling runtime code.
15355 // Created exception is now available to this handler, and is setup
15356 // just prior to jumping to this handler. No code emitted.
15357 // TODO check
15358 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
// Marker node: the exception oop is already in r0 when control reaches
// the handler, so this emits no code (size 0).
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}

// Rethrow exception: The exception oop will come in the first
// argument position. Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}


// Return Instruction
// epilog node loads ret address into lr as part of frame pop
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}
15398 
15399 // Die now.
// Halt node for paths that must never execute: emits a breakpoint
// trap (brk #999) so a stray fall-through is caught immediately.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // TODO
    // implement proper trap call here
    __ brk(999);
  %}

  ins_pipe(pipe_class_default);
%}
15414 
15415 // ============================================================================
15416 // Partial Subtype Check
15417 //
// Search the supertype (secondary superclass) array for an instance of the superklass.  Set a hidden
15419 // internal cache on a hit (cache is checked with exposed code in
15420 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
15421 // encoding ALSO sets flags.
15422 
// Partial subtype check producing a result register (zeroed on hit,
// per opcode 0x1); registers are pinned (r4/r0/r2/r5) to match the
// shared stub's calling convention.
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}

// Variant matched when the check result is only compared against null:
// the flags from the encoding are used directly, so the result
// register need not be zeroed (opcode 0x0) and is merely killed.
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
15452 
15453 instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
15454                         iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
15455 %{
15456   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15457   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15458   effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15459 
15460   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
15461   ins_encode %{
15462     // Count is in 8-bit bytes; non-Compact chars are 16 bits.
15463     __ string_compare($str1$$Register, $str2$$Register,
15464                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15465                       $tmp1$$Register,
15466                       fnoreg, fnoreg, StrIntrinsicNode::UU);
15467   %}
15468   ins_pipe(pipe_class_memory);
15469 %}
15470 
15471 instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
15472                         iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
15473 %{
15474   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15475   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15476   effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15477 
15478   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
15479   ins_encode %{
15480     __ string_compare($str1$$Register, $str2$$Register,
15481                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15482                       $tmp1$$Register,
15483                       fnoreg, fnoreg, StrIntrinsicNode::LL);
15484   %}
15485   ins_pipe(pipe_class_memory);
15486 %}
15487 
// String compare, mixed UL encoding.  Unlike the same-encoding variants this
// one needs two vector temporaries (vtmp1/vtmp2), declared TEMP and passed
// through to the stub as FloatRegisters.
instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
15504 
// String compare, mixed LU encoding.  Mirror image of string_compareUL;
// also requires the two vector temporaries.
instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
15521 
// String indexOf, UU encoding, variable substring length.  The -1 passed as
// icnt2 tells the stub that the substring length is not a compile-time
// constant and must be taken from cnt2.
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
15540 
// String indexOf, LL encoding, variable substring length (icnt2 == -1 means
// "length in cnt2", as in string_indexofUU).
instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
15559 
// String indexOf, mixed UL encoding, variable substring length.
instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
15578 
// String indexOf, mixed LU encoding, variable substring length.
instruct string_indexofLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
15597 
// String indexOf, UU encoding, with a small constant substring length
// (immI_le_4).  The constant is passed directly to the stub as icnt2 and zr
// takes the place of the register-count operand, so cnt2 is not an input.
instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
15618 
// String indexOf, LL encoding, constant substring length <= 4
// (same constant-length scheme as string_indexof_conUU).
instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
15639 
// String indexOf, mixed UL encoding.  Note the tighter immediate operand:
// only a constant substring length of exactly 1 (immI_1) is matched here.
instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
15660 
// String indexOf, mixed LU encoding, constant substring length of exactly 1
// (immI_1), mirroring string_indexof_conUL.
instruct string_indexof_conLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
15681 
// String equality, LL encoding.  cnt is a byte count and element size 1 is
// passed straight through to arrays_equals (is_string == true selects the
// string flavour of the stub).
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ arrays_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register,
                     1, /*is_string*/true);
  %}
  ins_pipe(pipe_class_memory);
%}
15698 
// String equality, UU encoding.  The incoming cnt is a byte count, so it is
// halved with an arithmetic shift (asrw ... #1) to get a 16-bit element
// count before calling arrays_equals with element size 2.  Destroying cnt
// in place is fine because it is USE_KILL.
instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ asrw($cnt$$Register, $cnt$$Register, 1);
    __ arrays_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register,
                     2, /*is_string*/true);
  %}
  ins_pipe(pipe_class_memory);
%}
15716 
// byte[] equality (LL encoding): element size 1, is_string == false, so the
// stub reads the lengths from the array headers itself.  tmp (R10) is
// clobbered as scratch.
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $result$$Register, $tmp$$Register,
                     1, /*is_string*/false);
    %}
  ins_pipe(pipe_class_memory);
%}
15732 
// char[] equality (UU encoding): same as array_equalsB but with 16-bit
// elements (element size 2).
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $result$$Register, $tmp$$Register,
                     2, /*is_string*/false);
  %}
  ins_pipe(pipe_class_memory);
%}
15748 
15749 
15750 // fast char[] to byte[] compression
// char[] -> byte[] compression.  Fixed integer registers (R1/R2/R3) are
// consumed, four fixed vector temps (V0-V3) are used by the stub, and the
// result (compressed length / status from char_array_compress) lands in R0.
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                         vRegD_V0 tmp1, vRegD_V1 tmp2,
                         vRegD_V2 tmp3, vRegD_V3 tmp4,
                         iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
                           $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15768 
15769 // fast byte[] to char[] inflation
// byte[] -> char[] inflation.  Produces no value (Universe dummy result);
// three allocatable vector temps plus R3 serve as scratch for the stub.
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
                        vRegD tmp1, vRegD tmp2, vRegD tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
15783 
15784 // encode char[] to byte[] in ISO_8859_1
// ISO-8859-1 encoding of char[] into byte[].  The four fixed vector temps
// (V0-V3) are KILLed rather than TEMP because the stub leaves them trashed;
// result (number of characters encoded) is produced in R0.
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
15802 
15803 // ============================================================================
15804 // This name is KNOWN by the ADLC and cannot be changed.
15805 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
15806 // for this guy.
// ThreadLocal "load": dst is constrained to the dedicated thread register
// class (thread_RegP), so no instruction is needed — cost 0, size 0, empty
// encoding.
instruct tlsLoadP(thread_RegP dst)
%{
  match(Set dst (ThreadLocal));

  ins_cost(0);

  format %{ " -- \t// $dst=Thread::current(), empty" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
15821 
15822 // ====================VECTOR INSTRUCTIONS=====================================
15823 
15824 // Load vector (32 bits)
// 32-bit vector load (ldrs into the S form of a D register).
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}
15834 
15835 // Load vector (64 bits)
// 64-bit vector load (ldrd).
instruct loadV8(vecD dst, vmem8 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}
15845 
15846 // Load Vector (128 bits)
// 128-bit vector load (ldrq into a Q/X register).
instruct loadV16(vecX dst, vmem16 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}
15856 
15857 // Store Vector (32 bits)
// 32-bit vector store (strs).
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}
15867 
15868 // Store Vector (64 bits)
// 64-bit vector store (strd).
instruct storeV8(vecD src, vmem8 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}
15878 
15879 // Store Vector (128 bits)
// 128-bit vector store (strq).
instruct storeV16(vecX src, vmem16 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(vstore_reg_mem128);
%}
15889 
// Replicate a GP-register byte across a 64-bit vector (dup T8B).  Also
// handles length 4 — the shorter vector simply ignores the upper lanes.
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
15902 
// Replicate a GP-register byte across a 128-bit vector (dup T16B).
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
15914 
// Replicate an immediate byte across a 64-bit vector; the constant is
// masked to 8 bits before being handed to the vector-immediate mov.
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
15927 
// Replicate an immediate byte across a 128-bit vector (masked to 8 bits).
instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15939 
// Replicate a 16-bit value across a 64-bit vector (dup T4H); also covers
// length 2.
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
15952 
// Replicate a 16-bit value across a 128-bit vector (dup T8H).
instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
15964 
// Replicate an immediate 16-bit value across a 64-bit vector (masked to
// 16 bits); also covers length 2.
instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
15977 
// Replicate an immediate 16-bit value across a 128-bit vector.
instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15989 
// Replicate a 32-bit value across a 64-bit vector (dup T2S).
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
16001 
// Replicate a 32-bit value across a 128-bit vector (dup T4S).
instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
16013 
// Replicate an immediate 32-bit value across a 64-bit vector.
instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
16025 
// Replicate an immediate 32-bit value across a 128-bit vector.
instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
16037 
// Replicate a 64-bit GP register across a 128-bit vector (dup T2D).
instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
16049 
// Replicate zero into a 128-bit vector.  Implemented as dst EOR dst rather
// than an immediate move; the source operand is the immI0 constant, so the
// EOR's read of an uninitialized dst is harmless (result is all-zero either
// way).  NOTE(review): matches (ReplicateI zero) with length == 2 despite
// the "2L" name — presumably intentional upstream; confirm against the
// matcher rules before changing.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}
16063 
// Replicate a float (FP register source) across a 64-bit vector (dup T2S).
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}
16076 
// Replicate a float across a 128-bit vector (dup T4S).
instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}
16089 
// Replicate a double across a 128-bit vector (dup T2D).
instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
16102 
16103 // ====================REDUCTION ARITHMETIC====================================
16104 
// Add-reduction of a 2-lane int vector plus scalar src1: both lanes are
// extracted to GP registers with umov, then summed with two addw.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp, iRegI tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16123 
// Add-reduction of a 4-lane int vector plus scalar src1: one across-lanes
// addv, extract lane 0 with umov, then a single addw with src1.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16141 
// Multiply-reduction of a 2-lane int vector times scalar src1: extract each
// lane and multiply sequentially.  dst is TEMP as well as result because it
// is written before the last input read.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i\n\t"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16160 
// Multiply-reduction of a 4-lane int vector times scalar src1.  First the
// upper D half is copied down (ins D 0,1) and multiplied element-wise with
// the lower half (mulv T2S), pairing lanes {0*2, 1*3}; the two partial
// products are then extracted and folded in with scalar muls.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "ins   $tmp, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i\n\t"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16185 
// Add-reduction of a 2-lane float vector plus scalar src1.  Strictly
// sequential scalar fadds (not an across-lanes op) — lane 1 is moved to
// lane 0 of tmp via ins, then added — preserving the left-to-right FP
// addition order.
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16205 
// Add-reduction of a 4-lane float vector plus scalar src1.  Each of lanes
// 1..3 is moved into lane 0 of tmp (ins) and folded in with a scalar fadds,
// keeping strict sequential FP addition order.
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16237 
// Multiply-reduction of a 2-lane float vector times scalar src1; same
// sequential lane-extract scheme as reduce_add2F but with fmuls.
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16257 
// Multiply-reduction of a 4-lane float vector times scalar src1; sequential
// lane-extract scheme as in reduce_add4F but with fmuls.
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16289 
// Add-reduction of a 2-double vector: dst = src1 + src2[0] + src2[1].
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  // tmp is a lane-extraction scratch; dst doubles as the accumulator.
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    // dst = src1 + src2[0] (scalar faddd reads lane 0 of src2)
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    // tmp[0] = src2[1]
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16309 
// Multiply-reduction of a 2-double vector: dst = src1 * src2[0] * src2[1].
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  // tmp is a lane-extraction scratch; dst doubles as the accumulator.
  effect(TEMP tmp, TEMP dst);
  // Fixed trailer comment: previously said "add reduction2d" for a
  // multiply reduction.
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
  %}
  ins_encode %{
    // dst = src1 * src2[0] (scalar fmuld reads lane 0 of src2)
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    // tmp[0] = src2[1]
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16329 
16330 // ====================VECTOR ARITHMETIC=======================================
16331 
16332 // --------------------------------- ADD --------------------------------------
16333 
// Byte vector add, 64-bit form (also matches 4-byte vectors).
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Byte vector add, 128-bit form.
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Short (16-bit) vector add, 64-bit form (also matches 2-element vectors).
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Short (16-bit) vector add, 128-bit form.
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Int vector add, 64-bit form.
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Int vector add, 128-bit form.
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Long vector add, 128-bit form (T2D arrangement).
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Float vector add, 64-bit form.
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// Float vector add, 128-bit form.
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16461 
// Double vector add, 128-bit form.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  // Added for consistency with the sibling 2D rules (vsub2D, vmul2D,
  // vdiv2D); this rule previously had no predicate.  A 128-bit double
  // vector always has 2 elements, so this does not change matching.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16474 
16475 // --------------------------------- SUB --------------------------------------
16476 
// Byte vector subtract, 64-bit form (also matches 4-byte vectors).
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Byte vector subtract, 128-bit form.
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Short vector subtract, 64-bit form (also matches 2-element vectors).
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Short vector subtract, 128-bit form.
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Int vector subtract, 64-bit form.
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Int vector subtract, 128-bit form.
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Long vector subtract, 128-bit form (T2D arrangement).
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Float vector subtract, 64-bit form.
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// Float vector subtract, 128-bit form.
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}

// Double vector subtract, 128-bit form.
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16618 
16619 // --------------------------------- MUL --------------------------------------
16620 
// Short vector multiply, 64-bit form (also matches 2-element vectors).
// Note: no vmul8B/vmul16B rules — byte vector multiply is not implemented.
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// Short vector multiply, 128-bit form.
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// Int vector multiply, 64-bit form.
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// Int vector multiply, 128-bit form.
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// Float vector multiply, 64-bit form.
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// Float vector multiply, 128-bit form.
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// Double vector multiply, 128-bit form.
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16719 
16720 // --------------------------------- MLA --------------------------------------
16721 
// Short vector multiply-accumulate (dst += src1 * src2), 64-bit form.
// Matches the fused shape (AddVS dst (MulVS src1 src2)); dst is both
// read and written by mlav.
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// Short vector multiply-accumulate, 128-bit form.
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// Int vector multiply-accumulate, 64-bit form.
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// Int vector multiply-accumulate, 128-bit form.
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
16778 
16779 // --------------------------------- MLS --------------------------------------
16780 
// Short vector multiply-subtract (dst -= src1 * src2), 64-bit form.
// Matches the fused shape (SubVS dst (MulVS src1 src2)); dst is both
// read and written by mlsv.
instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// Short vector multiply-subtract, 128-bit form.
instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// Int vector multiply-subtract, 64-bit form.
instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// Int vector multiply-subtract, 128-bit form.
instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
16837 
16838 // --------------------------------- DIV --------------------------------------
16839 
// Float vector divide, 64-bit form (integer vector divide has no
// AArch64 SIMD instruction, so only FP forms exist).
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// Float vector divide, 128-bit form.
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// Double vector divide, 128-bit form.
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16881 
16882 // --------------------------------- SQRT -------------------------------------
16883 
// Double vector square root, 128-bit form.
// NOTE(review): no ins_cost here, unlike neighboring rules — presumably
// intentional (default cost), but worth confirming.
instruct vsqrt2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
             as_FloatRegister($src$$reg));
  %}
  ins_pipe(vsqrt_fp128);
%}
16895 
16896 // --------------------------------- ABS --------------------------------------
16897 
// Float vector absolute value, 64-bit form.
instruct vabs2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}

// Float vector absolute value, 128-bit form.
instruct vabs4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}

// Double vector absolute value, 128-bit form.
instruct vabs2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
16936 
16937 // --------------------------------- NEG --------------------------------------
16938 
// Float vector negate, 64-bit form.
instruct vneg2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}

// Float vector negate, 128-bit form.
instruct vneg4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}

// Double vector negate, 128-bit form.
instruct vneg2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
16977 
16978 // --------------------------------- AND --------------------------------------
16979 
// Bitwise AND, 64-bit form.  Logical ops are element-size agnostic, so
// the predicate tests length_in_bytes rather than element count.
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    // andr: "and" is a reserved name in the assembler interface.
    __ andr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}

// Bitwise AND, 128-bit form.
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17008 
17009 // --------------------------------- OR ---------------------------------------
17010 
// Bitwise OR, 64-bit form.  Predicate tests length_in_bytes because
// logical ops are element-size agnostic.
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  // Fixed format string: it previously said "and" although this rule
  // emits orr (compare vor16B below).
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
17025 
// Bitwise OR, 128-bit form.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17039 
17040 // --------------------------------- XOR --------------------------------------
17041 
// Bitwise XOR, 64-bit form (AArch64 mnemonic is eor).
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}

// Bitwise XOR, 128-bit form.
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17070 
17071 // ------------------------------ Shift ---------------------------------------
17072 
// Materialize a left-shift count: broadcast the GP count into every
// byte lane of a vector register for use by sshl/ushl.
instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (LShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
  ins_encode %{
    // Broadcast the count, then negate every lane so sshl/ushl shift right.
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
    __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
17092 
// Byte vector variable shift, 64-bit form.  sshl serves both left and
// arithmetic right shifts: vshiftcntR above negates the count vector, and
// sshl with a negative count shifts right.
instruct vsll8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Byte vector variable shift (left or arithmetic right), 128-bit form.
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Byte vector variable logical right shift, 64-bit form (ushl with the
// negated count from vshiftcntR).
instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Byte vector variable logical right shift, 128-bit form.
instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17148 
// Vector shift left of 4 or 8 bytes by an immediate.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    // Byte operands are promoted to int in Java, so the shift count is
    // masked to 5 bits (Java int shift semantics).
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      // Shifting a byte left by 8..31 yields zero, but SHL's immediate
      // field cannot encode counts >= the lane width, so zero dst with
      // EOR src,src instead.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17168 
// As vsll8B_imm, but for the full 128-bit (16B) arrangement.
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    // Mask to 5 bits per Java int shift semantics; counts >= 8 zero the
    // result, which SHL cannot encode, so EOR src,src zeroes dst instead.
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17187 
// Vector arithmetic right shift of 4 or 8 bytes by an immediate.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    // Mask to 5 bits per Java int shift semantics.
    int sh = (int)$shift$$constant & 31;
    // Arithmetic shift preserves the sign, so counts >= 8 behave like
    // a shift by 7 (the maximum meaningful shift for a byte lane).
    if (sh >= 8) sh = 7;
    // NOTE(review): '-sh & 7' appears to convert the logical count into
    // the value this assembler's sshr() expects for its immediate
    // encoding -- confirm against assembler_aarch64.hpp before changing.
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
17203 
// As vsra8B_imm, but for the full 128-bit (16B) arrangement.
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    // Clamp to 7 (sign-preserving), then convert to the assembler's
    // expected immediate form (see note in vsra8B_imm).
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
17218 
// Vector logical right shift of 4 or 8 bytes by an immediate.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    // Mask to 5 bits per Java int shift semantics.
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      // A logical right shift by >= 8 zeroes a byte lane; USHR cannot
      // encode such counts, so zero dst with EOR src,src.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      // '-sh & 7' converts to the assembler's expected immediate form
      // (see note in vsra8B_imm).
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17238 
// As vsrl8B_imm, but for the full 128-bit (16B) arrangement.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17257 
// Vector shift of 2 or 4 shorts by per-lane counts in a vector register.
// As in vsll16B, SSHL handles both directions via the sign of the lane
// count, so both LShiftVS and RShiftVS are matched here.
instruct vsll4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17272 
// As vsll4S, but for the full 128-bit (8H) arrangement.
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17286 
// Vector logical right shift of 2 or 4 shorts by per-lane counts in a
// vector register; see vsrl8B for the USHL negative-count convention.
instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17300 
// As vsrl4S, but for the full 128-bit (8H) arrangement.
instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17313 
// Vector shift left of 2 or 4 shorts by an immediate; same structure as
// vsll8B_imm with the zeroing threshold at the 16-bit lane width.
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    // Mask to 5 bits per Java int shift semantics (shorts promote to int).
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      // Counts >= 16 zero a short lane; SHL cannot encode them, so zero
      // dst with EOR src,src (arrangement 8B covers the same 64 bits).
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17333 
// As vsll4S_imm, but for the full 128-bit (8H) arrangement.
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17352 
// Vector arithmetic right shift of 2 or 4 shorts by an immediate; same
// structure as vsra8B_imm with clamp 15 and 4-bit immediate conversion.
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    // Sign-preserving: counts >= 16 behave like a shift by 15.
    if (sh >= 16) sh = 15;
    // See the encoding note in vsra8B_imm.
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
17368 
// As vsra4S_imm, but for the full 128-bit (8H) arrangement.
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
17383 
// Vector logical right shift of 2 or 4 shorts by an immediate; same
// structure as vsrl8B_imm with the zeroing threshold at 16.
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17403 
// As vsrl4S_imm, but for the full 128-bit (8H) arrangement.
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17422 
// Vector shift of 2 ints by per-lane counts in a vector register; as in
// vsll16B, SSHL covers both directions via the sign of the lane count.
instruct vsll2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17436 
// As vsll2I, but for the full 128-bit (4S) arrangement.
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17450 
// Vector logical right shift of 2 ints by per-lane counts in a vector
// register; see vsrl8B for the USHL negative-count convention.
instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17463 
// As vsrl2I, but for the full 128-bit (4S) arrangement.
instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17476 
// Vector shift left of 2 ints by an immediate.  '& 31' matches Java's
// int shift masking; no zeroing branch is needed because the masked count
// never reaches the 32-bit lane width.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
17489 
// As vsll2I_imm, but for the full 128-bit (4S) arrangement.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
17502 
// Vector arithmetic right shift of 2 ints by an immediate.
// '-c & 31' converts the masked Java shift count into the form this
// assembler's sshr() expects for its immediate encoding (same convention
// as the '-sh & 7' in vsra8B_imm) -- confirm against assembler_aarch64.hpp.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
17515 
// As vsra2I_imm, but for the full 128-bit (4S) arrangement.
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
17528 
// Vector logical right shift of 2 ints by an immediate; same negated
// immediate convention as vsra2I_imm.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
17541 
// As vsrl2I_imm, but for the full 128-bit (4S) arrangement.
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
17554 
// Vector shift of 2 longs by per-lane counts in a vector register; as in
// vsll16B, SSHL covers both directions via the sign of the lane count.
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17568 
// Vector logical right shift of 2 longs by per-lane counts in a vector
// register; see vsrl8B for the USHL negative-count convention.
instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17581 
// Vector shift left of 2 longs by an immediate.  '& 63' matches Java's
// long shift masking; the masked count never reaches the 64-bit lane
// width, so no zeroing branch is needed.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
17594 
// Vector arithmetic right shift of 2 longs by an immediate; same negated
// immediate convention as vsra2I_imm, masked to the 64-bit lane range.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
17607 
// Vector logical right shift of 2 longs by an immediate; same negated
// immediate convention as vsrl2I_imm, masked to the 64-bit lane range.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
17620 
17621 //----------PEEPHOLE RULES-----------------------------------------------------
17622 // These must follow all instruction definitions as they use the names
17623 // defined in the instructions definitions.
17624 //
17625 // peepmatch ( root_instr_name [preceding_instruction]* );
17626 //
17627 // peepconstraint %{
17628 // (instruction_number.operand_name relational_op instruction_number.operand_name
17629 //  [, ...] );
17630 // // instruction numbers are zero-based using left to right order in peepmatch
17631 //
17632 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
17633 // // provide an instruction_number.operand_name for each operand that appears
17634 // // in the replacement instruction's match rule
17635 //
17636 // ---------VM FLAGS---------------------------------------------------------
17637 //
17638 // All peephole optimizations can be turned off using -XX:-OptoPeephole
17639 //
17640 // Each peephole rule is given an identifying number starting with zero and
17641 // increasing by one in the order seen by the parser.  An individual peephole
17642 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
17643 // on the command-line.
17644 //
17645 // ---------CURRENT LIMITATIONS----------------------------------------------
17646 //
17647 // Only match adjacent instructions in same basic block
17648 // Only equality constraints
17649 // Only constraints between operands, not (0.dest_reg == RAX_enc)
17650 // Only one replacement instruction
17651 //
17652 // ---------EXAMPLE----------------------------------------------------------
17653 //
17654 // // pertinent parts of existing instructions in architecture description
17655 // instruct movI(iRegINoSp dst, iRegI src)
17656 // %{
17657 //   match(Set dst (CopyI src));
17658 // %}
17659 //
17660 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
17661 // %{
17662 //   match(Set dst (AddI dst src));
17663 //   effect(KILL cr);
17664 // %}
17665 //
17666 // // Change (inc mov) to lea
17667 // peephole %{
//   // increment preceded by register-register move
17669 //   peepmatch ( incI_iReg movI );
17670 //   // require that the destination register of the increment
17671 //   // match the destination register of the move
17672 //   peepconstraint ( 0.dst == 1.dst );
17673 //   // construct a replacement instruction that sets
17674 //   // the destination to ( move's source register + one )
17675 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
17676 // %}
17677 //
17678 
17679 // Implementation no longer uses movX instructions since
17680 // machine-independent system no longer uses CopyX nodes.
17681 //
17682 // peephole
17683 // %{
17684 //   peepmatch (incI_iReg movI);
17685 //   peepconstraint (0.dst == 1.dst);
17686 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17687 // %}
17688 
17689 // peephole
17690 // %{
17691 //   peepmatch (decI_iReg movI);
17692 //   peepconstraint (0.dst == 1.dst);
17693 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17694 // %}
17695 
17696 // peephole
17697 // %{
17698 //   peepmatch (addI_iReg_imm movI);
17699 //   peepconstraint (0.dst == 1.dst);
17700 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17701 // %}
17702 
17703 // peephole
17704 // %{
17705 //   peepmatch (incL_iReg movL);
17706 //   peepconstraint (0.dst == 1.dst);
17707 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17708 // %}
17709 
17710 // peephole
17711 // %{
17712 //   peepmatch (decL_iReg movL);
17713 //   peepconstraint (0.dst == 1.dst);
17714 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17715 // %}
17716 
17717 // peephole
17718 // %{
17719 //   peepmatch (addL_iReg_imm movL);
17720 //   peepconstraint (0.dst == 1.dst);
17721 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17722 // %}
17723 
17724 // peephole
17725 // %{
17726 //   peepmatch (addP_iReg_imm movP);
17727 //   peepconstraint (0.dst == 1.dst);
17728 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
17729 // %}
17730 
17731 // // Change load of spilled value to only a spill
17732 // instruct storeI(memory mem, iRegI src)
17733 // %{
17734 //   match(Set mem (StoreI mem src));
17735 // %}
17736 //
17737 // instruct loadI(iRegINoSp dst, memory mem)
17738 // %{
17739 //   match(Set dst (LoadI mem));
17740 // %}
17741 //
17742 
17743 //----------SMARTSPILL RULES---------------------------------------------------
17744 // These must follow all instruction definitions as they use the names
17745 // defined in the instructions definitions.
17746 
17747 // Local Variables:
17748 // mode: c++
17749 // End: