1 //
   2 // Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, Red Hat Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
// architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
  71 //   r27-r32 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
  74 // as regards Java usage. we don't use any callee save registers
  75 // because this makes it difficult to de-optimise a frame (see comment
  76 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers
  80 
// Each 64-bit register is described as its real low 32-bit half plus
// a virtual high half (the _H entry) so the allocator can track both
// words; only the low-half name is supplied as a memory operand (see
// comment above)
reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
// r8 and r9 are deliberately not defined here: they are kept
// invisible to the register allocator so they can be used as scratch
// registers (see comment above)
reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
// from r19 the C-convention save type (second column) switches to
// SOE: these are callee saved under the platform ABI, but they stay
// SOC for Java use because Java frames use no callee saves (see
// comment above)
reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
// r27-r31 are reserved for fixed roles (noted per line) and are never
// allocated for Java use (NS in the first column)
reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values up to 8 * 32
 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
// for Java use, float registers v0-v15 are always save-on-call
// (whereas the platform ABI treats v8-v15 as callee save); float
// registers v16-v31 are SOC as per the platform spec
 163 
  // _H, _J and _K name the 2nd, 3rd and 4th 32-bit slices of each
  // 128-bit vector register (as_VMReg()->next(1..3)); together with
  // the base name they let the allocator track the full 128 bits
  reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
  reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
  reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
  reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );

  reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
  reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
  reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
  reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );

  reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
  reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
  reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
  reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );

  reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
  reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
  reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
  reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );

  reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
  reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
  reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
  reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );

  reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
  reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
  reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
  reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );

  reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
  reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
  reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
  reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );

  reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
  reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
  reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
  reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );

  // v8-v15 are defined SOC here even though the platform ABI treats
  // them as callee save (see comment above)
  reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
  reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
  reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
  reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );

  reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
  reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
  reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
  reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );

  reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
  reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
  reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
  reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));

  reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
  reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
  reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
  reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));

  reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
  reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
  reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
  reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));

  reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
  reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
  reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
  reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));

  reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
  reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
  reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
  reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));

  reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
  reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
  reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
  reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));

  reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
  reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
  reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
  reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));

  reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
  reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
  reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
  reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));

  reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
  reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
  reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
  reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));

  reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
  reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
  reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
  reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));

  reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
  reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
  reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
  reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));

  reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
  reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
  reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
  reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));

  reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
  reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
  reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
  reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));

  reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
  reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
  reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
  reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));

  reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
  reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
  reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
  reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));

  reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
  reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
  reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
  reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));

  reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
  reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
  reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
  reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));

  reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
  reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
  reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
  reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));

  reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
  reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
  reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
  reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));

  reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
  reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
  reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
  reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));

  reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
  reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
  reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
  reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));

  reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
  reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
  reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
  reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
// the AArch64 condition flags (the NZCV bits, historically the CPSR)
// are not directly accessible as an instruction operand. the FPSR
// status flag register is a system register which can be
// written/read using MSR/MRS but again does not appear as an operand
// (a code identifying the FPSR occurs as an immediate value in the
// instruction).
 333 
// dummy definition for the flags register: encoding 32 lies just past
// the 0-31 encodings used above and there is no backing VMReg
// (VMRegImpl::Bad())
reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
// Ordering within the alloc_class fixes allocation priority (see
// comment above): freely-usable volatiles first, argument registers
// next, ABI callee saves after those, non-allocatable registers last
alloc_class chunk0(
    // volatiles
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,

    // arg registers
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,

    // non-volatiles
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers

    R27, R27_H, // heapbase
    R28, R28_H, // thread
    R29, R29_H, // fp
    R30, R30_H, // lr
    R31, R31_H, // sp
);
 385 
// Float/vector allocation priority: v16-v31 (SOC under the platform
// spec) first, argument registers v0-v7 next, then v8-v15 which the
// platform ABI treats as callee save (see comment above)
alloc_class chunk1(

    // no save
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,

    // arg registers
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,

    // non-volatiles
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
);
 426 
// the condition-flags register gets a chunk of its own
alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
 438 // Class for all 32 bit integer registers -- excludes SP which will
 439 // never be used as an integer register
// note: r8/r9 (allocator-invisible scratch, see comment above) and
// R31 (sp) are deliberately absent from this list
reg_class any_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26,
    R27,
    R28,
    R29,
    R30
);
 471 
// Singleton classes pin an operand to one specific register

// Singleton class for R0 int register
reg_class int_r0_reg(R0);

// Singleton class for R2 int register
reg_class int_r2_reg(R2);

// Singleton class for R3 int register
reg_class int_r3_reg(R3);

// Singleton class for R4 int register
reg_class int_r4_reg(R4);
 483 
// Class for all long integer registers (including SP, defined above
// as R31/R31_H)
reg_class any_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 517 
// Class for all non-special integer registers
// (the _no_fp variant excludes R29 so it can serve as frame pointer;
// the _with_fp variant below additionally allows R29)
reg_class no_special_reg32_no_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
 /* R29, */                     // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);

reg_class no_special_reg32_with_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
    R29,                        // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);

// NOTE(review): presumably selects the no-fp variant when
// PreserveFramePointer is set (R29 must stay reserved) and the
// with-fp variant otherwise — confirm against adlc's
// reg_class_dynamic semantics
reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
// Class for all non-special long integer registers
// (64-bit counterpart of no_special_reg32 above; the _no_fp variant
// excludes R29, the _with_fp variant allows it)
reg_class no_special_reg_no_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

reg_class no_special_reg_with_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
    R29, R29_H,                 // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

// NOTE(review): presumably selects the no-fp variant when
// PreserveFramePointer is set — confirm against adlc's
// reg_class_dynamic semantics
reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
// The classes below pin a 64-bit value to one specific register pair
// (low half plus virtual _H high half)

// Class for 64 bit register r0
reg_class r0_reg(
    R0, R0_H
);

// Class for 64 bit register r1
reg_class r1_reg(
    R1, R1_H
);

// Class for 64 bit register r2
reg_class r2_reg(
    R2, R2_H
);

// Class for 64 bit register r3
reg_class r3_reg(
    R3, R3_H
);

// Class for 64 bit register r4
reg_class r4_reg(
    R4, R4_H
);

// Class for 64 bit register r5
reg_class r5_reg(
    R5, R5_H
);

// Class for 64 bit register r10
reg_class r10_reg(
    R10, R10_H
);

// Class for 64 bit register r11
reg_class r11_reg(
    R11, R11_H
);

// Class for method register (r12, marked rmethod above)
reg_class method_reg(
    R12, R12_H
);

// Class for heapbase register
reg_class heapbase_reg(
    R27, R27_H
);

// Class for thread register
reg_class thread_reg(
    R28, R28_H
);

// Class for frame pointer register
reg_class fp_reg(
    R29, R29_H
);

// Class for link register
reg_class lr_reg(
    R30, R30_H
);

// Class for long sp register
reg_class sp_reg(
  R31, R31_H
);
 725 
// Class for all pointer registers (includes the special registers
// fp, lr and sp)
reg_class ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);

// Class for all non_special pointer registers (the special registers
// are commented out below)
reg_class no_special_ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 793 
// Class for all float registers
// (a float occupies only the first 32-bit element of the vector
// register, see comment above, so just the base names are listed)
reg_class float_reg(
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
    V17,
    V18,
    V19,
    V20,
    V21,
    V22,
    V23,
    V24,
    V25,
    V26,
    V27,
    V28,
    V29,
    V30,
    V31
);
 829 
// Double precision float registers have virtual `high halves' (the
// _H names defined above) that are needed by the allocator.
// Class for all double registers
reg_class double_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 867 
// Class for all 64 bit vector registers (the two 32-bit slices
// V<n>, V<n>_H of each vector register)
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 903 
// Class for all 128bit vector registers
//
// n.b. four 32-bit slots (Vn, Vn_H, Vn_J, Vn_K) cover the full
// 128-bit register
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
// Class for 128 bit register v0
// NOTE(review): only the V0 and V0_H slots are listed although the
// comment says 128 bit -- confirm the _J/_K halves are intentionally
// omitted
reg_class v0_reg(
    V0, V0_H
);
 944 
// Class for 128 bit register v1
// NOTE(review): only the V1 and V1_H slots are listed although the
// comment says 128 bit -- confirm the _J/_K halves are intentionally
// omitted
reg_class v1_reg(
    V1, V1_H
);
 949 
// Class for 128 bit register v2
// NOTE(review): only the V2 and V2_H slots are listed although the
// comment says 128 bit -- confirm the _J/_K halves are intentionally
// omitted
reg_class v2_reg(
    V2, V2_H
);
 954 
// Class for 128 bit register v3
// NOTE(review): only the V3 and V3_H slots are listed although the
// comment says 128 bit -- confirm the _J/_K halves are intentionally
// omitted
reg_class v3_reg(
    V3, V3_H
);
 959 
// Singleton class for condition codes -- RFLAGS is its only member
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // branches and calls are ranked twice as expensive as register ops
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // volatile accesses are costed an order of magnitude higher still
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "asm/macroAssembler.hpp"
 999 #include "gc/shared/cardTable.hpp"
1000 #include "gc/shared/cardTableBarrierSet.hpp"
1001 #include "gc/shared/collectedHeap.hpp"
1002 #include "opto/addnode.hpp"
1003 
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------
  // Both queries return 0 because this platform does not use call
  // trampoline stubs.

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1021 
class HandlerImpl {

 public:

  // code emitters for the exception and deopt handlers
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // worst-case size of the exception handler: a single far branch
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // n.b. the budget is 4 instruction slots: presumably 1 for the
    // adr plus up to 3 for the far branch -- confirm against
    // MacroAssembler::far_branch_size()
    return 4 * NativeInstruction::instruction_size;
  }
};
1038 
  // graph traversal helpers
  //
  // forward declarations for the helpers defined in the source block
  // below; they support the volatile load/store and CAS optimizations
  // documented there

  MemBarNode *parent_membar(const Node *n);
  MemBarNode *child_membar(const MemBarNode *n);
  bool leading_membar(const MemBarNode *barrier);

  bool is_card_mark_membar(const MemBarNode *barrier);
  bool is_CAS(int opcode);

  // navigation between the membars of a volatile put/CAS subgraph
  MemBarNode *leading_to_normal(MemBarNode *leading);
  MemBarNode *normal_to_leading(const MemBarNode *barrier);
  MemBarNode *card_mark_to_trailing(const MemBarNode *barrier);
  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
  MemBarNode *trailing_to_leading(const MemBarNode *trailing);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);

  // predicate controlling addressing modes
  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1073 %}
1074 
1075 source %{
1076 
  // Optimization of volatile gets and puts
1078   // -------------------------------------
1079   //
1080   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1081   // use to implement volatile reads and writes. For a volatile read
1082   // we simply need
1083   //
1084   //   ldar<x>
1085   //
1086   // and for a volatile write we need
1087   //
1088   //   stlr<x>
1089   //
1090   // Alternatively, we can implement them by pairing a normal
1091   // load/store with a memory barrier. For a volatile read we need
1092   //
1093   //   ldr<x>
1094   //   dmb ishld
1095   //
1096   // for a volatile write
1097   //
1098   //   dmb ish
1099   //   str<x>
1100   //   dmb ish
1101   //
1102   // We can also use ldaxr and stlxr to implement compare and swap CAS
1103   // sequences. These are normally translated to an instruction
1104   // sequence like the following
1105   //
1106   //   dmb      ish
1107   // retry:
1108   //   ldxr<x>   rval raddr
1109   //   cmp       rval rold
1110   //   b.ne done
1111   //   stlxr<x>  rval, rnew, rold
1112   //   cbnz      rval retry
1113   // done:
1114   //   cset      r0, eq
1115   //   dmb ishld
1116   //
1117   // Note that the exclusive store is already using an stlxr
1118   // instruction. That is required to ensure visibility to other
1119   // threads of the exclusive write (assuming it succeeds) before that
1120   // of any subsequent writes.
1121   //
1122   // The following instruction sequence is an improvement on the above
1123   //
1124   // retry:
1125   //   ldaxr<x>  rval raddr
1126   //   cmp       rval rold
1127   //   b.ne done
1128   //   stlxr<x>  rval, rnew, rold
1129   //   cbnz      rval retry
1130   // done:
1131   //   cset      r0, eq
1132   //
1133   // We don't need the leading dmb ish since the stlxr guarantees
1134   // visibility of prior writes in the case that the swap is
1135   // successful. Crucially we don't have to worry about the case where
1136   // the swap is not successful since no valid program should be
1137   // relying on visibility of prior changes by the attempting thread
1138   // in the case where the CAS fails.
1139   //
1140   // Similarly, we don't need the trailing dmb ishld if we substitute
1141   // an ldaxr instruction since that will provide all the guarantees we
1142   // require regarding observation of changes made by other threads
1143   // before any change to the CAS address observed by the load.
1144   //
1145   // In order to generate the desired instruction sequence we need to
1146   // be able to identify specific 'signature' ideal graph node
  // sequences which i) occur as a translation of volatile reads or
1148   // writes or CAS operations and ii) do not occur through any other
1149   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1151   // sequences to the desired machine code sequences. Selection of the
1152   // alternative rules can be implemented by predicates which identify
1153   // the relevant node sequences.
1154   //
1155   // The ideal graph generator translates a volatile read to the node
1156   // sequence
1157   //
1158   //   LoadX[mo_acquire]
1159   //   MemBarAcquire
1160   //
1161   // As a special case when using the compressed oops optimization we
1162   // may also see this variant
1163   //
1164   //   LoadN[mo_acquire]
1165   //   DecodeN
1166   //   MemBarAcquire
1167   //
1168   // A volatile write is translated to the node sequence
1169   //
1170   //   MemBarRelease
1171   //   StoreX[mo_release] {CardMark}-optional
1172   //   MemBarVolatile
1173   //
1174   // n.b. the above node patterns are generated with a strict
1175   // 'signature' configuration of input and output dependencies (see
1176   // the predicates below for exact details). The card mark may be as
1177   // simple as a few extra nodes or, in a few GC configurations, may
1178   // include more complex control flow between the leading and
1179   // trailing memory barriers. However, whatever the card mark
1180   // configuration these signatures are unique to translated volatile
1181   // reads/stores -- they will not appear as a result of any other
1182   // bytecode translation or inlining nor as a consequence of
1183   // optimizing transforms.
1184   //
1185   // We also want to catch inlined unsafe volatile gets and puts and
1186   // be able to implement them using either ldar<x>/stlr<x> or some
1187   // combination of ldr<x>/stlr<x> and dmb instructions.
1188   //
1189   // Inlined unsafe volatiles puts manifest as a minor variant of the
1190   // normal volatile put node sequence containing an extra cpuorder
1191   // membar
1192   //
1193   //   MemBarRelease
1194   //   MemBarCPUOrder
1195   //   StoreX[mo_release] {CardMark}-optional
1196   //   MemBarVolatile
1197   //
1198   // n.b. as an aside, the cpuorder membar is not itself subject to
1199   // matching and translation by adlc rules.  However, the rule
1200   // predicates need to detect its presence in order to correctly
1201   // select the desired adlc rules.
1202   //
1203   // Inlined unsafe volatile gets manifest as a somewhat different
1204   // node sequence to a normal volatile get
1205   //
1206   //   MemBarCPUOrder
1207   //        ||       \\
1208   //   MemBarAcquire LoadX[mo_acquire]
1209   //        ||
1210   //   MemBarCPUOrder
1211   //
1212   // In this case the acquire membar does not directly depend on the
1213   // load. However, we can be sure that the load is generated from an
1214   // inlined unsafe volatile get if we see it dependent on this unique
1215   // sequence of membar nodes. Similarly, given an acquire membar we
1216   // can know that it was added because of an inlined unsafe volatile
1217   // get if it is fed and feeds a cpuorder membar and if its feed
1218   // membar also feeds an acquiring load.
1219   //
1220   // Finally an inlined (Unsafe) CAS operation is translated to the
1221   // following ideal graph
1222   //
1223   //   MemBarRelease
1224   //   MemBarCPUOrder
1225   //   CompareAndSwapX {CardMark}-optional
1226   //   MemBarCPUOrder
1227   //   MemBarAcquire
1228   //
1229   // So, where we can identify these volatile read and write
1230   // signatures we can choose to plant either of the above two code
1231   // sequences. For a volatile read we can simply plant a normal
1232   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1233   // also choose to inhibit translation of the MemBarAcquire and
1234   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1235   //
1236   // When we recognise a volatile store signature we can choose to
  // plant a dmb ish as a translation for the MemBarRelease, a
1238   // normal str<x> and then a dmb ish for the MemBarVolatile.
1239   // Alternatively, we can inhibit translation of the MemBarRelease
1240   // and MemBarVolatile and instead plant a simple stlr<x>
1241   // instruction.
1242   //
1243   // when we recognise a CAS signature we can choose to plant a dmb
1244   // ish as a translation for the MemBarRelease, the conventional
1245   // macro-instruction sequence for the CompareAndSwap node (which
1246   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1247   // Alternatively, we can elide generation of the dmb instructions
1248   // and plant the alternative CompareAndSwap macro-instruction
1249   // sequence (which uses ldaxr<x>).
1250   //
1251   // Of course, the above only applies when we see these signature
1252   // configurations. We still want to plant dmb instructions in any
1253   // other cases where we may see a MemBarAcquire, MemBarRelease or
1254   // MemBarVolatile. For example, at the end of a constructor which
1255   // writes final/volatile fields we will see a MemBarRelease
1256   // instruction and this needs a 'dmb ish' lest we risk the
1257   // constructed object being visible without making the
1258   // final/volatile field writes visible.
1259   //
1260   // n.b. the translation rules below which rely on detection of the
1261   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1262   // If we see anything other than the signature configurations we
1263   // always just translate the loads and stores to ldr<x> and str<x>
1264   // and translate acquire, release and volatile membars to the
1265   // relevant dmb instructions.
1266   //
1267 
1268   // graph traversal helpers used for volatile put/get and CAS
1269   // optimization
1270 
1271   // 1) general purpose helpers
1272 
1273   // if node n is linked to a parent MemBarNode by an intervening
1274   // Control and Memory ProjNode return the MemBarNode otherwise return
1275   // NULL.
1276   //
1277   // n may only be a Load or a MemBar.
1278 
1279   MemBarNode *parent_membar(const Node *n)
1280   {
1281     Node *ctl = NULL;
1282     Node *mem = NULL;
1283     Node *membar = NULL;
1284 
1285     if (n->is_Load()) {
1286       ctl = n->lookup(LoadNode::Control);
1287       mem = n->lookup(LoadNode::Memory);
1288     } else if (n->is_MemBar()) {
1289       ctl = n->lookup(TypeFunc::Control);
1290       mem = n->lookup(TypeFunc::Memory);
1291     } else {
1292         return NULL;
1293     }
1294 
1295     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
1296       return NULL;
1297     }
1298 
1299     membar = ctl->lookup(0);
1300 
1301     if (!membar || !membar->is_MemBar()) {
1302       return NULL;
1303     }
1304 
1305     if (mem->lookup(0) != membar) {
1306       return NULL;
1307     }
1308 
1309     return membar->as_MemBar();
1310   }
1311 
1312   // if n is linked to a child MemBarNode by intervening Control and
1313   // Memory ProjNodes return the MemBarNode otherwise return NULL.
1314 
1315   MemBarNode *child_membar(const MemBarNode *n)
1316   {
1317     ProjNode *ctl = n->proj_out(TypeFunc::Control);
1318     ProjNode *mem = n->proj_out(TypeFunc::Memory);
1319 
1320     // MemBar needs to have both a Ctl and Mem projection
1321     if (! ctl || ! mem)
1322       return NULL;
1323 
1324     MemBarNode *child = NULL;
1325     Node *x;
1326 
1327     for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1328       x = ctl->fast_out(i);
1329       // if we see a membar we keep hold of it. we may also see a new
1330       // arena copy of the original but it will appear later
1331       if (x->is_MemBar()) {
1332           child = x->as_MemBar();
1333           break;
1334       }
1335     }
1336 
1337     if (child == NULL) {
1338       return NULL;
1339     }
1340 
1341     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1342       x = mem->fast_out(i);
1343       // if we see a membar we keep hold of it. we may also see a new
1344       // arena copy of the original but it will appear later
1345       if (x == child) {
1346         return child;
1347       }
1348     }
1349     return NULL;
1350   }
1351 
1352   // helper predicate use to filter candidates for a leading memory
1353   // barrier
1354   //
1355   // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1356   // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1357 
1358   bool leading_membar(const MemBarNode *barrier)
1359   {
1360     int opcode = barrier->Opcode();
1361     // if this is a release membar we are ok
1362     if (opcode == Op_MemBarRelease) {
1363       return true;
1364     }
1365     // if its a cpuorder membar . . .
1366     if (opcode != Op_MemBarCPUOrder) {
1367       return false;
1368     }
1369     // then the parent has to be a release membar
1370     MemBarNode *parent = parent_membar(barrier);
1371     if (!parent) {
1372       return false;
1373     }
1374     opcode = parent->Opcode();
1375     return opcode == Op_MemBarRelease;
1376   }
1377 
1378   // 2) card mark detection helper
1379 
1380   // helper predicate which can be used to detect a volatile membar
1381   // introduced as part of a conditional card mark sequence either by
1382   // G1 or by CMS when UseCondCardMark is true.
1383   //
1384   // membar can be definitively determined to be part of a card mark
1385   // sequence if and only if all the following hold
1386   //
1387   // i) it is a MemBarVolatile
1388   //
1389   // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1390   // true
1391   //
1392   // iii) the node's Mem projection feeds a StoreCM node.
1393 
1394   bool is_card_mark_membar(const MemBarNode *barrier)
1395   {
1396     if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
1397       return false;
1398     }
1399 
1400     if (barrier->Opcode() != Op_MemBarVolatile) {
1401       return false;
1402     }
1403 
1404     ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1405 
1406     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1407       Node *y = mem->fast_out(i);
1408       if (y->Opcode() == Op_StoreCM) {
1409         return true;
1410       }
1411     }
1412 
1413     return false;
1414   }
1415 
1416 
1417   // 3) helper predicates to traverse volatile put or CAS graphs which
1418   // may contain GC barrier subgraphs
1419 
1420   // Preamble
1421   // --------
1422   //
1423   // for volatile writes we can omit generating barriers and employ a
  // releasing store when we see a node sequence with a
1425   // leading MemBarRelease and a trailing MemBarVolatile as follows
1426   //
1427   //   MemBarRelease
1428   //  {      ||      } -- optional
1429   //  {MemBarCPUOrder}
1430   //         ||     \\
1431   //         ||     StoreX[mo_release]
1432   //         | \     /
1433   //         | MergeMem
1434   //         | /
1435   //   MemBarVolatile
1436   //
1437   // where
1438   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1439   //  | \ and / indicate further routing of the Ctl and Mem feeds
1440   //
1441   // this is the graph we see for non-object stores. however, for a
1442   // volatile Object store (StoreN/P) we may see other nodes below the
1443   // leading membar because of the need for a GC pre- or post-write
1444   // barrier.
1445   //
  // with most GC configurations we will see this simple variant which
1447   // includes a post-write barrier card mark.
1448   //
1449   //   MemBarRelease______________________________
1450   //         ||    \\               Ctl \        \\
1451   //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
1452   //         | \     /                       . . .  /
1453   //         | MergeMem
1454   //         | /
1455   //         ||      /
1456   //   MemBarVolatile
1457   //
1458   // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1459   // the object address to an int used to compute the card offset) and
1460   // Ctl+Mem to a StoreB node (which does the actual card mark).
1461   //
1462   // n.b. a StoreCM node will only appear in this configuration when
1463   // using CMS. StoreCM differs from a normal card mark write (StoreB)
1464   // because it implies a requirement to order visibility of the card
1465   // mark (StoreCM) relative to the object put (StoreP/N) using a
1466   // StoreStore memory barrier (arguably this ought to be represented
1467   // explicitly in the ideal graph but that is not how it works). This
1468   // ordering is required for both non-volatile and volatile
1469   // puts. Normally that means we need to translate a StoreCM using
1470   // the sequence
1471   //
1472   //   dmb ishst
1473   //   stlrb
1474   //
1475   // However, in the case of a volatile put if we can recognise this
1476   // configuration and plant an stlr for the object write then we can
1477   // omit the dmb and just plant an strb since visibility of the stlr
1478   // is ordered before visibility of subsequent stores. StoreCM nodes
1479   // also arise when using G1 or using CMS with conditional card
1480   // marking. In these cases (as we shall see) we don't need to insert
1481   // the dmb when translating StoreCM because there is already an
1482   // intervening StoreLoad barrier between it and the StoreP/N.
1483   //
1484   // It is also possible to perform the card mark conditionally on it
1485   // currently being unmarked in which case the volatile put graph
1486   // will look slightly different
1487   //
1488   //   MemBarRelease____________________________________________
1489   //         ||    \\               Ctl \     Ctl \     \\  Mem \
1490   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
1491   //         | \     /                              \            |
1492   //         | MergeMem                            . . .      StoreB
1493   //         | /                                                /
1494   //         ||     /
1495   //   MemBarVolatile
1496   //
1497   // It is worth noting at this stage that both the above
1498   // configurations can be uniquely identified by checking that the
1499   // memory flow includes the following subgraph:
1500   //
1501   //   MemBarRelease
1502   //  {MemBarCPUOrder}
1503   //          |  \      . . .
1504   //          |  StoreX[mo_release]  . . .
1505   //          |   /
1506   //         MergeMem
1507   //          |
1508   //   MemBarVolatile
1509   //
1510   // This is referred to as a *normal* subgraph. It can easily be
1511   // detected starting from any candidate MemBarRelease,
1512   // StoreX[mo_release] or MemBarVolatile.
1513   //
1514   // A simple variation on this normal case occurs for an unsafe CAS
1515   // operation. The basic graph for a non-object CAS is
1516   //
1517   //   MemBarRelease
1518   //         ||
1519   //   MemBarCPUOrder
1520   //         ||     \\   . . .
1521   //         ||     CompareAndSwapX
1522   //         ||       |
1523   //         ||     SCMemProj
1524   //         | \     /
1525   //         | MergeMem
1526   //         | /
1527   //   MemBarCPUOrder
1528   //         ||
1529   //   MemBarAcquire
1530   //
1531   // The same basic variations on this arrangement (mutatis mutandis)
  // occur when a card mark is introduced. i.e. we see the same basic
  // shape but the StoreP/N is replaced with CompareAndSwapP/N and the
1534   // tail of the graph is a pair comprising a MemBarCPUOrder +
1535   // MemBarAcquire.
1536   //
1537   // So, in the case of a CAS the normal graph has the variant form
1538   //
1539   //   MemBarRelease
1540   //   MemBarCPUOrder
1541   //          |   \      . . .
1542   //          |  CompareAndSwapX  . . .
1543   //          |    |
1544   //          |   SCMemProj
1545   //          |   /  . . .
1546   //         MergeMem
1547   //          |
1548   //   MemBarCPUOrder
1549   //   MemBarAcquire
1550   //
1551   // This graph can also easily be detected starting from any
1552   // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire.
1553   //
1554   // the code below uses two helper predicates, leading_to_normal and
1555   // normal_to_leading to identify these normal graphs, one validating
1556   // the layout starting from the top membar and searching down and
1557   // the other validating the layout starting from the lower membar
1558   // and searching up.
1559   //
1560   // There are two special case GC configurations when a normal graph
1561   // may not be generated: when using G1 (which always employs a
1562   // conditional card mark); and when using CMS with conditional card
1563   // marking configured. These GCs are both concurrent rather than
1564   // stop-the world GCs. So they introduce extra Ctl+Mem flow into the
1565   // graph between the leading and trailing membar nodes, in
  // particular enforcing stronger memory serialisation between the
1567   // object put and the corresponding conditional card mark. CMS
1568   // employs a post-write GC barrier while G1 employs both a pre- and
1569   // post-write GC barrier. Of course the extra nodes may be absent --
1570   // they are only inserted for object puts. This significantly
1571   // complicates the task of identifying whether a MemBarRelease,
1572   // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
1573   // when using these GC configurations (see below). It adds similar
1574   // complexity to the task of identifying whether a MemBarRelease,
1575   // CompareAndSwapX or MemBarAcquire forms part of a CAS.
1576   //
1577   // In both cases the post-write subtree includes an auxiliary
1578   // MemBarVolatile (StoreLoad barrier) separating the object put and
1579   // the read of the corresponding card. This poses two additional
1580   // problems.
1581   //
1582   // Firstly, a card mark MemBarVolatile needs to be distinguished
1583   // from a normal trailing MemBarVolatile. Resolving this first
1584   // problem is straightforward: a card mark MemBarVolatile always
1585   // projects a Mem feed to a StoreCM node and that is a unique marker
1586   //
1587   //      MemBarVolatile (card mark)
1588   //       C |    \     . . .
1589   //         |   StoreCM   . . .
1590   //       . . .
1591   //
1592   // The second problem is how the code generator is to translate the
1593   // card mark barrier? It always needs to be translated to a "dmb
1594   // ish" instruction whether or not it occurs as part of a volatile
1595   // put. A StoreLoad barrier is needed after the object put to ensure
1596   // i) visibility to GC threads of the object put and ii) visibility
1597   // to the mutator thread of any card clearing write by a GC
1598   // thread. Clearly a normal store (str) will not guarantee this
1599   // ordering but neither will a releasing store (stlr). The latter
1600   // guarantees that the object put is visible but does not guarantee
1601   // that writes by other threads have also been observed.
1602   //
1603   // So, returning to the task of translating the object put and the
1604   // leading/trailing membar nodes: what do the non-normal node graph
1605   // look like for these 2 special cases? and how can we determine the
1606   // status of a MemBarRelease, StoreX[mo_release] or MemBarVolatile
1607   // in both normal and non-normal cases?
1608   //
1609   // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
  // which selects conditional execution based on the value loaded
1611   // (LoadB) from the card. Ctl and Mem are fed to the If via an
1612   // intervening StoreLoad barrier (MemBarVolatile).
1613   //
1614   // So, with CMS we may see a node graph for a volatile object store
1615   // which looks like this
1616   //
1617   //   MemBarRelease
1618   //   MemBarCPUOrder_(leading)__________________
1619   //     C |    M \       \\                   C \
1620   //       |       \    StoreN/P[mo_release]  CastP2X
1621   //       |    Bot \    /
1622   //       |       MergeMem
1623   //       |         /
1624   //      MemBarVolatile (card mark)
1625   //     C |  ||    M |
1626   //       | LoadB    |
1627   //       |   |      |
1628   //       | Cmp      |\
1629   //       | /        | \
1630   //       If         |  \
1631   //       | \        |   \
1632   // IfFalse  IfTrue  |    \
1633   //       \     / \  |     \
1634   //        \   / StoreCM    |
1635   //         \ /      |      |
1636   //        Region   . . .   |
1637   //          | \           /
1638   //          |  . . .  \  / Bot
1639   //          |       MergeMem
1640   //          |          |
1641   //        MemBarVolatile (trailing)
1642   //
1643   // The first MergeMem merges the AliasIdxBot Mem slice from the
1644   // leading membar and the oopptr Mem slice from the Store into the
1645   // card mark membar. The trailing MergeMem merges the AliasIdxBot
1646   // Mem slice from the card mark membar and the AliasIdxRaw slice
1647   // from the StoreCM into the trailing membar (n.b. the latter
1648   // proceeds via a Phi associated with the If region).
1649   //
1650   // The graph for a CAS varies slightly, the obvious difference being
1651   // that the StoreN/P node is replaced by a CompareAndSwapP/N node
1652   // and the trailing MemBarVolatile by a MemBarCPUOrder +
1653   // MemBarAcquire pair. The other important difference is that the
1654   // CompareAndSwap node's SCMemProj is not merged into the card mark
1655   // membar - it still feeds the trailing MergeMem. This also means
1656   // that the card mark membar receives its Mem feed directly from the
1657   // leading membar rather than via a MergeMem.
1658   //
1659   //   MemBarRelease
1660   //   MemBarCPUOrder__(leading)_________________________
1661   //       ||                       \\                 C \
1662   //   MemBarVolatile (card mark)  CompareAndSwapN/P  CastP2X
1663   //     C |  ||    M |              |
1664   //       | LoadB    |       ______/|
1665   //       |   |      |      /       |
1666   //       | Cmp      |     /      SCMemProj
1667   //       | /        |    /         |
1668   //       If         |   /         /
1669   //       | \        |  /         /
1670   // IfFalse  IfTrue  | /         /
1671   //       \     / \  |/ prec    /
1672   //        \   / StoreCM       /
1673   //         \ /      |        /
1674   //        Region   . . .    /
1675   //          | \            /
1676   //          |  . . .  \   / Bot
1677   //          |       MergeMem
1678   //          |          |
1679   //        MemBarCPUOrder
1680   //        MemBarAcquire (trailing)
1681   //
1682   // This has a slightly different memory subgraph to the one seen
1683   // previously but the core of it is the same as for the CAS normal
  // subgraph
1685   //
1686   //   MemBarRelease
1687   //   MemBarCPUOrder____
1688   //      ||             \      . . .
1689   //   MemBarVolatile  CompareAndSwapX  . . .
1690   //      |  \            |
1691   //        . . .   SCMemProj
1692   //          |     /  . . .
1693   //         MergeMem
1694   //          |
1695   //   MemBarCPUOrder
1696   //   MemBarAcquire
1697   //
1698   //
1699   // G1 is quite a lot more complicated. The nodes inserted on behalf
1700   // of G1 may comprise: a pre-write graph which adds the old value to
1701   // the SATB queue; the releasing store itself; and, finally, a
1702   // post-write graph which performs a card mark.
1703   //
1704   // The pre-write graph may be omitted, but only when the put is
1705   // writing to a newly allocated (young gen) object and then only if
1706   // there is a direct memory chain to the Initialize node for the
1707   // object allocation. This will not happen for a volatile put since
1708   // any memory chain passes through the leading membar.
1709   //
1710   // The pre-write graph includes a series of 3 If tests. The outermost
1711   // If tests whether SATB is enabled (no else case). The next If tests
1712   // whether the old value is non-NULL (no else case). The third tests
1713   // whether the SATB queue index is > 0, if so updating the queue. The
1714   // else case for this third If calls out to the runtime to allocate a
1715   // new queue buffer.
1716   //
1717   // So with G1 the pre-write and releasing store subgraph looks like
1718   // this (the nested Ifs are omitted).
1719   //
1720   //  MemBarRelease (leading)____________
1721   //     C |  ||  M \   M \    M \  M \ . . .
1722   //       | LoadB   \  LoadL  LoadN   \
1723   //       | /        \                 \
1724   //       If         |\                 \
1725   //       | \        | \                 \
1726   //  IfFalse  IfTrue |  \                 \
1727   //       |     |    |   \                 |
1728   //       |     If   |   /\                |
1729   //       |     |          \               |
1730   //       |                 \              |
1731   //       |    . . .         \             |
1732   //       | /       | /       |            |
1733   //      Region  Phi[M]       |            |
1734   //       | \       |         |            |
1735   //       |  \_____ | ___     |            |
1736   //     C | C \     |   C \ M |            |
1737   //       | CastP2X | StoreN/P[mo_release] |
1738   //       |         |         |            |
1739   //     C |       M |       M |          M |
1740   //        \        |         |           /
1741   //                  . . .
1742   //          (post write subtree elided)
1743   //                    . . .
1744   //             C \         M /
1745   //         MemBarVolatile (trailing)
1746   //
1747   // n.b. the LoadB in this subgraph is not the card read -- it's a
1748   // read of the SATB queue active flag.
1749   //
1750   // Once again the CAS graph is a minor variant on the above with the
  // expected substitutions of CompareAndSwapX for StoreN/P and
1752   // MemBarCPUOrder + MemBarAcquire for trailing MemBarVolatile.
1753   //
1754   // The G1 post-write subtree is also optional, this time when the
1755   // new value being written is either null or can be identified as a
1756   // newly allocated (young gen) object with no intervening control
1757   // flow. The latter cannot happen but the former may, in which case
  // the card mark membar is omitted and the memory feeds from the
  // leading membar and the StoreN/P are merged direct into the
1760   // trailing membar as per the normal subgraph. So, the only special
1761   // case which arises is when the post-write subgraph is generated.
1762   //
1763   // The kernel of the post-write G1 subgraph is the card mark itself
1764   // which includes a card mark memory barrier (MemBarVolatile), a
1765   // card test (LoadB), and a conditional update (If feeding a
1766   // StoreCM). These nodes are surrounded by a series of nested Ifs
1767   // which try to avoid doing the card mark. The top level If skips if
1768   // the object reference does not cross regions (i.e. it tests if
1769   // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1770   // need not be recorded. The next If, which skips on a NULL value,
1771   // may be absent (it is not generated if the type of value is >=
1772   // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1773   // checking if card_val != young).  n.b. although this test requires
1774   // a pre-read of the card it can safely be done before the StoreLoad
1775   // barrier. However that does not bypass the need to reread the card
1776   // after the barrier.
1777   //
1778   //                (pre-write subtree elided)
1779   //        . . .                  . . .    . . .  . . .
1780   //        C |                    M |     M |    M |
1781   //       Region                  Phi[M] StoreN    |
1782   //          |                     / \      |      |
1783   //         / \_______            /   \     |      |
1784   //      C / C \      . . .            \    |      |
1785   //       If   CastP2X . . .            |   |      |
1786   //       / \                           |   |      |
1787   //      /   \                          |   |      |
1788   // IfFalse IfTrue                      |   |      |
1789   //   |       |                         |   |     /|
1790   //   |       If                        |   |    / |
1791   //   |      / \                        |   |   /  |
1792   //   |     /   \                        \  |  /   |
1793   //   | IfFalse IfTrue                   MergeMem  |
1794   //   |  . . .    / \                       /      |
1795   //   |          /   \                     /       |
1796   //   |     IfFalse IfTrue                /        |
1797   //   |      . . .    |                  /         |
1798   //   |               If                /          |
1799   //   |               / \              /           |
1800   //   |              /   \            /            |
1801   //   |         IfFalse IfTrue       /             |
1802   //   |           . . .   |         /              |
1803   //   |                    \       /               |
1804   //   |                     \     /                |
1805   //   |             MemBarVolatile__(card mark)    |
1806   //   |                ||   C |  M \  M \          |
1807   //   |               LoadB   If    |    |         |
1808   //   |                      / \    |    |         |
1809   //   |                     . . .   |    |         |
1810   //   |                          \  |    |        /
1811   //   |                        StoreCM   |       /
1812   //   |                          . . .   |      /
1813   //   |                        _________/      /
1814   //   |                       /  _____________/
1815   //   |   . . .       . . .  |  /            /
1816   //   |    |                 | /   _________/
1817   //   |    |               Phi[M] /        /
1818   //   |    |                 |   /        /
1819   //   |    |                 |  /        /
1820   //   |  Region  . . .     Phi[M]  _____/
1821   //   |    /                 |    /
1822   //   |                      |   /
1823   //   | . . .   . . .        |  /
1824   //   | /                    | /
1825   // Region           |  |  Phi[M]
1826   //   |              |  |  / Bot
1827   //    \            MergeMem
1828   //     \            /
1829   //     MemBarVolatile
1830   //
1831   // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
1832   // from the leading membar and the oopptr Mem slice from the Store
1833   // into the card mark membar i.e. the memory flow to the card mark
1834   // membar still looks like a normal graph.
1835   //
1836   // The trailing MergeMem merges an AliasIdxBot Mem slice with other
1837   // Mem slices (from the StoreCM and other card mark queue stores).
1838   // However in this case the AliasIdxBot Mem slice does not come
1839   // direct from the card mark membar. It is merged through a series
1840   // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
1841   // from the leading membar with the Mem feed from the card mark
1842   // membar. Each Phi corresponds to one of the Ifs which may skip
1843   // around the card mark membar. So when the If implementing the NULL
1844   // value check has been elided the total number of Phis is 2
1845   // otherwise it is 3.
1846   //
1847   // The CAS graph when using G1GC also includes a pre-write subgraph
  // and an optional post-write subgraph. The same variations are
  // introduced as for CMS with conditional card marking i.e. the
  // StoreP/N is swapped for a CompareAndSwapP/N, the trailing
1851   // MemBarVolatile for a MemBarCPUOrder + MemBarAcquire pair and the
1852   // Mem feed from the CompareAndSwapP/N includes a precedence
1853   // dependency feed to the StoreCM and a feed via an SCMemProj to the
1854   // trailing membar. So, as before the configuration includes the
1855   // normal CAS graph as a subgraph of the memory flow.
1856   //
1857   // So, the upshot is that in all cases the volatile put graph will
  // include a *normal* memory subgraph between the leading membar and
1859   // its child membar, either a volatile put graph (including a
1860   // releasing StoreX) or a CAS graph (including a CompareAndSwapX).
1861   // When that child is not a card mark membar then it marks the end
1862   // of the volatile put or CAS subgraph. If the child is a card mark
1863   // membar then the normal subgraph will form part of a volatile put
1864   // subgraph if and only if the child feeds an AliasIdxBot Mem feed
1865   // to a trailing barrier via a MergeMem. That feed is either direct
1866   // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier
1867   // memory flow (for G1).
1868   //
1869   // The predicates controlling generation of instructions for store
1870   // and barrier nodes employ a few simple helper functions (described
1871   // below) which identify the presence or absence of all these
1872   // subgraph configurations and provide a means of traversing from
1873   // one node in the subgraph to another.
1874 
1875   // is_CAS(int opcode)
1876   //
1877   // return true if opcode is one of the possible CompareAndSwapX
1878   // values otherwise false.
1879 
1880   bool is_CAS(int opcode)
1881   {
1882     switch(opcode) {
1883       // We handle these
1884     case Op_CompareAndSwapI:
1885     case Op_CompareAndSwapL:
1886     case Op_CompareAndSwapP:
1887     case Op_CompareAndSwapN:
1888  // case Op_CompareAndSwapB:
1889  // case Op_CompareAndSwapS:
1890       return true;
1891       // These are TBD
1892     case Op_WeakCompareAndSwapB:
1893     case Op_WeakCompareAndSwapS:
1894     case Op_WeakCompareAndSwapI:
1895     case Op_WeakCompareAndSwapL:
1896     case Op_WeakCompareAndSwapP:
1897     case Op_WeakCompareAndSwapN:
1898     case Op_CompareAndExchangeB:
1899     case Op_CompareAndExchangeS:
1900     case Op_CompareAndExchangeI:
1901     case Op_CompareAndExchangeL:
1902     case Op_CompareAndExchangeP:
1903     case Op_CompareAndExchangeN:
1904       return false;
1905     default:
1906       return false;
1907     }
1908   }
1909 
1910 
1911   // leading_to_normal
1912   //
  // graph traversal helper which detects the normal case Mem feed from
1914   // a release membar (or, optionally, its cpuorder child) to a
1915   // dependent volatile membar i.e. it ensures that one or other of
1916   // the following Mem flow subgraph is present.
1917   //
1918   //   MemBarRelease
1919   //   MemBarCPUOrder {leading}
1920   //          |  \      . . .
1921   //          |  StoreN/P[mo_release]  . . .
1922   //          |   /
1923   //         MergeMem
1924   //          |
1925   //   MemBarVolatile {trailing or card mark}
1926   //
1927   //   MemBarRelease
1928   //   MemBarCPUOrder {leading}
1929   //      |       \      . . .
1930   //      |     CompareAndSwapX  . . .
1931   //               |
1932   //     . . .    SCMemProj
1933   //           \   |
1934   //      |    MergeMem
1935   //      |       /
1936   //    MemBarCPUOrder
1937   //    MemBarAcquire {trailing}
1938   //
1939   // if the correct configuration is present returns the trailing
1940   // membar otherwise NULL.
1941   //
1942   // the input membar is expected to be either a cpuorder membar or a
1943   // release membar. in the latter case it should not have a cpu membar
1944   // child.
1945   //
1946   // the returned value may be a card mark or trailing membar
1947   //
1948 
1949   MemBarNode *leading_to_normal(MemBarNode *leading)
1950   {
1951     assert((leading->Opcode() == Op_MemBarRelease ||
1952             leading->Opcode() == Op_MemBarCPUOrder),
1953            "expecting a volatile or cpuroder membar!");
1954 
1955     // check the mem flow
1956     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
1957 
1958     if (!mem) {
1959       return NULL;
1960     }
1961 
1962     Node *x = NULL;
1963     StoreNode * st = NULL;
1964     LoadStoreNode *cas = NULL;
1965     MergeMemNode *mm = NULL;
1966 
1967     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1968       x = mem->fast_out(i);
1969       if (x->is_MergeMem()) {
1970         if (mm != NULL) {
1971           return NULL;
1972         }
1973         // two merge mems is one too many
1974         mm = x->as_MergeMem();
1975       } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
1976         // two releasing stores/CAS nodes is one too many
1977         if (st != NULL || cas != NULL) {
1978           return NULL;
1979         }
1980         st = x->as_Store();
1981       } else if (is_CAS(x->Opcode())) {
1982         if (st != NULL || cas != NULL) {
1983           return NULL;
1984         }
1985         cas = x->as_LoadStore();
1986       }
1987     }
1988 
1989     // must have a store or a cas
1990     if (!st && !cas) {
1991       return NULL;
1992     }
1993 
1994     // must have a merge if we also have st
1995     if (st && !mm) {
1996       return NULL;
1997     }
1998 
1999     Node *y = NULL;
2000     if (cas) {
2001       // look for an SCMemProj
2002       for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
2003         x = cas->fast_out(i);
2004         if (x->is_Proj()) {
2005           y = x;
2006           break;
2007         }
2008       }
2009       if (y == NULL) {
2010         return NULL;
2011       }
2012       // the proj must feed a MergeMem
2013       for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
2014         x = y->fast_out(i);
2015         if (x->is_MergeMem()) {
2016           mm = x->as_MergeMem();
2017           break;
2018         }
2019       }
2020       if (mm == NULL)
2021         return NULL;
2022     } else {
2023       // ensure the store feeds the existing mergemem;
2024       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2025         if (st->fast_out(i) == mm) {
2026           y = st;
2027           break;
2028         }
2029       }
2030       if (y == NULL) {
2031         return NULL;
2032       }
2033     }
2034 
2035     MemBarNode *mbar = NULL;
2036     // ensure the merge feeds to the expected type of membar
2037     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2038       x = mm->fast_out(i);
2039       if (x->is_MemBar()) {
2040         int opcode = x->Opcode();
2041         if (opcode == Op_MemBarVolatile && st) {
2042           mbar = x->as_MemBar();
2043         } else if (cas && opcode == Op_MemBarCPUOrder) {
2044           MemBarNode *y =  x->as_MemBar();
2045           y = child_membar(y);
2046           if (y != NULL && y->Opcode() == Op_MemBarAcquire) {
2047             mbar = y;
2048           }
2049         }
2050         break;
2051       }
2052     }
2053 
2054     return mbar;
2055   }
2056 
2057   // normal_to_leading
2058   //
2059   // graph traversal helper which detects the normal case Mem feed
2060   // from either a card mark or a trailing membar to a preceding
2061   // release membar (optionally its cpuorder child) i.e. it ensures
2062   // that one or other of the following Mem flow subgraphs is present.
2063   //
2064   //   MemBarRelease
2065   //   MemBarCPUOrder {leading}
2066   //          |  \      . . .
2067   //          |  StoreN/P[mo_release]  . . .
2068   //          |   /
2069   //         MergeMem
2070   //          |
2071   //   MemBarVolatile {card mark or trailing}
2072   //
2073   //   MemBarRelease
2074   //   MemBarCPUOrder {leading}
2075   //      |       \      . . .
2076   //      |     CompareAndSwapX  . . .
2077   //               |
2078   //     . . .    SCMemProj
2079   //           \   |
2080   //      |    MergeMem
2081   //      |        /
2082   //    MemBarCPUOrder
2083   //    MemBarAcquire {trailing}
2084   //
2085   // this predicate checks for the same flow as the previous predicate
2086   // but starting from the bottom rather than the top.
2087   //
  // if the configuration is present returns the cpuorder membar for
2089   // preference or when absent the release membar otherwise NULL.
2090   //
2091   // n.b. the input membar is expected to be a MemBarVolatile but
2092   // need not be a card mark membar.
2093 
2094   MemBarNode *normal_to_leading(const MemBarNode *barrier)
2095   {
2096     // input must be a volatile membar
2097     assert((barrier->Opcode() == Op_MemBarVolatile ||
2098             barrier->Opcode() == Op_MemBarAcquire),
2099            "expecting a volatile or an acquire membar");
2100     Node *x;
2101     bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
2102 
2103     // if we have an acquire membar then it must be fed via a CPUOrder
2104     // membar
2105 
2106     if (is_cas) {
2107       // skip to parent barrier which must be a cpuorder
2108       x = parent_membar(barrier);
2109       if (x->Opcode() != Op_MemBarCPUOrder)
2110         return NULL;
2111     } else {
2112       // start from the supplied barrier
2113       x = (Node *)barrier;
2114     }
2115 
2116     // the Mem feed to the membar should be a merge
2117     x = x ->in(TypeFunc::Memory);
2118     if (!x->is_MergeMem())
2119       return NULL;
2120 
2121     MergeMemNode *mm = x->as_MergeMem();
2122 
2123     if (is_cas) {
2124       // the merge should be fed from the CAS via an SCMemProj node
2125       x = NULL;
2126       for (uint idx = 1; idx < mm->req(); idx++) {
2127         if (mm->in(idx)->Opcode() == Op_SCMemProj) {
2128           x = mm->in(idx);
2129           break;
2130         }
2131       }
2132       if (x == NULL) {
2133         return NULL;
2134       }
2135       // check for a CAS feeding this proj
2136       x = x->in(0);
2137       int opcode = x->Opcode();
2138       if (!is_CAS(opcode)) {
2139         return NULL;
2140       }
2141       // the CAS should get its mem feed from the leading membar
2142       x = x->in(MemNode::Memory);
2143     } else {
2144       // the merge should get its Bottom mem feed from the leading membar
2145       x = mm->in(Compile::AliasIdxBot);
2146     }
2147 
2148     // ensure this is a non control projection
2149     if (!x->is_Proj() || x->is_CFG()) {
2150       return NULL;
2151     }
2152     // if it is fed by a membar that's the one we want
2153     x = x->in(0);
2154 
2155     if (!x->is_MemBar()) {
2156       return NULL;
2157     }
2158 
2159     MemBarNode *leading = x->as_MemBar();
2160     // reject invalid candidates
2161     if (!leading_membar(leading)) {
2162       return NULL;
2163     }
2164 
2165     // ok, we have a leading membar, now for the sanity clauses
2166 
2167     // the leading membar must feed Mem to a releasing store or CAS
2168     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2169     StoreNode *st = NULL;
2170     LoadStoreNode *cas = NULL;
2171     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2172       x = mem->fast_out(i);
2173       if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2174         // two stores or CASes is one too many
2175         if (st != NULL || cas != NULL) {
2176           return NULL;
2177         }
2178         st = x->as_Store();
2179       } else if (is_CAS(x->Opcode())) {
2180         if (st != NULL || cas != NULL) {
2181           return NULL;
2182         }
2183         cas = x->as_LoadStore();
2184       }
2185     }
2186 
2187     // we should not have both a store and a cas
2188     if (st == NULL & cas == NULL) {
2189       return NULL;
2190     }
2191 
2192     if (st == NULL) {
2193       // nothing more to check
2194       return leading;
2195     } else {
2196       // we should not have a store if we started from an acquire
2197       if (is_cas) {
2198         return NULL;
2199       }
2200 
2201       // the store should feed the merge we used to get here
2202       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2203         if (st->fast_out(i) == mm) {
2204           return leading;
2205         }
2206       }
2207     }
2208 
2209     return NULL;
2210   }
2211 
2212   // card_mark_to_trailing
2213   //
2214   // graph traversal helper which detects extra, non-normal Mem feed
2215   // from a card mark volatile membar to a trailing membar i.e. it
2216   // ensures that one of the following three GC post-write Mem flow
2217   // subgraphs is present.
2218   //
2219   // 1)
2220   //     . . .
2221   //       |
2222   //   MemBarVolatile (card mark)
2223   //      |          |
2224   //      |        StoreCM
2225   //      |          |
2226   //      |        . . .
2227   //  Bot |  /
2228   //   MergeMem
2229   //      |
2230   //      |
2231   //    MemBarVolatile {trailing}
2232   //
2233   // 2)
2234   //   MemBarRelease/CPUOrder (leading)
2235   //    |
2236   //    |
2237   //    |\       . . .
2238   //    | \        |
2239   //    |  \  MemBarVolatile (card mark)
2240   //    |   \   |     |
2241   //     \   \  |   StoreCM    . . .
2242   //      \   \ |
2243   //       \  Phi
2244   //        \ /
2245   //        Phi  . . .
2246   //     Bot |   /
2247   //       MergeMem
2248   //         |
2249   //    MemBarVolatile {trailing}
2250   //
2251   //
2252   // 3)
2253   //   MemBarRelease/CPUOrder (leading)
2254   //    |
2255   //    |\
2256   //    | \
2257   //    |  \      . . .
2258   //    |   \       |
2259   //    |\   \  MemBarVolatile (card mark)
2260   //    | \   \   |     |
2261   //    |  \   \  |   StoreCM    . . .
2262   //    |   \   \ |
2263   //     \   \  Phi
2264   //      \   \ /
2265   //       \  Phi
2266   //        \ /
2267   //        Phi  . . .
2268   //     Bot |   /
2269   //       MergeMem
2270   //         |
2271   //         |
2272   //    MemBarVolatile {trailing}
2273   //
2274   // configuration 1 is only valid if UseConcMarkSweepGC &&
2275   // UseCondCardMark
2276   //
2277   // configurations 2 and 3 are only valid if UseG1GC.
2278   //
2279   // if a valid configuration is present returns the trailing membar
2280   // otherwise NULL.
2281   //
2282   // n.b. the supplied membar is expected to be a card mark
2283   // MemBarVolatile i.e. the caller must ensure the input node has the
2284   // correct operand and feeds Mem to a StoreCM node
2285 
2286   MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
2287   {
2288     // input must be a card mark volatile membar
2289     assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2290 
2291     Node *feed = barrier->proj_out(TypeFunc::Memory);
2292     Node *x;
2293     MergeMemNode *mm = NULL;
2294 
2295     const int MAX_PHIS = 3;     // max phis we will search through
2296     int phicount = 0;           // current search count
2297 
2298     bool retry_feed = true;
2299     while (retry_feed) {
2300       // see if we have a direct MergeMem feed
2301       for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2302         x = feed->fast_out(i);
2303         // the correct Phi will be merging a Bot memory slice
2304         if (x->is_MergeMem()) {
2305           mm = x->as_MergeMem();
2306           break;
2307         }
2308       }
2309       if (mm) {
2310         retry_feed = false;
2311       } else if (UseG1GC & phicount++ < MAX_PHIS) {
2312         // the barrier may feed indirectly via one or two Phi nodes
2313         PhiNode *phi = NULL;
2314         for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2315           x = feed->fast_out(i);
2316           // the correct Phi will be merging a Bot memory slice
2317           if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
2318             phi = x->as_Phi();
2319             break;
2320           }
2321         }
2322         if (!phi) {
2323           return NULL;
2324         }
2325         // look for another merge below this phi
2326         feed = phi;
2327       } else {
2328         // couldn't find a merge
2329         return NULL;
2330       }
2331     }
2332 
2333     // sanity check this feed turns up as the expected slice
2334     assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");
2335 
2336     MemBarNode *trailing = NULL;
2337     // be sure we have a trailing membar the merge
2338     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2339       x = mm->fast_out(i);
2340       if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
2341         trailing = x->as_MemBar();
2342         break;
2343       }
2344     }
2345 
2346     return trailing;
2347   }
2348 
2349   // trailing_to_card_mark
2350   //
2351   // graph traversal helper which detects extra, non-normal Mem feed
2352   // from a trailing volatile membar to a preceding card mark volatile
2353   // membar i.e. it identifies whether one of the three possible extra
2354   // GC post-write Mem flow subgraphs is present
2355   //
2356   // this predicate checks for the same flow as the previous predicate
2357   // but starting from the bottom rather than the top.
2358   //
2359   // if the configuration is present returns the card mark membar
2360   // otherwise NULL
2361   //
2362   // n.b. the supplied membar is expected to be a trailing
2363   // MemBarVolatile i.e. the caller must ensure the input node has the
2364   // correct opcode
2365 
  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
  {
    // Walk up from a trailing MemBarVolatile, through up to MAX_PHIS
    // Phi nodes on the Bot slice (G1 only), to the card mark membar;
    // returns the card mark membar or NULL.
    assert(trailing->Opcode() == Op_MemBarVolatile,
           "expecting a volatile membar");
    assert(!is_card_mark_membar(trailing),
           "not expecting a card mark membar");

    // the Mem feed to the membar should be a merge
    Node *x = trailing->in(TypeFunc::Memory);
    if (!x->is_MergeMem()) {
      return NULL;
    }

    MergeMemNode *mm = x->as_MergeMem();

    // follow the merge's AliasIdxBot slice upwards
    x = mm->in(Compile::AliasIdxBot);
    // with G1 we may possibly see a Phi or two before we see a Memory
    // Proj from the card mark membar

    const int MAX_PHIS = 3;     // max phis we will search through
    int phicount = 0;           // current search count

    // a direct Proj feed needs no Phi search at all
    bool retry_feed = !x->is_Proj();

    while (retry_feed) {
      if (UseG1GC && x->is_Phi() && phicount++ < MAX_PHIS) {
        PhiNode *phi = x->as_Phi();
        ProjNode *proj = NULL;
        PhiNode *nextphi = NULL;
        bool found_leading = false;
        // classify the phi's inputs: a Memory Proj from a volatile
        // membar, a further Phi to recurse into, and/or a feed from a
        // leading (release/cpuorder) membar
        for (uint i = 1; i < phi->req(); i++) {
          x = phi->in(i);
          if (x->is_Phi()) {
            nextphi = x->as_Phi();
          } else if (x->is_Proj()) {
            int opcode = x->in(0)->Opcode();
            if (opcode == Op_MemBarVolatile) {
              proj = x->as_Proj();
            } else if (opcode == Op_MemBarRelease ||
                       opcode == Op_MemBarCPUOrder) {
              // probably a leading membar
              found_leading = true;
            }
          }
        }
        // if we found a correct looking proj then retry from there
        // otherwise we must see a leading membar and a phi or this is
        // the wrong config
        if (proj != NULL) {
          x = proj;
          retry_feed = false;
        } else if (found_leading && nextphi != NULL) {
          // retry from this phi to check phi2
          x = nextphi;
        } else {
          // not what we were looking for
          return NULL;
        }
      } else {
        // ran out of Phis (or not G1) without finding a proj
        return NULL;
      }
    }
    // the proj has to come from the card mark membar
    x = x->in(0);
    if (!x->is_MemBar()) {
      return NULL;
    }

    MemBarNode *card_mark_membar = x->as_MemBar();

    if (!is_card_mark_membar(card_mark_membar)) {
      return NULL;
    }

    return card_mark_membar;
  }
2442 
2443   // trailing_to_leading
2444   //
2445   // graph traversal helper which checks the Mem flow up the graph
2446   // from a (non-card mark) trailing membar attempting to locate and
2447   // return an associated leading membar. it first looks for a
2448   // subgraph in the normal configuration (relying on helper
2449   // normal_to_leading). failing that it then looks for one of the
2450   // possible post-write card mark subgraphs linking the trailing node
2451   // to a the card mark membar (relying on helper
2452   // trailing_to_card_mark), and then checks that the card mark membar
2453   // is fed by a leading membar (once again relying on auxiliary
2454   // predicate normal_to_leading).
2455   //
  // if the configuration is valid returns the cpuorder membar for
2457   // preference or when absent the release membar otherwise NULL.
2458   //
2459   // n.b. the input membar is expected to be either a volatile or
2460   // acquire membar but in the former case must *not* be a card mark
2461   // membar.
2462 
2463   MemBarNode *trailing_to_leading(const MemBarNode *trailing)
2464   {
2465     assert((trailing->Opcode() == Op_MemBarAcquire ||
2466             trailing->Opcode() == Op_MemBarVolatile),
2467            "expecting an acquire or volatile membar");
2468     assert((trailing->Opcode() != Op_MemBarVolatile ||
2469             !is_card_mark_membar(trailing)),
2470            "not expecting a card mark membar");
2471 
2472     MemBarNode *leading = normal_to_leading(trailing);
2473 
2474     if (leading) {
2475       return leading;
2476     }
2477 
2478     // nothing more to do if this is an acquire
2479     if (trailing->Opcode() == Op_MemBarAcquire) {
2480       return NULL;
2481     }
2482 
2483     MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
2484 
2485     if (!card_mark_membar) {
2486       return NULL;
2487     }
2488 
2489     return normal_to_leading(card_mark_membar);
2490   }
2491 
2492   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
2493 
// returns true iff the acquire membar passed in is redundant i.e. it
// belongs to one of the recognized volatile-get subgraphs (load feed,
// unsafe-get membar chain, or CAS trailing membar) for which no dmb
// needs to be planted
bool unnecessary_acquire(const Node *barrier)
{
  assert(barrier->is_MemBar(), "expecting a membar");

  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  // a volatile read derived from bytecode (or also from an inlined
  // SHA field read via LibraryCallKit::load_field_from_object)
  // manifests as a LoadX[mo_acquire] followed by an acquire membar
  // with a bogus read dependency on its preceding load. so in those
  // cases we will find the load node at the PARMS offset of the
  // acquire membar.  n.b. there may be an intervening DecodeN node.
  //
  // a volatile load derived from an inlined unsafe field access
  // manifests as a cpuorder membar with Ctl and Mem projections
  // feeding both an acquire membar and a LoadX[mo_acquire]. The
  // acquire then feeds another cpuorder membar via Ctl and Mem
  // projections. The load has no output dependency on these trailing
  // membars because subsequent nodes inserted into the graph take
  // their control feed from the final membar cpuorder meaning they
  // are all ordered after the load.

  Node *x = barrier->lookup(TypeFunc::Parms);
  if (x) {
    // we are starting from an acquire and it has a fake dependency
    //
    // need to check for
    //
    //   LoadX[mo_acquire]
    //   {  |1   }
    //   {DecodeN}
    //      |Parms
    //   MemBarAcquire*
    //
    // where * tags node we were passed
    // and |k means input k
    if (x->is_DecodeNarrowPtr()) {
      x = x->in(1);
    }

    // the membar is redundant exactly when the fake dependency comes
    // from an acquiring load
    return (x->is_Load() && x->as_Load()->is_acquire());
  }

  // now check for an unsafe volatile get

  // need to check for
  //
  //   MemBarCPUOrder
  //        ||       \\
  //   MemBarAcquire* LoadX[mo_acquire]
  //        ||
  //   MemBarCPUOrder
  //
  // where * tags node we were passed
  // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes

  // check for a parent MemBarCPUOrder
  ProjNode *ctl;
  ProjNode *mem;
  MemBarNode *parent = parent_membar(barrier);
  if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
    return false;
  ctl = parent->proj_out(TypeFunc::Control);
  mem = parent->proj_out(TypeFunc::Memory);
  if (!ctl || !mem) {
    return false;
  }
  // ensure the proj nodes both feed a LoadX[mo_acquire]
  LoadNode *ld = NULL;
  for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
    x = ctl->fast_out(i);
    // if we see a load we keep hold of it and stop searching
    if (x->is_Load()) {
      ld = x->as_Load();
      break;
    }
  }
  // it must be an acquiring load
  if (ld && ld->is_acquire()) {

    // the same load must also hang off the Mem projection
    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
      x = mem->fast_out(i);
      // if we see the same load we drop it and stop searching
      if (x == ld) {
        ld = NULL;
        break;
      }
    }
    // we must have dropped the load
    if (ld == NULL) {
      // check for a child cpuorder membar
      MemBarNode *child  = child_membar(barrier->as_MemBar());
      if (child && child->Opcode() == Op_MemBarCPUOrder)
        return true;
    }
  }

  // final option for unnecessary membar is that it is a trailing node
  // belonging to a CAS
  MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());

  return leading != NULL;
}
2601 
2602 bool needs_acquiring_load(const Node *n)
2603 {
2604   assert(n->is_Load(), "expecting a load");
2605   if (UseBarriersForVolatile) {
2606     // we use a normal load and a dmb
2607     return false;
2608   }
2609 
2610   LoadNode *ld = n->as_Load();
2611 
2612   if (!ld->is_acquire()) {
2613     return false;
2614   }
2615 
2616   // check if this load is feeding an acquire membar
2617   //
2618   //   LoadX[mo_acquire]
2619   //   {  |1   }
2620   //   {DecodeN}
2621   //      |Parms
2622   //   MemBarAcquire*
2623   //
2624   // where * tags node we were passed
2625   // and |k means input k
2626 
2627   Node *start = ld;
2628   Node *mbacq = NULL;
2629 
2630   // if we hit a DecodeNarrowPtr we reset the start node and restart
2631   // the search through the outputs
2632  restart:
2633 
2634   for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
2635     Node *x = start->fast_out(i);
2636     if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
2637       mbacq = x;
2638     } else if (!mbacq &&
2639                (x->is_DecodeNarrowPtr() ||
2640                 (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
2641       start = x;
2642       goto restart;
2643     }
2644   }
2645 
2646   if (mbacq) {
2647     return true;
2648   }
2649 
2650   // now check for an unsafe volatile get
2651 
2652   // check if Ctl and Proj feed comes from a MemBarCPUOrder
2653   //
2654   //     MemBarCPUOrder
2655   //        ||       \\
2656   //   MemBarAcquire* LoadX[mo_acquire]
2657   //        ||
2658   //   MemBarCPUOrder
2659 
2660   MemBarNode *membar;
2661 
2662   membar = parent_membar(ld);
2663 
2664   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2665     return false;
2666   }
2667 
2668   // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
2669 
2670   membar = child_membar(membar);
2671 
2672   if (!membar || !membar->Opcode() == Op_MemBarAcquire) {
2673     return false;
2674   }
2675 
2676   membar = child_membar(membar);
2677 
2678   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2679     return false;
2680   }
2681 
2682   return true;
2683 }
2684 
2685 bool unnecessary_release(const Node *n)
2686 {
2687   assert((n->is_MemBar() &&
2688           n->Opcode() == Op_MemBarRelease),
2689          "expecting a release membar");
2690 
2691   if (UseBarriersForVolatile) {
2692     // we need to plant a dmb
2693     return false;
2694   }
2695 
2696   // if there is a dependent CPUOrder barrier then use that as the
2697   // leading
2698 
2699   MemBarNode *barrier = n->as_MemBar();
2700   // check for an intervening cpuorder membar
2701   MemBarNode *b = child_membar(barrier);
2702   if (b && b->Opcode() == Op_MemBarCPUOrder) {
2703     // ok, so start the check from the dependent cpuorder barrier
2704     barrier = b;
2705   }
2706 
2707   // must start with a normal feed
2708   MemBarNode *child_barrier = leading_to_normal(barrier);
2709 
2710   if (!child_barrier) {
2711     return false;
2712   }
2713 
2714   if (!is_card_mark_membar(child_barrier)) {
2715     // this is the trailing membar and we are done
2716     return true;
2717   }
2718 
2719   // must be sure this card mark feeds a trailing membar
2720   MemBarNode *trailing = card_mark_to_trailing(child_barrier);
2721   return (trailing != NULL);
2722 }
2723 
2724 bool unnecessary_volatile(const Node *n)
2725 {
2726   // assert n->is_MemBar();
2727   if (UseBarriersForVolatile) {
2728     // we need to plant a dmb
2729     return false;
2730   }
2731 
2732   MemBarNode *mbvol = n->as_MemBar();
2733 
2734   // first we check if this is part of a card mark. if so then we have
2735   // to generate a StoreLoad barrier
2736 
2737   if (is_card_mark_membar(mbvol)) {
2738       return false;
2739   }
2740 
2741   // ok, if it's not a card mark then we still need to check if it is
2742   // a trailing membar of a volatile put hgraph.
2743 
2744   return (trailing_to_leading(mbvol) != NULL);
2745 }
2746 
2747 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2748 
2749 bool needs_releasing_store(const Node *n)
2750 {
2751   // assert n->is_Store();
2752   if (UseBarriersForVolatile) {
2753     // we use a normal store and dmb combination
2754     return false;
2755   }
2756 
2757   StoreNode *st = n->as_Store();
2758 
2759   // the store must be marked as releasing
2760   if (!st->is_release()) {
2761     return false;
2762   }
2763 
2764   // the store must be fed by a membar
2765 
2766   Node *x = st->lookup(StoreNode::Memory);
2767 
2768   if (! x || !x->is_Proj()) {
2769     return false;
2770   }
2771 
2772   ProjNode *proj = x->as_Proj();
2773 
2774   x = proj->lookup(0);
2775 
2776   if (!x || !x->is_MemBar()) {
2777     return false;
2778   }
2779 
2780   MemBarNode *barrier = x->as_MemBar();
2781 
2782   // if the barrier is a release membar or a cpuorder mmebar fed by a
2783   // release membar then we need to check whether that forms part of a
2784   // volatile put graph.
2785 
2786   // reject invalid candidates
2787   if (!leading_membar(barrier)) {
2788     return false;
2789   }
2790 
2791   // does this lead a normal subgraph?
2792   MemBarNode *mbvol = leading_to_normal(barrier);
2793 
2794   if (!mbvol) {
2795     return false;
2796   }
2797 
2798   // all done unless this is a card mark
2799   if (!is_card_mark_membar(mbvol)) {
2800     return true;
2801   }
2802 
2803   // we found a card mark -- just make sure we have a trailing barrier
2804 
2805   return (card_mark_to_trailing(mbvol) != NULL);
2806 }
2807 
2808 // predicate controlling translation of CAS
2809 //
2810 // returns true if CAS needs to use an acquiring load otherwise false
2811 
2812 bool needs_acquiring_load_exclusive(const Node *n)
2813 {
2814   assert(is_CAS(n->Opcode()), "expecting a compare and swap");
2815   if (UseBarriersForVolatile) {
2816     return false;
2817   }
2818 
2819   // CAS nodes only ought to turn up in inlined unsafe CAS operations
2820 #ifdef ASSERT
2821   LoadStoreNode *st = n->as_LoadStore();
2822 
2823   // the store must be fed by a membar
2824 
2825   Node *x = st->lookup(StoreNode::Memory);
2826 
2827   assert (x && x->is_Proj(), "CAS not fed by memory proj!");
2828 
2829   ProjNode *proj = x->as_Proj();
2830 
2831   x = proj->lookup(0);
2832 
2833   assert (x && x->is_MemBar(), "CAS not fed by membar!");
2834 
2835   MemBarNode *barrier = x->as_MemBar();
2836 
2837   // the barrier must be a cpuorder mmebar fed by a release membar
2838 
2839   assert(barrier->Opcode() == Op_MemBarCPUOrder,
2840          "CAS not fed by cpuorder membar!");
2841 
2842   MemBarNode *b = parent_membar(barrier);
2843   assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
2844           "CAS not fed by cpuorder+release membar pair!");
2845 
2846   // does this lead a normal subgraph?
2847   MemBarNode *mbar = leading_to_normal(barrier);
2848 
2849   assert(mbar != NULL, "CAS not embedded in normal graph!");
2850 
2851   assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
2852 #endif // ASSERT
2853   // so we can just return true here
2854   return true;
2855 }
2856 
2857 // predicate controlling translation of StoreCM
2858 //
2859 // returns true if a StoreStore must precede the card write otherwise
2860 // false
2861 
2862 bool unnecessary_storestore(const Node *storecm)
2863 {
2864   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
2865 
2866   // we only ever need to generate a dmb ishst between an object put
2867   // and the associated card mark when we are using CMS without
2868   // conditional card marking
2869 
2870   if (!UseConcMarkSweepGC || UseCondCardMark) {
2871     return true;
2872   }
2873 
2874   // if we are implementing volatile puts using barriers then the
2875   // object put as an str so we must insert the dmb ishst
2876 
2877   if (UseBarriersForVolatile) {
2878     return false;
2879   }
2880 
2881   // we can omit the dmb ishst if this StoreCM is part of a volatile
2882   // put because in thta case the put will be implemented by stlr
2883   //
2884   // we need to check for a normal subgraph feeding this StoreCM.
2885   // that means the StoreCM must be fed Memory from a leading membar,
2886   // either a MemBarRelease or its dependent MemBarCPUOrder, and the
2887   // leading membar must be part of a normal subgraph
2888 
2889   Node *x = storecm->in(StoreNode::Memory);
2890 
2891   if (!x->is_Proj()) {
2892     return false;
2893   }
2894 
2895   x = x->in(0);
2896 
2897   if (!x->is_MemBar()) {
2898     return false;
2899   }
2900 
2901   MemBarNode *leading = x->as_MemBar();
2902 
2903   // reject invalid candidates
2904   if (!leading_membar(leading)) {
2905     return false;
2906   }
2907 
2908   // we can omit the StoreStore if it is the head of a normal subgraph
2909   return (leading_to_normal(leading) != NULL);
2910 }
2911 
2912 
2913 #define __ _masm.
2914 
2915 // advance declarations for helper functions to convert register
2916 // indices to register objects
2917 
2918 // the ad file has to provide implementations of certain methods
2919 // expected by the generic code
2920 //
2921 // REQUIRED FUNCTIONALITY
2922 
2923 //=============================================================================
2924 
2925 // !!!!! Special hack to get all types of calls to specify the byte offset
2926 //       from the start of the call to the point where the return address
2927 //       will point.
2928 
2929 int MachCallStaticJavaNode::ret_addr_offset()
2930 {
2931   // call should be a simple bl
2932   int off = 4;
2933   return off;
2934 }
2935 
2936 int MachCallDynamicJavaNode::ret_addr_offset()
2937 {
2938   return 16; // movz, movk, movk, bl
2939 }
2940 
2941 int MachCallRuntimeNode::ret_addr_offset() {
2942   // for generated stubs the call will be
2943   //   far_call(addr)
2944   // for real runtime callouts it will be six instructions
2945   // see aarch64_enc_java_to_runtime
2946   //   adr(rscratch2, retaddr)
2947   //   lea(rscratch1, RuntimeAddress(addr)
2948   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
2949   //   blrt rscratch1
2950   CodeBlob *cb = CodeCache::find_blob(_entry_point);
2951   if (cb) {
2952     return MacroAssembler::far_branch_size();
2953   } else {
2954     return 6 * NativeInstruction::instruction_size;
2955   }
2956 }
2957 
2958 // Indicate if the safepoint node needs the polling page as an input
2959 
2960 // the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
2962 // instruction itself. so we cannot plant a mov of the safepoint poll
2963 // address followed by a load. setting this to true means the mov is
2964 // scheduled as a prior instruction. that's better for scheduling
2965 // anyway.
2966 
2967 bool SafePointNode::needs_polling_address_input()
2968 {
2969   return true;
2970 }
2971 
2972 //=============================================================================
2973 
2974 #ifndef PRODUCT
2975 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2976   st->print("BREAKPOINT");
2977 }
2978 #endif
2979 
2980 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2981   MacroAssembler _masm(&cbuf);
2982   __ brk(0);
2983 }
2984 
2985 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
2986   return MachNode::size(ra_);
2987 }
2988 
2989 //=============================================================================
2990 
2991 #ifndef PRODUCT
2992   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
2993     st->print("nop \t# %d bytes pad for loops and calls", _count);
2994   }
2995 #endif
2996 
2997   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2998     MacroAssembler _masm(&cbuf);
2999     for (int i = 0; i < _count; i++) {
3000       __ nop();
3001     }
3002   }
3003 
3004   uint MachNopNode::size(PhaseRegAlloc*) const {
3005     return _count * NativeInstruction::instruction_size;
3006   }
3007 
3008 //=============================================================================
// the constant table base node's output register mask is empty
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
3010 
3011 int Compile::ConstantTable::calculate_table_base_offset() const {
3012   return 0;  // absolute addressing, no offset
3013 }
3014 
3015 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
3016 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
3017   ShouldNotReachHere();
3018 }
3019 
3020 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
3021   // Empty encoding
3022 }
3023 
3024 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
3025   return 0;
3026 }
3027 
3028 #ifndef PRODUCT
3029 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
3030   st->print("-- \t// MachConstantBaseNode (empty encoding)");
3031 }
3032 #endif
3033 
3034 #ifndef PRODUCT
// pretty-print the frame set-up for a debug listing.
// NOTE(review): presumably this mirrors the code laid down by
// MacroAssembler::build_frame in emit() below -- confirm against
// macroAssembler_aarch64.cpp
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  // small frames are allocated with a single sub; larger frame sizes
  // do not fit the immediate so the adjustment goes via rscratch1
  if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
3054 #endif
3055 
// emit the method prolog: optional stack bang, frame build, simulator
// notification and constant table base set-up
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  // mark the point at which the frame is laid out for the debugger
  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
3091 
3092 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
3093 {
3094   return MachNode::size(ra_); // too many variables; just compute it
3095                               // the hard way
3096 }
3097 
3098 int MachPrologNode::reloc() const
3099 {
3100   return 0;
3101 }
3102 
3103 //=============================================================================
3104 
3105 #ifndef PRODUCT
// pretty-print the frame tear-down and return poll for a debug
// listing.  NOTE(review): presumably this mirrors remove_frame and
// read_polling_page in emit() below -- confirm
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  // small frames restore lr/rfp then bump sp with an immediate add;
  // larger frames route the adjustment through rscratch1
  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
3129 #endif
3130 
// emit the method epilog: frame removal, simulator notification,
// reserved stack check and the return safepoint poll
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_reentry);
  }

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  // touch the polling page on return from a method compilation
  if (do_polling() && C->is_method_compilation()) {
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
3150 
3151 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
3152   // Variable size. Determine dynamically.
3153   return MachNode::size(ra_);
3154 }
3155 
3156 int MachEpilogNode::reloc() const {
3157   // Return number of relocatable values contained in this instruction.
3158   return 1; // 1 for polling page.
3159 }
3160 
3161 const Pipeline * MachEpilogNode::pipeline() const {
3162   return MachNode::pipeline_class();
3163 }
3164 
3165 // This method seems to be obsolete. It is declared in machnode.hpp
3166 // and defined in all *.ad files, but it is never called. Should we
3167 // get rid of it?
3168 int MachEpilogNode::safepoint_offset() const {
3169   assert(do_polling(), "no return for this epilog node");
3170   return 4;
3171 }
3172 
3173 //=============================================================================
3174 
3175 // Figure out which register class each belongs in: rc_int, rc_float or
3176 // rc_stack.
3177 enum RC { rc_bad, rc_int, rc_float, rc_stack };
3178 
3179 static enum RC rc_class(OptoReg::Name reg) {
3180 
3181   if (reg == OptoReg::Bad) {
3182     return rc_bad;
3183   }
3184 
3185   // we have 30 int registers * 2 halves
3186   // (rscratch1 and rscratch2 are omitted)
3187 
3188   if (reg < 60) {
3189     return rc_int;
3190   }
3191 
3192   // we have 32 float register * 2 halves
3193   if (reg < 60 + 128) {
3194     return rc_float;
3195   }
3196 
3197   // Between float regs & stack is the flags regs.
3198   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
3199 
3200   return rc_stack;
3201 }
3202 
3203 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
3204   Compile* C = ra_->C;
3205 
3206   // Get registers to move.
3207   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
3208   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
3209   OptoReg::Name dst_hi = ra_->get_reg_second(this);
3210   OptoReg::Name dst_lo = ra_->get_reg_first(this);
3211 
3212   enum RC src_hi_rc = rc_class(src_hi);
3213   enum RC src_lo_rc = rc_class(src_lo);
3214   enum RC dst_hi_rc = rc_class(dst_hi);
3215   enum RC dst_lo_rc = rc_class(dst_lo);
3216 
3217   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
3218 
3219   if (src_hi != OptoReg::Bad) {
3220     assert((src_lo&1)==0 && src_lo+1==src_hi &&
3221            (dst_lo&1)==0 && dst_lo+1==dst_hi,
3222            "expected aligned-adjacent pairs");
3223   }
3224 
3225   if (src_lo == dst_lo && src_hi == dst_hi) {
3226     return 0;            // Self copy, no move.
3227   }
3228 
3229   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
3230               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
3231   int src_offset = ra_->reg2offset(src_lo);
3232   int dst_offset = ra_->reg2offset(dst_lo);
3233 
3234   if (bottom_type()->isa_vect() != NULL) {
3235     uint ireg = ideal_reg();
3236     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
3237     if (cbuf) {
3238       MacroAssembler _masm(cbuf);
3239       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
3240       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
3241         // stack->stack
3242         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
3243         if (ireg == Op_VecD) {
3244           __ unspill(rscratch1, true, src_offset);
3245           __ spill(rscratch1, true, dst_offset);
3246         } else {
3247           __ spill_copy128(src_offset, dst_offset);
3248         }
3249       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
3250         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3251                ireg == Op_VecD ? __ T8B : __ T16B,
3252                as_FloatRegister(Matcher::_regEncode[src_lo]));
3253       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
3254         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3255                        ireg == Op_VecD ? __ D : __ Q,
3256                        ra_->reg2offset(dst_lo));
3257       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
3258         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3259                        ireg == Op_VecD ? __ D : __ Q,
3260                        ra_->reg2offset(src_lo));
3261       } else {
3262         ShouldNotReachHere();
3263       }
3264     }
3265   } else if (cbuf) {
3266     MacroAssembler _masm(cbuf);
3267     switch (src_lo_rc) {
3268     case rc_int:
3269       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
3270         if (is64) {
3271             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
3272                    as_Register(Matcher::_regEncode[src_lo]));
3273         } else {
3274             MacroAssembler _masm(cbuf);
3275             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
3276                     as_Register(Matcher::_regEncode[src_lo]));
3277         }
3278       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
3279         if (is64) {
3280             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3281                      as_Register(Matcher::_regEncode[src_lo]));
3282         } else {
3283             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3284                      as_Register(Matcher::_regEncode[src_lo]));
3285         }
3286       } else {                    // gpr --> stack spill
3287         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3288         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
3289       }
3290       break;
3291     case rc_float:
3292       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
3293         if (is64) {
3294             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
3295                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3296         } else {
3297             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
3298                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3299         }
3300       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
3301           if (cbuf) {
3302             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3303                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3304         } else {
3305             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3306                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3307         }
3308       } else {                    // fpr --> stack spill
3309         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3310         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3311                  is64 ? __ D : __ S, dst_offset);
3312       }
3313       break;
3314     case rc_stack:
3315       if (dst_lo_rc == rc_int) {  // stack --> gpr load
3316         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
3317       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
3318         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3319                    is64 ? __ D : __ S, src_offset);
3320       } else {                    // stack --> stack copy
3321         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3322         __ unspill(rscratch1, is64, src_offset);
3323         __ spill(rscratch1, is64, dst_offset);
3324       }
3325       break;
3326     default:
3327       assert(false, "bad rc_class for spill");
3328       ShouldNotReachHere();
3329     }
3330   }
3331 
3332   if (st) {
3333     st->print("spill ");
3334     if (src_lo_rc == rc_stack) {
3335       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
3336     } else {
3337       st->print("%s -> ", Matcher::regName[src_lo]);
3338     }
3339     if (dst_lo_rc == rc_stack) {
3340       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
3341     } else {
3342       st->print("%s", Matcher::regName[dst_lo]);
3343     }
3344     if (bottom_type()->isa_vect() != NULL) {
3345       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
3346     } else {
3347       st->print("\t# spill size = %d", is64 ? 64:32);
3348     }
3349   }
3350 
3351   return 0;
3352 
3353 }
3354 
3355 #ifndef PRODUCT
3356 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3357   if (!ra_)
3358     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
3359   else
3360     implementation(NULL, ra_, false, st);
3361 }
3362 #endif
3363 
3364 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3365   implementation(&cbuf, ra_, false, NULL);
3366 }
3367 
3368 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
3369   return MachNode::size(ra_);
3370 }
3371 
3372 //=============================================================================
3373 
3374 #ifndef PRODUCT
3375 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3376   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3377   int reg = ra_->get_reg_first(this);
3378   st->print("add %s, rsp, #%d]\t# box lock",
3379             Matcher::regName[reg], offset);
3380 }
3381 #endif
3382 
3383 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3384   MacroAssembler _masm(&cbuf);
3385 
3386   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3387   int reg    = ra_->get_encode(this);
3388 
3389   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
3390     __ add(as_Register(reg), sp, offset);
3391   } else {
3392     ShouldNotReachHere();
3393   }
3394 }
3395 
3396 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
3397   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
3398   return 4;
3399 }
3400 
3401 //=============================================================================
3402 
3403 #ifndef PRODUCT
// describe the unverified entry point's inline cache check for a
// debug listing
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("# MachUEPNode");
  if (UseCompressedClassPointers) {
    st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    if (Universe::narrow_klass_shift() != 0) {
      st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
    }
  } else {
   st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  }
  st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
  st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
}
3418 #endif
3419 
// emit the unverified entry point: compare the receiver's klass
// against the inline cache and jump to the ic miss stub on mismatch
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}
3433 
3434 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
3435 {
3436   return MachNode::size(ra_);
3437 }
3438 
3439 // REQUIRED EMIT CODE
3440 
3441 //=============================================================================
3442 
3443 // Emit exception handler code.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // the handler is a far jump to the exception blob entry point:
  //   mov rscratch1 #exception_blob_entry_point
  //   br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;  // offset of the handler within the stub section
}
3462 
// Emit deopt handler code.
//
// Returns the offset of the handler within the stub section, or 0 when
// the code cache is full.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // Record the address of this deopt site in lr, then jump to the
  // deopt blob's unpack entry; far_jump in case the blob is out of
  // branch range.
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
3483 
3484 // REQUIRED MATCHER CODE
3485 
3486 //=============================================================================
3487 
3488 const bool Matcher::match_rule_supported(int opcode) {
3489 
3490   switch (opcode) {
3491   default:
3492     break;
3493   }
3494 
3495   if (!has_match_rule(opcode)) {
3496     return false;
3497   }
3498 
3499   return true;  // Per default match rules are supported.
3500 }
3501 
3502 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
3503 
3504   // TODO
3505   // identify extra cases that we might want to provide match rules for
3506   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
3507   bool ret_value = match_rule_supported(opcode);
3508   // Add rules here.
3509 
3510   return ret_value;  // Per default match rules are supported.
3511 }
3512 
// Predicated (masked) vector operations are not supported here.
const bool Matcher::has_predicated_vectors(void) {
  return false;
}
3516 
// FP register pressure threshold used by the register allocator;
// the platform applies no adjustment to the default.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}
3520 
// x87 FPU stack offset mapping -- not applicable on this platform;
// any call lands in Unimplemented().
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
3526 
3527 // Is this branch offset short enough that a short branch can be used?
3528 //
3529 // NOTE: If the platform does not provide any short branch variants, then
3530 //       this method should return false for offset 0.
3531 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
3532   // The passed offset is relative to address of the branch.
3533 
3534   return (-32768 <= offset && offset < 32768);
3535 }
3536 
// Is a 64-bit constant cheap enough to materialize in a single store?
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}
3542 
// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
3547 
// Vector width in bytes.
//
// Capped at 16 (one NEON Q register) and further limited by the
// MaxVectorSize flag; returns 0 when the element type cannot form a
// usable vector at that width.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  int size = MIN2(16,(int)MaxVectorSize);
  // Minimum 2 values in vector
  if (size < 2*type2aelembytes(bt)) size = 0;
  // But never < 4
  if (size < 4) size = 0;
  return size;
}
3557 
// Limits on vector size (number of elements) loaded into vector.
// Maximum element count = usable vector width / element size.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
3562 const int Matcher::min_vector_size(const BasicType bt) {
3563 //  For the moment limit the vector size to 8 bytes
3564     int size = 8 / type2aelembytes(bt);
3565     if (size < 2) size = 2;
3566     return size;
3567 }
3568 
3569 // Vector ideal reg.
3570 const uint Matcher::vector_ideal_reg(int len) {
3571   switch(len) {
3572     case  8: return Op_VecD;
3573     case 16: return Op_VecX;
3574   }
3575   ShouldNotReachHere();
3576   return 0;
3577 }
3578 
// Ideal register class for a vector shift count; always a full
// 16-byte vector register regardless of the requested size.
const uint Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecX;
}
3582 
// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}
3587 
// Misaligned vector loads/stores are permitted unless the AlignVector
// flag forces alignment.  (The old comment said "x86" -- it was copied
// from the x86 port; this is the aarch64 setting.)
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}
3592 
// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
// false: no explicit masking is emitted for shift counts.
const bool Matcher::need_masked_shift_count = false;
3613 
// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Complex addressing is only usable when narrow oops need no shift.
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}

bool Matcher::const_oop_prefer_decode() {
  // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
  return Universe::narrow_oop_base() == NULL;
}

bool Matcher::const_klass_prefer_decode() {
  // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
  return Universe::narrow_klass_base() == NULL;
}
3643 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Implicit-null-check fixup is not used on this platform: any call
// lands in Unimplemented().  (The old "No-op on amd64" comment was
// copied from the x86 port and did not match the body.)
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return true; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
3675 
3676 // Return whether or not this register is ever used as an argument.
3677 // This function is used on startup to build the trampoline stubs in
3678 // generateOptoStub.  Registers not mentioned will be killed by the VM
3679 // call in the trampoline, and arguments in those registers not be
3680 // available to the callee.
3681 bool Matcher::can_be_java_arg(int reg)
3682 {
3683   return
3684     reg ==  R0_num || reg == R0_H_num ||
3685     reg ==  R1_num || reg == R1_H_num ||
3686     reg ==  R2_num || reg == R2_H_num ||
3687     reg ==  R3_num || reg == R3_H_num ||
3688     reg ==  R4_num || reg == R4_H_num ||
3689     reg ==  R5_num || reg == R5_H_num ||
3690     reg ==  R6_num || reg == R6_H_num ||
3691     reg ==  R7_num || reg == R7_H_num ||
3692     reg ==  V0_num || reg == V0_H_num ||
3693     reg ==  V1_num || reg == V1_H_num ||
3694     reg ==  V2_num || reg == V2_H_num ||
3695     reg ==  V3_num || reg == V3_H_num ||
3696     reg ==  V4_num || reg == V4_H_num ||
3697     reg ==  V5_num || reg == V5_H_num ||
3698     reg ==  V6_num || reg == V6_H_num ||
3699     reg ==  V7_num || reg == V7_H_num;
3700 }
3701 
// An argument register is spillable iff it is a Java argument register.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}

// No hand-written assembly sequence for long division by a constant;
// let the compiler expand it.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}

// Register for DIVI projection of divmodI.
// The divmod projections below should never be requested on this
// platform (each body is ShouldNotReachHere()).
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register saved across a MethodHandle invoke: the frame pointer.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
3737 
3738 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
3739   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
3740     Node* u = addp->fast_out(i);
3741     if (u->is_Mem()) {
3742       int opsize = u->as_Mem()->memory_size();
3743       assert(opsize > 0, "unexpected memory operand size");
3744       if (u->as_Mem()->memory_size() != (1<<shift)) {
3745         return false;
3746       }
3747     }
3748   }
3749   return true;
3750 }
3751 
const bool Matcher::convi2l_type_required = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
//
// Returns true when the AddP's inputs were pushed here (and flagged in
// address_visited) so the matcher folds them into an addressing mode;
// false to let default matching handle the node.
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
    return true;
  }

  Node *off = m->in(AddPNode::Offset);
  // Case 1: offset is (index << con), possibly with an inner ConvI2L --
  // fold it only when every memory use matches the implied operand size.
  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
      // Are there other uses besides address expressions?
      !is_visited(off)) {
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(2), Visit);
    Node *conv = off->in(1);
    if (conv->Opcode() == Op_ConvI2L &&
        // Are there other uses besides address expressions?
        !is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Pre_Visit);
    } else {
      mstack.push(conv, Pre_Visit);
    }
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  // Case 2: offset is a bare ConvI2L (sign-extended index, no scale).
  } else if (off->Opcode() == Op_ConvI2L &&
             // Are there other uses besides address expressions?
             !is_visited(off)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(1), Pre_Visit);
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
3794 
// Transform:
// (AddP base (AddP base address (LShiftL index con)) offset)
// into:
// (AddP base (AddP base offset) (LShiftL index con))
// to take full advantage of ARM's addressing modes
void Compile::reshape_address(AddPNode* addp) {
  Node *addr = addp->in(AddPNode::Address);
  if (addr->is_AddP() && addr->in(AddPNode::Base) == addp->in(AddPNode::Base)) {
    const AddPNode *addp2 = addr->as_AddP();
    // Only reshape when the inner offset is a scaled index whose scale
    // fits all memory uses, or a sign-extended (ConvI2L) index.
    if ((addp2->in(AddPNode::Offset)->Opcode() == Op_LShiftL &&
         addp2->in(AddPNode::Offset)->in(2)->is_Con() &&
         size_fits_all_mem_uses(addp, addp2->in(AddPNode::Offset)->in(2)->get_int())) ||
        addp2->in(AddPNode::Offset)->Opcode() == Op_ConvI2L) {

      // Any use that can't embed the address computation?
      for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
        Node* u = addp->fast_out(i);
        if (!u->is_Mem()) {
          return;
        }
        if (u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) {
          return;
        }
        if (addp2->in(AddPNode::Offset)->Opcode() != Op_ConvI2L) {
          int scale = 1 << addp2->in(AddPNode::Offset)->in(2)->get_int();
          if (VM_Version::expensive_load(u->as_Mem()->memory_size(), scale)) {
            return;
          }
        }
      }

      Node* off = addp->in(AddPNode::Offset);
      Node* addr2 = addp2->in(AddPNode::Address);
      Node* base = addp->in(AddPNode::Base);

      Node* new_addr = NULL;
      // Check whether the graph already has the new AddP we need
      // before we create one (no GVN available here).
      for (DUIterator_Fast imax, i = addr2->fast_outs(imax); i < imax; i++) {
        Node* u = addr2->fast_out(i);
        if (u->is_AddP() &&
            u->in(AddPNode::Base) == base &&
            u->in(AddPNode::Address) == addr2 &&
            u->in(AddPNode::Offset) == off) {
          new_addr = u;
          break;
        }
      }

      if (new_addr == NULL) {
        new_addr = new AddPNode(base, addr2, off);
      }
      Node* new_off = addp2->in(AddPNode::Offset);
      addp->set_req(AddPNode::Address, new_addr);
      // Disconnect the old inner AddP / offset if this was their last use.
      if (addr->outcnt() == 0) {
        addr->disconnect_inputs(NULL, this);
      }
      addp->set_req(AddPNode::Offset, new_off);
      if (off->outcnt() == 0) {
        off->disconnect_inputs(NULL, this);
      }
    }
  }
}
3859 
3860 // helper for encoding java_to_runtime calls on sim
3861 //
3862 // this is needed to compute the extra arguments required when
3863 // planting a call to the simulator blrt instruction. the TypeFunc
3864 // can be queried to identify the counts for integral, and floating
3865 // arguments and the return type
3866 
3867 static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
3868 {
3869   int gps = 0;
3870   int fps = 0;
3871   const TypeTuple *domain = tf->domain();
3872   int max = domain->cnt();
3873   for (int i = TypeFunc::Parms; i < max; i++) {
3874     const Type *t = domain->field_at(i);
3875     switch(t->basic_type()) {
3876     case T_FLOAT:
3877     case T_DOUBLE:
3878       fps++;
3879     default:
3880       gps++;
3881     }
3882   }
3883   gpcnt = gps;
3884   fpcnt = fps;
3885   BasicType rt = tf->return_type();
3886   switch (rt) {
3887   case T_VOID:
3888     rtype = MacroAssembler::ret_type_void;
3889     break;
3890   default:
3891     rtype = MacroAssembler::ret_type_integral;
3892     break;
3893   case T_FLOAT:
3894     rtype = MacroAssembler::ret_type_float;
3895     break;
3896   case T_DOUBLE:
3897     rtype = MacroAssembler::ret_type_double;
3898     break;
3899   }
3900 }
3901 
// Emit a volatile (acquire/release) memory access INSN of REG at
// [BASE].  Volatile accesses only support a plain base register, so
// any index, displacement or scale is rejected with a guarantee().
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }
3910 
// Member-function-pointer types for the three families of memory
// instructions dispatched by the loadStore() helpers below.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);

  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  //
  // index == -1 means base+displacement addressing; otherwise the
  // access is base+index with 'scale' extension and disp must be 0.
  static void loadStore(MacroAssembler masm, mem_insn insn,
                         Register reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
    case INDINDEXI2L:
    case INDINDEXI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    if (index == -1) {
      (masm.*insn)(reg, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
3946 
  // Float/double variant of loadStore above.  Only the scaled I2L
  // opcodes need sign extension here (NOTE(review): the bare
  // INDINDEXI2L/I2LN cases of the integer variant are absent --
  // presumably FP memory operands never use those patterns; confirm
  // against the operand definitions).
  static void loadStore(MacroAssembler masm, mem_float_insn insn,
                         FloatRegister reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

     if (index == -1) {
      (masm.*insn)(reg, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
3969 
  // Vector variant of loadStore: T selects the SIMD register variant
  // (S/D/Q); indexed addressing always uses an LSL-scaled index.
  static void loadStore(MacroAssembler masm, mem_vector_insn insn,
                         FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
                         int opcode, Register base, int index, int size, int disp)
  {
    if (index == -1) {
      (masm.*insn)(reg, T, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode");
      (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
    }
  }
3981 
3982 %}
3983 
3984 
3985 
3986 //----------ENCODING BLOCK-----------------------------------------------------
3987 // This block specifies the encoding classes used by the compiler to
3988 // output byte streams.  Encoding classes are parameterized macros
3989 // used by Machine Instruction Nodes in order to generate the bit
3990 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
3994 // which returns its register number when queried.  CONST_INTER causes
3995 // an operand to generate a function which returns the value of the
3996 // constant when queried.  MEMORY_INTER causes an operand to generate
3997 // four functions which return the Base Register, the Index Register,
3998 // the Scale Value, and the Offset Value of the operand when queried.
3999 // COND_INTER causes an operand to generate six functions which return
4000 // the encoding code (ie - encoding bits for the instruction)
4001 // associated with each basic boolean condition for a conditional
4002 // instruction.
4003 //
4004 // Instructions specify two basic values for encoding.  Again, a
4005 // function is available to check if the constant displacement is an
4006 // oop. They use the ins_encode keyword to specify their encoding
4007 // classes (which must be a sequence of enc_class names, and their
4008 // parameters, specified in the encoding block), and they use the
4009 // opcode keyword to specify, in order, their primary, secondary, and
4010 // tertiary opcode.  Only the opcode sections which a particular
4011 // instruction needs for encoding need to be specified.
4012 encode %{
4013   // Build emit functions for each basic byte or larger field in the
4014   // intel encoding scheme (opcode, rm, sib, immediate), and call them
4015   // from C++ code in the enc_class source block.  Emit functions will
4016   // live in the main source block for now.  In future, we can
4017   // generalize this by adding a syntax that specifies the sizes of
4018   // fields in an order, so that the adlc can build the emit functions
4019   // automagically
4020 
  // catch all for unimplemented encodings: emits a trap-style
  // "unimplemented" marker so a stray use is caught at run time
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}
4026 
  // BEGIN Non-volatile memory access
  //
  // Each encoding forwards the memory operand's pieces ($mem$$base,
  // $mem$$index, $mem$$scale, $mem$$disp) plus its opcode to the
  // matching loadStore() helper in the source block above; the opcode
  // tells loadStore whether the index needs sign extension.

  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector loads: the S/D/Q variant selects the 4-, 8- or 16-byte
  // SIMD register view.
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
4130 
  // Integer, FP and vector stores.  The *0 variants store the zero
  // register; aarch64_enc_str handles the store-sp-into-thread special
  // case inline.
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Zero-byte store preceded by a StoreStore barrier.
  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector stores, S/D/Q register variants.
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
4223 
4224   // END Non-volatile memory access
4225 
4226   // volatile loads and stores
4227 
  // Store-release encodings. MOV_VOLATILE is a macro (defined earlier in
  // this file, outside this view) that forms the effective address — using
  // rscratch1 as scratch when base+index/disp cannot be encoded directly —
  // and emits the named release-store instruction.

  // store-release byte
  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
  %}

  // store-release halfword
  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
  %}

  // store-release word
  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}
4242 
4243 
  // Load-acquire encodings. Sub-word variants follow the load-acquire with
  // an explicit sign-extension, since ldarb/ldarh zero-extend.

  // load-acquire byte, sign-extend to int
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}

  // load-acquire byte, sign-extend to long
  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}

  // load-acquire byte, zero-extend to int
  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // load-acquire byte, zero-extend to long
  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // load-acquire halfword, sign-extend to int
  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  // load-acquire halfword, sign-extend to long
  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  // load-acquire halfword, zero-extend to int
  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // load-acquire halfword, zero-extend to long
  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // load-acquire word to int
  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // load-acquire word, zero-extend to long.
  // NOTE(review): deliberately reuses the enc_class name aarch64_enc_ldarw
  // with an iRegL operand — confirm ADLC distinguishes the two variants.
  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // load-acquire doubleword
  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}
4306 
  // Volatile float loads: no FP load-acquire exists, so load-acquire into
  // rscratch1 then move the bits into the FP register with fmov.

  // load-acquire float
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  // load-acquire double
  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
4318 
  // store-release doubleword
  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    // (sp is not encodable as the source of stlr), so copy sp into
    // rscratch2 first and store that instead.
    if (src_reg == r31_sp) {
        MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}

  // store-release float: move bits to rscratch2, then release-store the word.
  // The inner scope limits _masm's lifetime before MOV_VOLATILE (which
  // constructs its own assembler).
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}

  // store-release double: same pattern, 64-bit.
  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
4352 
4353   // synchronized read/update encodings
4354 
  // Load-acquire-exclusive. ldaxr takes only a base register, so compound
  // addresses (disp and/or index) are first materialized into rscratch1
  // with lea.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;        // -1 means no index register
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // base + disp first, then add the scaled index
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}

  // Store-release-exclusive. Mirrors ldaxr's address handling, using
  // rscratch2 for the address (rscratch1 receives the store status).
  // Ends by comparing the status word with zero so flags report success.
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;        // -1 means no index register
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    __ cmpw(rscratch1, zr);   // status == 0 iff the exclusive store succeeded
  %}
4413 
  // Compare-and-swap encodings: release semantics, no acquire (see the
  // _acq variants below for the acquiring form). Each requires a plain
  // base-register address (no index, no displacement).

  // 64-bit CAS
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit CAS
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 16-bit CAS
  enc_class aarch64_enc_cmpxchgs(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 8-bit CAS
  enc_class aarch64_enc_cmpxchgb(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}
4445 
4446 
4447   // The only difference between aarch64_enc_cmpxchg and
4448   // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
4449   // CompareAndSwap sequence to serve as a barrier on acquiring a
4450   // lock.
  // 64-bit CAS with acquire semantics (acquire == true is the only
  // difference from aarch64_enc_cmpxchg above).
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit CAS with acquire semantics.
  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}
4466 
4467 
4468   // auxiliary used for CompareAndSwapX to set result register
4469   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
4470     MacroAssembler _masm(&cbuf);
4471     Register res_reg = as_Register($res$$reg);
4472     __ cset(res_reg, Assembler::EQ);
4473   %}
4474 
4475   // prefetch encodings
4476 
  // Prefetch for write, L1 keep policy. Compound base+disp+index addresses
  // are folded via lea into rscratch1 since prfm's addressing is limited.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;        // -1 means no index register
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
4495 
  // mov encodings
4497 
  // Load a 32-bit immediate into dst; zero goes through zr so no
  // immediate-materialization sequence is emitted.
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}

  // Load a 64-bit immediate into dst; same zero special case.
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}
4519 
  // Load a pointer constant, choosing the materialization by relocation
  // type: oop and metadata constants get relocations; plain addresses
  // below the first page are moved directly, others via adrp+add.
  // NULL and 1 are handled by dedicated mov_p0/mov_p1 encodings below.
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          // small absolute address: a plain mov suffices
          __ mov(dst_reg, con);
        } else {
          // page-relative addressing: adrp then add the low bits
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}
4544 
  // Load the NULL pointer constant (0).
  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Load the pointer constant 1.
  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}

  // Load the polling page address with a poll_type relocation; the page
  // is page-aligned so adrp alone must reach it (offset must be 0).
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}
4565 
  // Load the card-table byte map base address.
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}

  // Load a narrow (compressed) oop constant; must carry an oop relocation.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}

  // Load narrow oop NULL (0).
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Load a narrow (compressed) klass constant; must carry a metadata
  // relocation.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
4602 
4603   // arithmetic encodings
4604 
  // Shared add/subtract-immediate encoding, 32-bit. $primary selects the
  // operation (add == 0, subtract == 1) by negating the constant; a
  // negative effective constant is then emitted as the opposite
  // instruction with the magnitude, since add/sub immediates are unsigned.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}

  // Same as above for 64-bit registers (constant still fits 32 bits).
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}
4632 
  // Integer division/remainder via corrected_idiv{l,q}, which applies the
  // Java-semantics corrections (e.g. MIN_VALUE / -1). The boolean argument
  // selects remainder (true) vs quotient (false).

  // 32-bit divide
  enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 64-bit divide
  enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 32-bit remainder
  enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}

  // 64-bit remainder
  enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}
4664 
4665   // compare instruction encodings
4666 
  // 32-bit register-register compare.
  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // 32-bit compare with an add/sub-encodable immediate: emitted as a
  // flag-setting subtract (or add for negative values) against zr.
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}

  // 32-bit compare with an arbitrary immediate: materialize in rscratch1.
  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
    __ cmpw(reg1, rscratch1);
  %}

  // 64-bit register-register compare.
  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // 64-bit compare with a 12-bit add/sub immediate.
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
    // aargh, Long.MIN_VALUE is a special case (it is its own negation),
    // so materialize it in rscratch1 and compare against that.
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}

  // 64-bit compare with an arbitrary immediate: materialize in rscratch1.
  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
    __ cmp(reg1, rscratch1);
  %}

  // Pointer compare (64-bit).
  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // Narrow-oop compare (32-bit).
  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // Pointer null test (compare with zr).
  enc_class aarch64_enc_testp(iRegP src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmp(reg, zr);
  %}

  // Narrow-oop null test.
  enc_class aarch64_enc_testn(iRegN src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmpw(reg, zr);
  %}
4748 
  // Unconditional branch to label.
  enc_class aarch64_enc_b(label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ b(*L);
  %}

  // Conditional branch (signed condition codes from cmpOp).
  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  // Conditional branch (unsigned condition codes from cmpOpU).
  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
4766 
  // Slow-path subtype check (scan of the secondary supers array).
  // On miss, execution falls through to the bound `miss` label; when
  // $primary is set the result register is cleared on the hit path first.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
4784 
  // Static/opt-virtual Java call. Runtime stubs (no _method) get a plain
  // runtime-call trampoline; real Java targets get a resolvable call
  // relocation plus a to-interpreter stub. Emission bails out (recording
  // a bailout, not asserting) when the code cache is exhausted.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      call = __ trampoline_call(Address(addr, rspec), &cbuf);

      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Virtual (inline-cache) Java call.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}
4821 
  // Post-call check; stack-depth verification is unimplemented on AArch64.
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}

  // Call from compiled Java code into the runtime.
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blrt
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // target is inside the code cache: trampoline call reaches it
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      // far target: indirect call via blrt with the C calling convention
      // described by the method's type function
      int gpcnt;
      int fpcnt;
      int rtype;
      getCallInfo(tf(), gpcnt, fpcnt, rtype);
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
      __ bind(retaddr);
      __ add(sp, sp, 2 * wordSize);  // pop the breadcrumb pair
    }
  %}
4860 
  // Jump to the rethrow stub (exception oop already in place).
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}

  // Method return.
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}

  // Tail call: jump to the target method, frame already torn down.
  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    __ br(target_reg);
  %}

  // Tail jump for exception forwarding.
  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
    // callee expects it in r3
    __ mov(r3, lr);
    __ br(target_reg);
  %}
4886 
  // Fast-path monitor enter. On exit the condition flags carry the result:
  // EQ => lock acquired, NE => caller must take the slow path (see the
  // flag comments at the end). EmitSync bits force/skip sub-paths.
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Load markOop from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      // oop is never null here, so this sets NE => slow path
      __ cmp(oop, zr);
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
    }

    // Handle existing monitor
    if ((EmitSync & 0x02) == 0) {
      // we can use AArch64's bit test and branch here but
      // markOopDesc does not define a bit index just the bit value
      // so assert in case the bit pos changes
#     define __monitor_value_log2 1
      assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
      __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
#     undef __monitor_value_log2
    }

    // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
    __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);

    // Load Compare Value application register.

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markOop with mark and if equal exchange scratch1
    // with object markOop.
    if (UseLSE) {
      // single-instruction CAS with acquire+release semantics
      __ mov(tmp, disp_hdr);
      __ casal(Assembler::xword, tmp, box, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::EQ, cont);
    } else {
      // ldaxr/stlxr retry loop
      Label retry_load;
      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldaxr(tmp, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, box, oop);
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    // Formerly:
    // __ cmpxchgptr(/*oldv=*/disp_hdr,
    //               /*newv=*/box,
    //               /*addr=*/oop,
    //               /*tmp=*/tmp,
    //               cont,
    //               /*fail*/NULL);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object, will have now locked it will continue at label cont

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markOop of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      // The object's monitor m is unlocked iff m->owner == NULL,
      // otherwise m->owner may contain a thread or a stack address.
      //
      // Try to CAS m->owner from NULL to current thread.
      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
      __ mov(disp_hdr, zr);

      if (UseLSE) {
        __ mov(rscratch1, disp_hdr);
        __ casal(Assembler::xword, rscratch1, rthread, tmp);
        __ cmp(rscratch1, disp_hdr);
      } else {
        Label retry_load, fail;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(tmp), PSTL1STRM);
        __ bind(retry_load);
        __ ldaxr(rscratch1, tmp);
        __ cmp(disp_hdr, rscratch1);
        __ br(Assembler::NE, fail);
        // use stlxr to ensure update is immediately visible
        __ stlxr(rscratch1, rthread, tmp);
        __ cbnzw(rscratch1, retry_load);
        __ bind(fail);
      }

      // Label next;
      // __ cmpxchgptr(/*oldv=*/disp_hdr,
      //               /*newv=*/rthread,
      //               /*addr=*/tmp,
      //               /*tmp=*/rscratch1,
      //               /*succeed*/next,
      //               /*fail*/NULL);
      // __ bind(next);

      // store a non-null value into the box.
      __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));

      // PPC port checks the following invariants
      // #ifdef ASSERT
      // bne(flag, cont);
      // We have acquired the monitor, check some invariants.
      // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
      // Invariant 1: _recursions should be 0.
      // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
      // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
      //                        "monitor->_recursions should be 0", -1);
      // Invariant 2: OwnerIsThread shouldn't be 0.
      // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
      //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
      //                           "monitor->OwnerIsThread shouldn't be 0", -1);
      // #endif
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure

  %}
5041 
5042   // TODO
5043   // reimplement this with custom cmpxchgptr code
5044   // which avoids some of the unnecessary branching
  // Fast-path monitor exit for C2's MonitorExit node.  On completion the
  // condition flags encode the result for the enclosing instruct:
  //   EQ => unlocked on the fast path, NE => caller must take the runtime
  //   slow path.
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);      // the locked object
    Register box = as_Register($box$$reg);         // on-stack BasicLock box
    Register disp_hdr = as_Register($tmp$$reg);    // displaced mark word
    Register tmp = as_Register($tmp2$$reg);        // scratch
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    // The EQ flags set by this cmp already signal success at 'cont'.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);


    // Handle existing monitor.
    // A set monitor_value bit in the mark word means the lock is inflated.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

      if (UseLSE) {
        // Single CAS with release semantics: expect the box address in the
        // mark word and swap the displaced header back in.
        __ mov(tmp, box);
        __ casl(Assembler::xword, tmp, disp_hdr, oop);
        __ cmp(tmp, box); // EQ iff the CAS succeeded
      } else {
        Label retry_load;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(oop), PSTL1STRM);
        __ bind(retry_load);
        __ ldxr(tmp, oop);
        __ cmp(box, tmp);
        __ br(Assembler::NE, cas_failed); // mark word changed under us
        // use stlxr to ensure update is immediately visible
        __ stlxr(tmp, disp_hdr, oop);
        __ cbzw(tmp, cont); // store succeeded; flags are still EQ here
        __ b(retry_load);
      }

    // __ cmpxchgptr(/*compare_value=*/box,
    //               /*exchange_value=*/disp_hdr,
    //               /*where=*/oop,
    //               /*result=*/tmp,
    //               cont,
    //               /*cas_failed*/NULL);
    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // Fall-through/branch target after the CAS; the flags set above still
    // encode the result (EQ = success, NE = slow path).
    __ bind(cas_failed);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont); // not owner, or recursive => slow path (NE)

      // We own the monitor with zero recursions: it may be released here
      // only if both wait queues (EntryList and cxq) are empty.
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      __ cmp(rscratch1, zr); // sets the flags the enclosing instruct tests
      __ cbnz(rscratch1, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(rscratch1, tmp); // rscratch1 is zero => clears _owner
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
5140 
5141 %}
5142 
5143 //----------FRAME--------------------------------------------------------------
5144 // Definition of frame structure and management information.
5145 //
5146 //  S T A C K   L A Y O U T    Allocators stack-slot number
5147 //                             |   (to get allocators register number
5148 //  G  Owned by    |        |  v    add OptoReg::stack0())
5149 //  r   CALLER     |        |
5150 //  o     |        +--------+      pad to even-align allocators stack-slot
5151 //  w     V        |  pad0  |        numbers; owned by CALLER
5152 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
5153 //  h     ^        |   in   |  5
5154 //        |        |  args  |  4   Holes in incoming args owned by SELF
5155 //  |     |        |        |  3
5156 //  |     |        +--------+
5157 //  V     |        | old out|      Empty on Intel, window on Sparc
5158 //        |    old |preserve|      Must be even aligned.
5159 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
5160 //        |        |   in   |  3   area for Intel ret address
5161 //     Owned by    |preserve|      Empty on Sparc.
5162 //       SELF      +--------+
5163 //        |        |  pad2  |  2   pad to align old SP
5164 //        |        +--------+  1
5165 //        |        | locks  |  0
5166 //        |        +--------+----> OptoReg::stack0(), even aligned
5167 //        |        |  pad1  | 11   pad to align new SP
5168 //        |        +--------+
5169 //        |        |        | 10
5170 //        |        | spills |  9   spills
5171 //        V        |        |  8   (pad0 slot for callee)
5172 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
5173 //        ^        |  out   |  7
5174 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
5175 //     Owned by    +--------+
5176 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
5177 //        |    new |preserve|      Must be even-aligned.
5178 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
5179 //        |        |        |
5180 //
5181 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
5182 //         known from SELF's arguments and the Java calling convention.
5183 //         Region 6-7 is determined per call site.
5184 // Note 2: If the calling convention leaves holes in the incoming argument
5185 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
5187 //         incoming area, as the Java calling convention is completely under
5188 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
5190 //         varargs C calling conventions.
5191 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
5192 //         even aligned with pad0 as needed.
5193 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
5194 //           (the latter is true on Intel but is it false on AArch64?)
5195 //         region 6-11 is even aligned; it may be padded out more so that
5196 //         the region from SP to FP meets the minimum stack alignment.
5197 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
5198 //         alignment.  Region 11, pad1, may be dynamically extended so that
5199 //         SP meets the minimum alignment.
5200 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Tables are indexed by the ideal-register opcode: 'lo' gives the
    // register holding (the low half of) the return value, 'hi' gives the
    // second register half, or OptoReg::Bad for single-slot values.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                       // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
5304 
//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(1);        // Required cost attribute; this value is the
                             // default applied to any operand definition
                             // that omits its own op_cost().

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(INSN_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
5322 
5323 //----------OPERANDS-----------------------------------------------------------
5324 // Operand definitions must precede instruction definitions for correct parsing
5325 // in the ADLC because operands constitute user defined types which are used in
5326 // instruction definitions.
5327 
5328 //----------Simple Operands----------------------------------------------------
5329 
// Integer operands 32 bit
// 32 bit immediate
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift values for add/sub extension shift
operand immIExt()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer no larger than 4
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 31
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 63
operand immI_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 255 (0xFF, one-byte mask)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 65535 (0xFFFF, two-byte mask)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 255 (0xFF, one-byte mask)
operand immL_255()
%{
  predicate(n->get_long() == 255L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 65535 (0xFFFF, two-byte mask)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 4294967295 (0xFFFFFFFF, four-byte mask)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask of contiguous low-order one bits (2^k - 1)
// with the top two bits clear
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of contiguous low-order one bits (2^k - 1)
// with the top two bits clear
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5556 
// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- 64 bit version of immIU12
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an access of 4 byte elements (scale shift == 2)
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an access of 8 byte elements (scale shift == 3)
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an access of 16 byte elements (scale shift == 4)
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit offset for scaled or unscaled immediate loads and stores
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit offset valid for an access of 4 byte elements (scale shift == 2)
operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit offset valid for an access of 8 byte elements (scale shift == 3)
operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit offset valid for an access of 16 byte elements (scale shift == 4)
operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5713 
// Integer operands 64 bit
// 64 bit immediate
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor
// (frame_anchor offset + last_Java_pc offset within JavaThread)

operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5800 
// Pointer operands
// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
// matches only the address of the VM's safepoint polling page
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
// matches only the card table's byte_map_base address
operand immByteMapBase()
%{
  // Get base of card map
  predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) &&
            (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5882 
// Float and Double operands
// Double Immediate
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: value encodable as an FP immediate
// (see Assembler::operand_valid_for_float_immediate)
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: value encodable as an FP immediate
// (see Assembler::operand_valid_for_float_immediate)
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow pointer operands
// Narrow Pointer Immediate
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow klass pointer immediate
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5974 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register not Special
// NOTE(review): unlike the sibling operands there is no op_cost(0) here,
// so the default op_cost(1) from op_attrib applies -- confirm intentional.
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  match(iRegL_R0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register Operands
// Pointer Register
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R0 only
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R5 only
operand iRegP_R5()
%{
  constraint(ALLOC_IN_RC(r5_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R10 only
operand iRegP_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6135 
// Long 64 bit Register R0 only
//
// As with the iRegP_Rnn operands above, each of these pins a long
// value to one fixed register via its single-register class.
operand iRegL_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R2 only
operand iRegL_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R3 only
operand iRegL_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R11 only
operand iRegL_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6179 
// Pointer 64 bit Register FP only
// Note: unlike the other fixed-register pointer operands this one does
// not also match iRegPNoSp -- it matches the frame pointer only.
operand iRegP_FP()
%{
  constraint(ALLOC_IN_RC(fp_reg));
  match(RegP);
  // match(iRegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6190 
// Register R0 only
//
// Fixed-register 32 bit integer operands; each uses the int_rN_reg
// class to pin the value to the named register.
operand iRegI_R0()
%{
  constraint(ALLOC_IN_RC(int_r0_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R2 only
operand iRegI_R2()
%{
  constraint(ALLOC_IN_RC(int_r2_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R3 only
operand iRegI_R3()
%{
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Register R4 only
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6235 
6236 
// Pointer Register Operands
// Narrow Pointer Register (compressed oop, RegN)
operand iRegN()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer in R0 only
operand iRegN_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer in R2 only
operand iRegN_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer in R3 only
operand iRegN_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer (compressed oop) 32 bit register, not special
// (comment previously mislabelled this as "Integer 64 bit Register")
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6285 
// heap base register -- used for encoding immN0
// (allocated from the dedicated heapbase_reg class)
operand iRegIHeapbase()
%{
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6296 
// Float Register
// Float register operands (scalar single precision in a V register)
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands (scalar double precision in a V register)
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 64 bit vector operand (D form of a V register)
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 128 bit vector operand (Q/X form of a V register)
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6340 
// Fixed-register double operands: each pins a double value to one
// specific FP/SIMD register (v0..v3) via its single-register class.
operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6376 
// Flags register, used as output of signed compare instructions

// note that on AArch64 we also use this register as the output for
// for floating point compare instructions (CmpF CmpD). this ensures
// that ordered inequality tests use GT, GE, LT or LE none of which
// pass through cases where the result is unordered i.e. one or both
// inputs to the compare is a NaN. this means that the ideal code can
// replace e.g. a GT with an LE and not end up capturing the NaN case
// (where the comparison should always fail). EQ and NE tests are
// always generated in ideal code so that unordered folds into the NE
// case, matching the behaviour of AArch64 NE.
//
// This differs from x86 where the outputs of FP compares use a
// special FP flags registers and where compares based on this
// register are distinguished into ordered inequalities (cmpOpUCF) and
// EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
// to explicitly handle the unordered case in branches. x86 also has
// to include extra CMoveX rules to accept a cmpOpUCF input.

operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of unsigned compare instructions
// (same physical flags; the distinct operand type selects the
// unsigned condition-code mapping in the matcher)
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}
6416 
// Special Registers

// Method Register
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Thread Register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg (was mislabelled "link_reg")
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Link Register
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6458 
//----------Memory Operands----------------------------------------------------
// In the MEMORY_INTER descriptions below, index(0xffffffff) is the
// ADLC convention for "no index register".

// [reg] -- plain register-indirect address
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);  // no index register
    scale(0x0);
    disp(0x0);
  %}
%}

// [reg + (sxtw(ireg) << scale)] -- int index sign-extended to long then scaled.
// The predicate checks the scale suits every memory use of this address.
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// [reg + (lreg << scale)] -- long index, scaled
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// [reg + sxtw(ireg)] -- int index sign-extended, no scaling
operand indIndexI2L(iRegP reg, iRegI ireg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// [reg + lreg] -- long index, no scaling
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
6532 
// [reg + imm] base-plus-int-offset addresses. The I4/I8/I16 variants
// restrict the immediate to the offset range valid for 4/8/16 byte
// accesses (see the corresponding immIOffsetN operand definitions).
operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);  // no index register
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI4(iRegP reg, immIOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI8(iRegP reg, immIOffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI16(iRegP reg, immIOffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6588 
// [reg + imm] base-plus-long-offset addresses; L4/L8/L16 variants
// mirror the int-offset forms above for 4/8/16 byte accesses.
operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);  // no index register
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL4(iRegP reg, immLoffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL8(iRegP reg, immLoffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL16(iRegP reg, immLoffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6644 
// Narrow-oop (compressed pointer) addressing forms. These mirror the
// plain pointer forms above but match through a DecodeN of the base.
// All are guarded on narrow_oop_shift() == 0, i.e. the decode is a
// no-op and the narrow base can be used directly.
operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);  // no index register
    scale(0x0);
    disp(0x0);
  %}
%}

operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indIndexI2LN(iRegN reg, iRegI ireg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6749 
6750 
6751 
// AArch64 opto stubs need to write to the pc slot in the thread anchor
// Address of the pc slot: [thread_reg + immL_pc_off]
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);  // no index register
    scale(0x0);
    disp($off);
  %}
%}
6766 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
// All stack slot operands address [SP + offset]: base 0x1e encodes SP
// and the sRegX operand supplies the displacement.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
6841 
// Operands for expressing Control Flow
// NOTE: Label is a predefined operand which should not be redefined in
//       the AD file. It is generically handled within the ADLC.

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// used for signed integral comparisons and fp comparisons
// The hex codes in the COND_INTER blocks are AArch64 condition-code
// encodings and the strings are the matching assembler mnemonics.

operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for unsigned integral comparisons
// (lo/hs/ls/hi are the unsigned AArch64 condition codes)

operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions
// (predicate restricts the Bool test to eq/ne)

operand cmpOpEqNe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);
  predicate(n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::eq);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions
// (predicate restricts the Bool test to lt/ge)

operand cmpOpLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6946 
// used for certain unsigned integral comparisons which can be
// converted to cbxx or tbxx instructions
// (predicate restricts the Bool test to eq/ne/lt/ge)

operand cmpOpUEqNeLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::eq
            || n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6973 
// Special operand allowing long args to int ops to be truncated for free

operand iRegL2I(iRegL reg) %{

  op_cost(0);

  // matches the ConvL2I so the truncation folds into the consuming rule
  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  interface(REG_INTER)
%}

// vector load/store operand classes, grouped by access size (4/8/16
// bytes); each accepts the addressing modes legal for that size
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
6990 
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify
// separate instructions for every form of operand when the
// instruction accepts multiple operand types with the same basic
// encoding and format. The classic case of this is memory operands.

// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address

opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);

// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. it allows the src to be either an iRegI or a (ConvL2I
// iRegL). in the latter case the l2i normally planted for a ConvL2I
// can be elided because the 32-bit instruction will just employ the
// lower 32 bits anyway.
//
// n.b. this does not elide all L2I conversions. if the truncated
// value is consumed by more than one operation then the ConvL2I
// cannot be bundled into the consuming nodes so an l2i gets planted
// (actually a movw $dst $src) and the downstream instructions consume
// the result of the l2i as an iRegI input. That's a shame since the
// movw is actually redundant but its not too costly.

opclass iRegIorL2I(iRegI, iRegL2I);
7018 
//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.

// For specific pipelines, eg A53, define the stages of that pipeline
//pipe_desc(ISS, EX1, EX2, WR);
// Aliases mapping A53-style stage names onto the generic S0..S5
// stages declared in pipe_desc below.
#define ISS S0
#define EX1 S1
#define EX2 S2
#define WR  S3
7028 
7029 // Integer ALU reg operation
7030 pipeline %{
7031 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // All AArch64 instructions are a fixed size
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
7044 
// We don't use an actual pipeline model so don't care about resources
// or description. we do use pipeline classes to introduce fixed
// latencies

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// INS01 / ALU are composite resources: either of the two underlying
// units may satisfy a request.
resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);
7059 
//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Define the pipeline as a generic 6 stage pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);
7065 
//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.
//
// Scalar FP pipe classes below: operands annotated with the stage in
// which they are read/written; the trailing lines name the issue
// resource (INS01 or INS0) and the NEON_FP unit stage.

// FP dyadic op, single precision: read srcs early, write dst at S5
pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP dyadic op, double precision
pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP unary op, single precision
pipe_class fp_uop_s(vRegF dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP unary op, double precision
pipe_class fp_uop_d(vRegD dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// double -> float conversion
pipe_class fp_d2f(vRegF dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> double conversion
pipe_class fp_f2d(vRegD dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> int conversion
pipe_class fp_f2i(iRegINoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> long conversion
pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// int -> float conversion
pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// long -> float conversion
pipe_class fp_l2f(vRegF dst, iRegL src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// double -> int conversion
pipe_class fp_d2i(iRegINoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// double -> long conversion
pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// int -> double conversion
pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// long -> double conversion
pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP divide, single precision (issues on INS0 only)
pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

// FP divide, double precision (issues on INS0 only)
pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

// FP conditional select, single precision (also reads flags)
pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP conditional select, double precision (also reads flags)
pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP immediate move, single precision
pipe_class fp_imm_s(vRegF dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP immediate move, double precision
pipe_class fp_imm_d(vRegD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP constant load, single precision
pipe_class fp_load_constant_s(vRegF dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}

// FP constant load, double precision
pipe_class fp_load_constant_d(vRegD dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}
7271 
7272 pipe_class vmul64(vecD dst, vecD src1, vecD src2)
7273 %{
7274   single_instruction;
7275   dst    : S5(write);
7276   src1   : S1(read);
7277   src2   : S1(read);
7278   INS01  : ISS;
7279   NEON_FP : S5;
7280 %}
7281 
7282 pipe_class vmul128(vecX dst, vecX src1, vecX src2)
7283 %{
7284   single_instruction;
7285   dst    : S5(write);
7286   src1   : S1(read);
7287   src2   : S1(read);
7288   INS0   : ISS;
7289   NEON_FP : S5;
7290 %}
7291 
7292 pipe_class vmla64(vecD dst, vecD src1, vecD src2)
7293 %{
7294   single_instruction;
7295   dst    : S5(write);
7296   src1   : S1(read);
7297   src2   : S1(read);
7298   dst    : S1(read);
7299   INS01  : ISS;
7300   NEON_FP : S5;
7301 %}
7302 
7303 pipe_class vmla128(vecX dst, vecX src1, vecX src2)
7304 %{
7305   single_instruction;
7306   dst    : S5(write);
7307   src1   : S1(read);
7308   src2   : S1(read);
7309   dst    : S1(read);
7310   INS0   : ISS;
7311   NEON_FP : S5;
7312 %}
7313 
7314 pipe_class vdop64(vecD dst, vecD src1, vecD src2)
7315 %{
7316   single_instruction;
7317   dst    : S4(write);
7318   src1   : S2(read);
7319   src2   : S2(read);
7320   INS01  : ISS;
7321   NEON_FP : S4;
7322 %}
7323 
7324 pipe_class vdop128(vecX dst, vecX src1, vecX src2)
7325 %{
7326   single_instruction;
7327   dst    : S4(write);
7328   src1   : S2(read);
7329   src2   : S2(read);
7330   INS0   : ISS;
7331   NEON_FP : S4;
7332 %}
7333 
7334 pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
7335 %{
7336   single_instruction;
7337   dst    : S3(write);
7338   src1   : S2(read);
7339   src2   : S2(read);
7340   INS01  : ISS;
7341   NEON_FP : S3;
7342 %}
7343 
7344 pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
7345 %{
7346   single_instruction;
7347   dst    : S3(write);
7348   src1   : S2(read);
7349   src2   : S2(read);
7350   INS0   : ISS;
7351   NEON_FP : S3;
7352 %}
7353 
// 64-bit vector shift by register.
// NOTE(review): the shift-amount operand is declared vecX even in the
// 64-bit form -- verify against the instructs that use this pipe.
pipe_class vshift64(vecD dst, vecD src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector shift by register (slot 0 issue only).
pipe_class vshift128(vecX dst, vecX src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// 64-bit vector shift by immediate; the immediate consumes no
// pipeline resource, so only src is scheduled.
pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector shift by immediate.
pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
7391 
// 64-bit vector FP two-operand op: sources read at S1, result at S5.
pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// 128-bit vector FP two-operand op (slot 0 issue only).
pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP multiply/divide pipes.
// NOTE(review): unlike the other *64 pipes, the 64-bit form also uses
// INS0 (slot 0 only) -- confirm this is intentional for mul/div.
pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
7431 
// 128-bit vector FP square root (slot 0 issue only).
// NOTE(review): no vsqrt_fp64 counterpart is defined in this section.
pipe_class vsqrt_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// 64-bit vector FP unary op.
pipe_class vunop_fp64(vecD dst, vecD src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// 128-bit vector FP unary op (slot 0 issue only).
pipe_class vunop_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
7458 
// Duplicate a general register into all lanes of a 64-bit vector.
pipe_class vdup_reg_reg64(vecD dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a general register into all lanes of a 128-bit vector.
// NOTE(review): uses INS01 although it is a 128-bit op, unlike most
// other *128 pipes above -- confirm intended.
pipe_class vdup_reg_reg128(vecX dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a float register into all lanes of a 64-bit vector.
pipe_class vdup_reg_freg64(vecD dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a float register into all lanes of a 128-bit vector.
pipe_class vdup_reg_freg128(vecX dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a double register into both lanes of a 128-bit vector.
pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector move-immediate, 64-bit destination (no source operands).
pipe_class vmovi_reg_imm64(vecD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector move-immediate, 128-bit destination (slot 0 issue only).
pipe_class vmovi_reg_imm128(vecX dst)
%{
  single_instruction;
  dst    : S3(write);
  INS0   : ISS;
  NEON_FP : S3;
%}
7519 
// 64-bit vector load: address consumed at issue, result written at S5.
pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector load.
pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 64-bit vector store: address consumed at issue, data read at S2.
pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7546 
// 128-bit vector store: address consumed at issue, data read at S2.
// Fixed operand type: the 128-bit pipe takes a vecX source (the old
// declaration said vecD, a copy/paste from vstore_reg_mem64; compare
// vload_reg_mem128 above, which correctly uses vecX).
pipe_class vstore_reg_mem128(vecX src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7555 
//------- Integer ALU operations --------------------------

// Integer ALU reg-reg operation
// Operands needed in EX1, result generated in EX2
// Eg.  ADD     x0, x1, x2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : EX1(read);
  INS01  : ISS; // Dual issue as instruction 0 or 1
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with constant shift
// Shifted register must be available in LATE_ISS instead of EX1
// Eg.  ADD     x0, x1, x2, LSL #2
pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg operation with constant shift
// Eg.  LSL     x0, x1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with variable shift
// Both operands must be available in LATE_ISS instead of EX1
// Result is available in EX1 instead of EX2
// Eg.  LSLV    x0, x1, x2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX1(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX1;
%}

// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
// NOTE(review): dst is written at EX2 but the ALU resource is claimed
// at EX1 -- confirm which stage is intended.
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  ALU    : EX1;
%}

// Integer ALU reg operation
// Eg.  NEG     x0, x1
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX2(write);
  src    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg immediate operation
// Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU immediate operation (no source operands)
// Eg.  MOV     x0, #N
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX1(write);
  INS01  : ISS;
  ALU    : EX1;
%}
7653 
//------- Compare operation -------------------------------

// Compare reg-reg
// Eg.  CMP     x0, x1
pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  op2    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Compare reg-immediate
// Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}
7680 
//------- Conditional instructions ------------------------

// Conditional no operands
// Eg.  CSINC   x0, zr, zr, <cond>
pipe_class icond_none(iRegI dst, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 2 operand
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src1   : EX1(read);
  src2   : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 1 operand (header previously duplicated the 2-operand
// comment; this pipe has a single source register)
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
7718 
//------- Multiply pipeline operations --------------------

// Multiply reg-reg (32 bit)
// Eg.  MUL     w0, w1, w2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate (32 bit)
// Eg.  MADD    w0, w1, w2, w3
pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long multiply reg-reg (64 bit)
// Eg.  MUL     x0, x1, x2
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long multiply accumulate (64 bit)
// Eg.  MADD    x0, x1, x2, x3
pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}
7771 
//------- Divide pipeline operations --------------------

// 32-bit divide
// Eg.  SDIV    w0, w1, w2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

// 64-bit divide
// Eg.  SDIV    x0, x1, x2
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}
7797 
//------- Load pipeline operations ------------------------

// Load - prefetch
// Eg.  PRFM    <mem>   (was "PFRM" -- typo for the AArch64 prefetch mnemonic)
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, mem
// Eg.  LDR     x0, <mem>
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, reg (register-offset addressing)
// Eg.  LDR     x0, [sp, x1]
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}
7831 
//------- Store pipeline operations -----------------------

// Store - zr, mem
// Eg.  STR     zr, <mem>
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, mem
// Eg.  STR     x0, <mem>
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, reg (register-offset addressing)
// Eg. STR      x0, [sp, x1]
// NOTE(review): "dst" here is the address register, read at issue.
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}
7865 
//------- Branch pipeline operations ----------------------
// (header previously said "Store pipeline operations" -- copy/paste)

// Branch
pipe_class pipe_branch()
%{
  single_instruction;
  INS01  : ISS;
  BRANCH : EX1;
%}

// Conditional branch
pipe_class pipe_branch_cond(rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

// Compare & Branch
// EG.  CBZ/CBNZ
pipe_class pipe_cmp_branch(iRegI op1)
%{
  single_instruction;
  op1    : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}
7894 
//------- Synchronisation operations ----------------------

// Any operation requiring serialization.
// EG.  DMB/Atomic Ops/Load Acquire/Str Release
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Generic big/slow expanded idiom - also serialized
pipe_class pipe_slow()
%{
  instruction_count(10);
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Empty pipeline class (used for no-op nodes, see MachNop below)
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);
%}

// Default pipeline class.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);
%}
7932 
// Pipeline class for compares.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);
%}

// Define the class for the Nop node.
define %{
   MachNop = pipe_class_empty;
%}
7958 
7959 %}
7960 //----------INSTRUCTIONS-------------------------------------------------------
7961 //
7962 // match      -- States which machine-independent subtree may be replaced
7963 //               by this instruction.
7964 // ins_cost   -- The estimated cost of this instruction is used by instruction
7965 //               selection to identify a minimum cost tree of machine
7966 //               instructions that matches a tree of machine-independent
7967 //               instructions.
7968 // format     -- A string providing the disassembly for this instruction.
7969 //               The value of an instruction's operand may be inserted
7970 //               by referring to it with a '$' prefix.
7971 // opcode     -- Three instruction opcodes may be provided.  These are referred
7972 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
7974 //               indicate the type of machine instruction, while secondary
7975 //               and tertiary are often used for prefix options or addressing
7976 //               modes.
7977 // ins_encode -- A list of encode classes with parameters. The encode class
7978 //               name must have been defined in an 'enc_class' specification
7979 //               in the encode section of the architecture description.
7980 
7981 // ============================================================================
7982 // Memory (Load/Store) Instructions
7983 
7984 // Load Instructions
7985 
// Load Byte (8 bit signed)
// All plain loads below are guarded by !needs_acquiring_load(n);
// acquiring loads are presumably matched by separate load-acquire
// rules elsewhere in this file -- TODO confirm.
instruct loadB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsbw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit signed) into long
// The predicate tests n->in(1) because the load is nested under ConvI2L.
instruct loadB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned)
instruct loadUB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed)
instruct loadS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrshw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed) into long
instruct loadS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrsh(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8069 
// Load Char (16 bit unsigned)
// NOTE(review): the format tag says "# short" though this is an
// unsigned/char load -- debug-output text only.
instruct loadUS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed)
instruct loadI(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadI mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed) into long (sign-extending ldrsw)
instruct loadI2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrsw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8125 
// Load Integer (32 bit unsigned) into long
// Matches (AndL (ConvI2L (LoadI ...)) 0xFFFFFFFF): a 32-bit ldrw
// zero-extends, so the mask is free.
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8139 
// Load Long (64 bit signed)
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  // Fixed format tag: this is a 64-bit load ("long", not "int").
  format %{ "ldr  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8153 
// Load Range (array length; no acquiring-load predicate needed,
// presumably because array lengths are immutable -- TODO confirm)
instruct loadRange(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Pointer
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Compressed Pointer
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Narrow Klass Pointer
instruct loadNKlass(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8222 
// Load Float
// NOTE(review): FP loads use pipe_class_memory rather than
// iload_reg_mem, unlike the integer loads above.
instruct loadF(vRegF dst, memory mem)
%{
  match(Set dst (LoadF mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrs  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_ldrs(dst, mem) );

  ins_pipe(pipe_class_memory);
%}

// Load Double
instruct loadD(vRegD dst, memory mem)
%{
  match(Set dst (LoadD mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrd  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_ldrd(dst, mem) );

  ins_pipe(pipe_class_memory);
%}
8250 
8251 
// Load Int Constant
instruct loadConI(iRegINoSp dst, immI src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# int" %}

  ins_encode( aarch64_enc_movw_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Long Constant
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Pointer Constant
// Costed higher (INSN_COST * 4): a full pointer may need a multi-insn
// materialization sequence.
instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr\n\t"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Null Pointer Constant

instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# NULL ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
8307 
// Load Pointer Constant One
instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  // Fixed format tag: this materializes the pointer constant one
  // (immP_1), not NULL -- the old "# NULL ptr" text was a copy/paste
  // from loadConP0.
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
8321 
// Load Poll Page Constant (pc-relative adr of the safepoint poll page)
instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Poll Page Ptr" %}

  ins_encode(aarch64_enc_mov_poll_page(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Byte Map Base Constant (card table base)

instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Byte Map Base" %}

  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Pointer Constant

instruct loadConN(iRegNNoSp dst, immN con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{ "mov  $dst, $con\t# compressed ptr" %}

  ins_encode(aarch64_enc_mov_n(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Null Pointer Constant

instruct loadConN0(iRegNNoSp dst, immN0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed NULL ptr" %}

  ins_encode(aarch64_enc_mov_n0(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Klass Constant

instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_mov_nk(dst, con));

  ins_pipe(ialu_imm);
%}
8391 
// Load Packed Float Constant
// immFPacked constants can be encoded directly in an fmov immediate.
instruct loadConF_packed(vRegF dst, immFPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST * 4);
  format %{ "fmovs  $dst, $con"%}
  ins_encode %{
    // fmovs takes a double here; the float constant is widened.
    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
  %}

  ins_pipe(fp_imm_s);
%}

// Load Float Constant (general case: load from the constant table)

instruct loadConF(vRegF dst, immF con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 4);

  format %{
    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
  %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_s);
%}
8422 
// Load Packed Double Constant
// immDPacked constants can be encoded directly in an fmov immediate.
instruct loadConD_packed(vRegD dst, immDPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST);
  format %{ "fmovd  $dst, $con"%}
  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
  %}

  ins_pipe(fp_imm_d);
%}
8435 
// Load Double Constant (general case: load from the constant table)

instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  // Fixed format tag: this is a double constant ("double=", not
  // "float=" -- copy/paste from loadConF).
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
8452 
// Store Instructions

// Store CMS card-mark Immediate
// Selected only when the storestore barrier is provably unnecessary.
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store CMS card-mark Immediate with intervening StoreStore
// needed when using CMS with no conditional card marking
instruct storeimmCM0_ordered(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST * 2);
  format %{ "dmb ishst"
      "\n\tstrb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0_ordered(mem));

  ins_pipe(istore_mem);
%}

// Store Byte
// Plain stores below are guarded by !needs_releasing_store(n);
// releasing stores are presumably matched by separate stlr rules
// elsewhere in this file -- TODO confirm.
instruct storeB(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreB mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb(src, mem));

  ins_pipe(istore_reg_mem);
%}
8497 
8498 
// Store Immediate Byte Zero: the encoding stores the zero register.
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed format string: aarch64_enc_strb0 stores zr, and the old text
  // "rscractch2" misspelt the scratch-register name anyway.  Matches
  // the other zero-store formats in this file.
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
8511 
// Store Char/Short
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Char/Short Zero (stores zr)
instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}

// Store Integer

instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem(StoreI mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Integer Zero (stores zr)
instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem(StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}
8566 
// Store Long (64 bit signed)
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed format tag: 64-bit store ("long", not "int").
  format %{ "str  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Long Zero (stores zr)
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed format tag: 64-bit store ("long", not "int").
  format %{ "str  zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
8594 
// Store Pointer
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Null Pointer (stores zr)
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}

// Store Compressed Pointer
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Compressed Null Pointer: reuses rheapbase as a zero source,
// valid only when both narrow oop and narrow klass bases are NULL
// (then rheapbase holds zero).
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}
8651 
8652 // Store Float
8653 instruct storeF(vRegF src, memory mem)
8654 %{
8655   match(Set mem (StoreF mem src));
8656   predicate(!needs_releasing_store(n));
8657 
8658   ins_cost(INSN_COST);
8659   format %{ "strs  $src, $mem\t# float" %}
8660 
8661   ins_encode( aarch64_enc_strs(src, mem) );
8662 
8663   ins_pipe(pipe_class_memory);
8664 %}
8665 
8666 // TODO
8667 // implement storeImmF0 and storeFImmPacked
8668 
8669 // Store Double
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Klass Pointer
// n.b. no releasing variant exists; klass stores never need a release
// barrier here, the predicate only filters the releasing case out.
instruct storeNKlass(iRegN src, memory mem)
%{
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
8696 
8697 // TODO
8698 // implement storeImmD0 and storeDImmPacked
8699 
8700 // prefetch instructions
8701 // Must be safe to execute with invalid address (cannot fault).
8702 
// Prefetch ahead of an allocation; prfm does not fault, so it is safe to
// issue even when the address is not (yet) valid.
instruct prefetchalloc( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}
8713 
8714 //  ---------------- volatile loads and stores ----------------
8715 
8716 // Load Byte (8 bit signed)
// Acquire-form load (ldar* family). Note the address operand is a plain
// indirect register, not the general memory operand: load-acquire takes no
// offset addressing.
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned)
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short (16 bit signed)
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned)
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
8805 
8806 // Load Short/Char (16 bit signed) into long
// Load Short/Char (16 bit signed) into long
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  // Fixed format to match the encoding: a signed (sign-extending) ldarsh is
  // emitted, not the zero-extending ldarh the old format claimed.
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
8818 
8819 // Load Integer (32 bit signed)
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit unsigned) into long
// Matches the (long)x & 0xFFFFFFFFL zero-extension idiom; the mask operand
// is consumed by the match rule and no separate AND instruction is emitted.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
8844 
8845 // Load Long (64 bit signed)
// Load Long (64 bit signed), acquire form.
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  // Fixed format annotation: 64-bit long load, not "# int".
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
8857 
8858 // Load Pointer
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Compressed Pointer
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Float
// FP acquire loads go through a GP scratch register inside the encoding
// (fldars/fldard helpers), since ldar targets general registers only.
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Load Double
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}
8909 
8910 // Store Byte
// Release-form store (stlr* family); pairs with the ldar* load rules above.
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Integer

instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem(StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
8949 
8950 // Store Long (64 bit signed)
// Store Long (64 bit signed), release form.
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  // Fixed format annotation: 64-bit long store, not "# int".
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
8962 
8963 // Store Pointer
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Pointer
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Float
// FP release stores move through a GP scratch register inside the encoding
// (fstlrs/fstlrd helpers), since stlr sources general registers only.
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
9017 
9018 //  ---------------- end of volatile loads and stores ----------------
9019 
9020 // ============================================================================
9021 // BSWAP Instructions
9022 
instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesI src));

  ins_cost(INSN_COST);
  format %{ "revw  $dst, $src" %}

  ins_encode %{
    __ revw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
  match(Set dst (ReverseBytesL src));

  ins_cost(INSN_COST);
  format %{ "rev  $dst, $src" %}

  ins_encode %{
    __ rev(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesUS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Signed variant needs a second step: rev16w swaps the bytes, then sbfmw
// with bits 0..15 sign-extends the 16-bit result into the full register.
instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src\n\t"
            "sbfmw $dst, $dst, #0, #15" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
    __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
  %}

  ins_pipe(ialu_reg);
%}
9076 
9077 // ============================================================================
9078 // Zero Count Instructions
9079 
instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountLeadingZerosI src));

  ins_cost(INSN_COST);
  format %{ "clzw  $dst, $src" %}
  ins_encode %{
    __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountLeadingZerosL src));

  ins_cost(INSN_COST);
  format %{ "clz   $dst, $src" %}
  ins_encode %{
    __ clz(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// No trailing-zero-count instruction: reverse the bits (rbit) and count
// leading zeros instead. Hence the two-instruction cost.
instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountTrailingZerosI src));

  ins_cost(INSN_COST * 2);
  format %{ "rbitw  $dst, $src\n\t"
            "clzw   $dst, $dst" %}
  ins_encode %{
    __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
    __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountTrailingZerosL src));

  ins_cost(INSN_COST * 2);
  format %{ "rbit   $dst, $src\n\t"
            "clz    $dst, $dst" %}
  ins_encode %{
    __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
    __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9131 
9132 //---------- Population Count Instructions -------------------------------------
9133 //
9134 
// Population count via the SIMD unit: move value into a vector register,
// cnt counts bits per byte, addv sums the byte counts, then move back.
instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Memory-operand form: load the int straight into the vector register
// (ldrs) rather than via a general register.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
9221 
9222 // ============================================================================
9223 // MemBar Instruction
9224 
instruct load_fence() %{
  match(LoadFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "load_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}
  ins_pipe(pipe_serial);
%}

// Elided acquire barrier: selected when the preceding access already
// provides the required ordering (per unnecessary_acquire(n)); emits only
// a block comment for the disassembly.
instruct unnecessary_membar_acquire() %{
  predicate(unnecessary_acquire(n));
  match(MemBarAcquire);
  ins_cost(0);

  format %{ "membar_acquire (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire (elided)");
  %}

  ins_pipe(pipe_class_empty);
%}

instruct membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire" %}

  ins_encode %{
    __ block_comment("membar_acquire");
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}

  ins_pipe(pipe_serial);
%}


// Lock acquire/release barriers are always elided here; only a comment is
// emitted.
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}

instruct store_fence() %{
  match(StoreFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "store_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

instruct unnecessary_membar_release() %{
  predicate(unnecessary_release(n));
  match(MemBarRelease);
  ins_cost(0);

  format %{ "membar_release (elided)" %}

  ins_encode %{
    __ block_comment("membar_release (elided)");
  %}
  ins_pipe(pipe_serial);
%}

instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release" %}

  ins_encode %{
    __ block_comment("membar_release");
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(VOLATILE_REF_COST);

  format %{ "MEMBAR-store-store" %}

  ins_encode %{
    __ membar(Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_release_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}

instruct unnecessary_membar_volatile() %{
  predicate(unnecessary_volatile(n));
  match(MemBarVolatile);
  ins_cost(0);

  format %{ "membar_volatile (elided)" %}

  ins_encode %{
    __ block_comment("membar_volatile (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// Full StoreLoad barrier; costed far above the acquire/release forms so the
// matcher strongly prefers the elided/weaker variants when legal.
instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(VOLATILE_REF_COST*100);

  format %{ "membar_volatile" %}

  ins_encode %{
    __ block_comment("membar_volatile");
    __ membar(Assembler::StoreLoad);
  %}

  ins_pipe(pipe_serial);
%}
9369 
9370 // ============================================================================
9371 // Cast/Convert Instructions
9372 
// Pointer<->integer reinterpret casts are a register move, skipped entirely
// when source and destination were allocated to the same register.
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long -> ptr" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

// Convert oop into int for vectors alignment masking
instruct convP2I(iRegINoSp dst, iRegP src) %{
  match(Set dst (ConvL2I (CastP2X src)));

  ins_cost(INSN_COST);
  format %{ "movw $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
9415 
9416 // Convert compressed oop into int for vectors alignment masking
9417 // in case of 32bit oops (heap < 4Gb).
// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb): with a zero narrow-oop shift the
// compressed value is usable directly, so this is just a 32-bit move.
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  // Fixed format: the old string dropped the '$' on dst (so the operand was
  // never substituted in disassembly) and printed "mov" for a movw encoding.
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
9431 
9432 
9433 // Convert oop pointer into compressed form
// General encode: must handle a possibly-null oop, and the encoding kills
// the flags (effect KILL cr), unlike the not_null variant below.
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}
9486 
9487 // n.b. AArch64 implementations of encode_klass_not_null and
9488 // decode_klass_not_null do not modify the flags register so, unlike
9489 // Intel, we don't kill CR as a side effect here
9490 
instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}

// Two macro-assembler entry points exist: a two-register form and an
// in-place single-register form, chosen by whether the allocator gave
// src and dst the same register.
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}

// Compile-time-only casts: size(0), no code is emitted.
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
9555 
9556 // ============================================================================
9557 // Atomic operation instructions
9558 //
9559 // Intel and SPARC both implement Ideal Node LoadPLocked and
9560 // Store{PIL}Conditional instructions using a normal load for the
9561 // LoadPLocked and a CAS for the Store{PIL}Conditional.
9562 //
9563 // The ideal code appears only to use LoadPLocked/StorePLocked as a
9564 // pair to lock object allocations from Eden space when not using
9565 // TLABs.
9566 //
9567 // There does not appear to be a Load{IL}Locked Ideal Node and the
9568 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
9569 // and to use StoreIConditional only for 32-bit and StoreLConditional
9570 // only for 64-bit.
9571 //
9572 // We implement LoadPLocked and StorePLocked instructions using,
9573 // respectively the AArch64 hw load-exclusive and store-conditional
9574 // instructions. Whereas we must implement each of
9575 // Store{IL}Conditional using a CAS which employs a pair of
9576 // instructions comprising a load-exclusive followed by a
9577 // store-conditional.
9578 
9579 
9580 // Locked-load (linked load) of the current heap-top
9581 // used when updating the eden heap top
9582 // implemented using ldaxr on AArch64
9583 
instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flag (EQ) on success.
// implemented using stlxr on AArch64.

instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}


// storeLConditional is used by PhaseMacroExpand::expand_lock_node
// when attempting to rebias a lock towards the current thread.  We
// must use the acquire form of cmpxchg in order to guarantee acquire
// semantics in this case.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}

// storeIConditional also has acquire semantics, for no better reason
// than matching storeLConditional.  At the time of writing this
// comment storeIConditional was not used anywhere by AArch64.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
9661 
9662 // standard CompareAndSwapX when we are using barriers
9663 // these have higher priority than the rules selected by a predicate
9664 
9665 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
9666 // can't match them
9667 
9668 instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
9669 
9670   match(Set res (CompareAndSwapB mem (Binary oldval newval)));
9671   ins_cost(2 * VOLATILE_REF_COST);
9672 
9673   effect(KILL cr);
9674 
9675   format %{
9676     "cmpxchgb $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
9677     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9678   %}
9679 
9680   ins_encode(aarch64_enc_cmpxchgb(mem, oldval, newval),
9681             aarch64_enc_cset_eq(res));
9682 
9683   ins_pipe(pipe_slow);
9684 %}
9685 
// Strong short CAS: $res <-- 1 on success, 0 on failure (via cset EQ).
// Clobbers the flags, hence KILL cr.
instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    // was "# (int)": this rule handles CompareAndSwapS, so label it (short)
    "cmpxchgs $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgs(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9703 
// Strong int CAS with full barriers: $res <-- 1 on success, 0 on failure.
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  // the cmpxchgw/cset sequence clobbers the flags
  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Strong long CAS with full barriers: $res <-- 1 on success, 0 on failure.
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Strong pointer CAS with full barriers: $res <-- 1 on success, 0 on failure.
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Strong narrow-oop CAS with full barriers: uses the 32-bit (word)
// cmpxchgw encoding since compressed oops are 32 bits wide.
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9775 
// alternative CompareAndSwapX when we are eliding barriers
//
// These variants are selected (predicate needs_acquiring_load_exclusive)
// when the CAS must itself supply acquire ordering; note the lower
// ins_cost (VOLATILE_REF_COST vs 2x) so they win over the plain rules.

instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// acquiring long CAS: $res <-- 1 on success, 0 on failure.
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// acquiring pointer CAS: $res <-- 1 on success, 0 on failure.
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// acquiring narrow-oop CAS: 32-bit (word) encoding for compressed oops.
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9853 
9854 
9855 // ---------------------------------------------------------------------
9856 
9857 
9858 // BEGIN This section of the file is automatically generated. Do not edit --------------
9859 
9860 // Sundry CAS operations.  Note that release is always true,
9861 // regardless of the memory ordering of the CAS.  This is because we
9862 // need the volatile case to be sequentially consistent but there is
9863 // no trailing StoreLoad barrier emitted by C2.  Unfortunately we
9864 // can't check the type of memory ordering here, so we always emit a
9865 // STLXR.
9866 
9867 // This section is generated from aarch64_ad_cas.m4
9868 
9869 
9870 
// NOTE(review): the format strings below say "weak" but every encoding
// passes /*weak*/ false — these are strong compare-and-exchange rules
// (they return the witnessed old value, not a success flag).  This
// section is generated from aarch64_ad_cas.m4, so the label should be
// corrected there, not hand-edited here.

// byte exchange: zero-extend oldval into rscratch2 for the compare,
// then sign-extend the fetched value to honour Java byte semantics.
instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ uxtbw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// short exchange: as above but halfword-sized extend/compare.
instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ uxthw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// int exchange: $res receives the value witnessed in memory.
instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// long exchange.
instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// narrow-oop exchange: word-sized, since compressed oops are 32 bits.
instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// pointer exchange.
instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9964 
// Weak CAS rules (generated from aarch64_ad_cas.m4): /*weak*/ true, so
// the exchange may fail spuriously; the witnessed value is discarded
// (noreg) and $res is set to the success flag via csetw EQ.

instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    // compare against the zero-extended byte value of oldval
    __ uxtbw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    // compare against the zero-extended halfword value of oldval
    __ uxthw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// n.b. result is an int success flag, hence iRegINoSp res
instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10068 
10069 // END This section of the file is automatically generated. Do not edit --------------
10070 // ---------------------------------------------------------------------
10071 
// Atomic exchange rules: $prev <-- old value at [$mem], [$mem] <-- $newv.
// Word-sized (xchgw) for int and narrow oop, doubleword for long and ptr.

instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetI mem newv));
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
  match(Set prev (GetAndSetL mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetN mem newv));
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
  match(Set prev (GetAndSetP mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10107 
10108 
// Atomic fetch-and-add rules.  Four variants per size: register or
// immediate (AddSub-encodable) increment, with or without a result.
// The _no_res forms (predicate result_not_used()) pass noreg and carry
// a slightly lower ins_cost (9 vs 10) so they are preferred when the
// old value is dead.

instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// immediate-increment variant
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// int (word-sized) variants use atomic_addw
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10192 
// Manifest a CmpL result in an integer register.
// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    // branch-free -1/0/+1: dst = (src1 != src2) ? 1 : 0, then
    // negated when src1 < src2.
    __ cmp($src1$$Register, $src2$$Register);
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
10215 
// Immediate-operand form of cmpL3_reg_reg: manifest -1/0/+1 for a
// long compared against an add/sub-encodable constant.
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    int32_t con = (int32_t)$src2$$constant;
    // add/sub immediates must be non-negative, so a negative constant
    // is compared by adding its magnitude instead of subtracting it.
    if (con < 0) {
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
10240 
10241 // ============================================================================
10242 // Conditional Move Instructions
10243 
// n.b. we have identical rules for both a signed compare op (cmpOp)
// and an unsigned compare op (cmpOpU). it would be nice if we could
// define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
// of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
10253 
// int conditional move, signed compare: cselw picks $src2 when $cmp
// holds, else $src1 (note the swapped operand order in the encoding).
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// same rule for an unsigned compare op (see note above on why the
// cmpOp/cmpOpU rules cannot be merged)
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10285 
10286 // special cases where one arg is zero
10287 
10288 // n.b. this is selected in preference to the rule above because it
10289 // avoids loading constant 0 into a source register
10290 
10291 // TODO
10292 // we ought only to be able to cull one of these variants as the ideal
10293 // transforms ought always to order the zero consistently (to left/right?)
10294 
// zero-on-the-left: select $src when $cmp holds, else zr
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// zero-on-the-right: select zr when $cmp holds, else $src
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10358 
10359 // special case for creating a boolean 0 or 1
10360 
10361 // n.b. this is selected in preference to the rule above because it
10362 // avoids loading constants 0 and 1 into a source register
10363 
// boolean materialization: csincw dst, zr, zr, cond yields
// (cond ? 0 : 1), i.e. the 1 is produced when $cmp does NOT hold,
// matching (Binary one zero) with the ideal node's operand order.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}

instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
10401 
// long conditional move: 64-bit csel picks $src2 when $cmp holds,
// else $src1 (operands swapped in the encoding, as for the int rules).
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

// zero-on-the-right: select zr when $cmp holds, else $src
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// zero-on-the-left: select $src when $cmp holds, else zr
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10499 
// pointer conditional move: 64-bit csel, same operand-swap convention
// as the int/long rules above.
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

// zero-on-the-right: select zr when $cmp holds, else $src
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// zero-on-the-left: select $src when $cmp holds, else zr
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10597 
// Conditional move, compressed (narrow) pointer, signed compare:
// dst = (cmp holds) ? src2 : src1.  Uses the 32-bit cselw form.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10613 
// Conditional move, compressed (narrow) pointer, unsigned compare:
// dst = (cmp holds) ? src2 : src1.  Uses the 32-bit cselw form.
// Fix: the format text previously said "signed" although this is the
// cmpOpU/rFlagsRegU (unsigned) variant; the emitted code is unchanged.
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10629 
10630 // special cases where one arg is zero
10631 
// Conditional move, compressed ptr, signed compare: dst = (cmp holds) ? 0 : src.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Conditional move, compressed ptr, unsigned compare: dst = (cmp holds) ? 0 : src.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Conditional move, compressed ptr, signed compare: dst = (cmp holds) ? src : 0.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Conditional move, compressed ptr, unsigned compare: dst = (cmp holds) ? src : 0.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10695 
// Conditional move, float, signed compare: dst = (cmp holds) ? src2 : src1,
// using the single-precision FCSEL.
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
10713 
// Conditional move, float, unsigned compare: dst = (cmp holds) ? src2 : src1,
// using the single-precision FCSEL.
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
10731 
// Conditional move, double, signed compare: dst = (cmp holds) ? src2 : src1,
// using the double-precision FCSEL.
// Fix: the format text previously said "cmove float" although this is the
// CMoveD/vRegD (double) variant; the emitted code is unchanged.
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10749 
// Conditional move, double, unsigned compare: dst = (cmp holds) ? src2 : src1,
// using the double-precision FCSEL.
// Fix: the format text previously said "cmove float" although this is the
// CMoveD/vRegD (double) variant; the emitted code is unchanged.
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10767 
10768 // ============================================================================
10769 // Arithmetic Instructions
10770 //
10771 
10772 // Integer Addition
10773 
10774 // TODO
10775 // these currently employ operations which do not set CR and hence are
10776 // not flagged as killing CR but we would like to isolate the cases
10777 // where we want to set flags from those where we don't. need to work
10778 // out how to do that.
10779 
// Integer add, register + register: dst = src1 + src2 (32-bit addw).
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Integer add, register + add/sub immediate: dst = src1 + src2.
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

// Integer add of a narrowed long plus immediate: the ConvL2I is folded away
// because the 32-bit addw only reads the low word of src1.
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10822 
10823 // Pointer Addition
// Pointer add, register + long offset: dst = src1 + src2.
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer add with an int offset: the ConvI2L is folded into the add's
// sxtw extend, so no separate sign-extension instruction is emitted.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer add with a shifted long index: dst = src1 + (src2 << scale),
// emitted as a single address-generation (lea with lsl).
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Pointer add with a sign-extended, shifted int index: both the ConvI2L and
// the shift are folded into the sxtw-scaled addressing form.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10883 
// (int -> long) sign extension followed by a left shift, fused into a single
// sbfiz (signed bitfield insert in zeros).  The field width is capped at 32
// since only the low 32 bits of src carry significant data after ConvI2L.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    // sbfiz dst, src, lsb, width: insert 'width' low bits of src at bit 'lsb',
    // sign-extending above and zeroing below.
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
10898 
10899 // Pointer Immediate Addition
10900 // n.b. this needs to be more expensive than using an indirect memory
10901 // operand
// Pointer add with an add/sub-encodable immediate: dst = src1 + src2.
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10915 
10916 // Long Addition
// Long add, register + register: dst = src1 + src2 (64-bit add).
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10932 
// Long Immediate Addition.
// No constant pool entries required.
// Long add with an add/sub-encodable immediate: dst = src1 + src2.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10947 
10948 // Integer Subtraction
// Integer subtract, register - register: dst = src1 - src2 (32-bit subw).
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Integer subtract, register - add/sub immediate: dst = src1 - src2.
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10978 
10979 // Long Subtraction
// Long subtract, register - register: dst = src1 - src2 (64-bit sub).
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10995 
// Long Immediate Subtraction.
// No constant pool entries required.
// Long subtract with an add/sub-encodable immediate: dst = src1 - src2.
// Fix: the format string was missing the space between the mnemonic and
// the first operand ("sub$dst"); the emitted code is unchanged.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
11010 
11011 // Integer Negation (special case for sub)
11012 
// Integer negation: dst = 0 - src, matched from SubI with a zero left operand.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Long Negation

// Long negation: dst = 0 - src, matched from SubL with a zero left operand.
instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
11042 
11043 // Integer Multiply
11044 
// Integer multiply: dst = src1 * src2 (32-bit mulw).
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// Widening signed multiply: (long)src1 * (long)src2 with 32-bit sources,
// fusing both ConvI2L nodes into a single smull.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
11074 
11075 // Long Multiply
11076 
// Long multiply: dst = src1 * src2 (low 64 bits of the product).
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
11091 
// Long multiply-high: dst = high 64 bits of the signed 128-bit product
// src1 * src2 (smulh).
// Fix: removed the stray ", " before the tab in the format string so the
// disassembly comment is well-formed; the emitted code is unchanged.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  format %{ "smulh   $dst, $src1, $src2\t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
11107 
11108 // Combined Integer Multiply & Add/Sub
11109 
// Integer multiply-add: dst = src3 + src1 * src2, fused into one maddw.
// Fix: the format said "madd" but the encoding emits the 32-bit maddw;
// the disassembly text now matches the instruction actually generated.
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
11125 
// Integer multiply-subtract: dst = src3 - src1 * src2, fused into one msubw.
// Fix: the format said "msub" but the encoding emits the 32-bit msubw;
// the disassembly text now matches the instruction actually generated.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
11141 
11142 // Combined Long Multiply & Add/Sub
11143 
// Long multiply-add: dst = src3 + src1 * src2, fused into one madd.
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

// Long multiply-subtract: dst = src3 - src1 * src2, fused into one msub.
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
11175 
11176 // Integer Divide
11177 
// Integer divide: dst = src1 / src2 (sdivw via shared encoding).
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}

// (src1 >> 31) >>> 31 extracts the sign bit of src1; a single logical
// shift right by 31 computes the same value.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// src + (sign bit of src): the rounding adjustment C2 generates for signed
// division by a power of two, folded into one shifted-operand addw.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
11211 
11212 // Long Divide
11213 
// Long divide: dst = src1 / src2 (sdiv via shared encoding).
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}

// (src1 >> 63) >>> 63 extracts the sign bit of src1; a single logical
// shift right by 63 computes the same value.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}

// src + (sign bit of src): the rounding adjustment for signed long division
// by a power of two, folded into one shifted-operand add.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immI_63 div1, immI_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "add $dst, $src, $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
11247 
11248 // Integer Remainder
11249 
// Integer remainder: dst = src1 % src2, emitted as sdivw + msubw via the
// shared encoding (rscratch1 holds the quotient).
// Fix: the format's second line had spurious pseudo-call syntax
// ("msubw($dst, ..." with an unbalanced paren); it now shows the actual
// assembly.  The emitted code is unchanged.
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
11260 
11261 // Long Remainder
11262 
// Long remainder: dst = src1 % src2, emitted as sdiv + msub via the shared
// encoding (rscratch1 holds the quotient).
// Fix: the format's second line had spurious pseudo-call syntax
// ("msub($dst, ..." with an unbalanced paren) and used "\n" where the file's
// convention is "\n\t"; it now shows the actual assembly.  The emitted code
// is unchanged.
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
11273 
11274 // Integer Shifts
11275 
11276 // Shift Left Register
// Integer shift left by a register amount (lslvw; shift taken mod 32 by hw).
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// Integer shift left by a constant; masked to the low 5 bits to match
// Java's shift semantics.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11307 
11308 // Shift Right Logical Register
// Integer unsigned shift right by a register amount (lsrvw).
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
// Integer unsigned shift right by a constant; masked to the low 5 bits.
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11339 
11340 // Shift Right Arithmetic Register
// Integer arithmetic shift right by a register amount (asrvw).
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
// Integer arithmetic shift right by a constant; masked to the low 5 bits.
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11371 
11372 // Combined Int Mask and Right Shift (using UBFM)
11373 // TODO
11374 
11375 // Long Shifts
11376 
11377 // Shift Left Register
// Long shift left by a register amount (lslv; shift taken mod 64 by hw).
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// Long shift left by a constant; masked to the low 6 bits.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11408 
11409 // Shift Right Logical Register
// Long unsigned shift right by a register amount (lsrv).
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
// Long unsigned shift right by a constant; masked to the low 6 bits.
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11440 
11441 // A special-case pattern for card table stores.
// Unsigned shift right of a pointer reinterpreted as a long (CastP2X), used
// by card table stores; the cast costs nothing since it is just a shift.
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11456 
11457 // Shift Right Arithmetic Register
// Long arithmetic shift right by a register amount (asrv).
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
// Long arithmetic shift right by a constant; masked to the low 6 bits.
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11488 
11489 // BEGIN This section of the file is automatically generated. Do not edit --------------
11490 
// NOTE(review): this instruct is in the auto-generated section; any comment
// edits here will be lost if the generator is re-run.
// Long bitwise NOT: XorL src1 with -1 becomes eon (XOR-NOT) against zr.
instruct regL_not_reg(iRegLNoSp dst,
                         iRegL src1, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
// Integer bitwise NOT: XorI src1 with -1 becomes eonw against zr.
instruct regI_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
11523 
// NOTE(review): auto-generated section -- comment edits will be lost on regen.
// src1 & ~src2 (int): the AndI/XorI-with--1 pattern folds into one bicw.
instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// src1 & ~src2 (long): folds into one bic.
instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11557 
// NOTE(review): auto-generated section -- comment edits will be lost on regen.
// src1 | ~src2 (int): folds into one ornw.
instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// src1 | ~src2 (long): folds into one orn.
instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11591 
11592 instruct XorI_reg_not_reg(iRegINoSp dst,
11593                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
11594                          rFlagsReg cr) %{
11595   match(Set dst (XorI m1 (XorI src2 src1)));
11596   ins_cost(INSN_COST);
11597   format %{ "eonw  $dst, $src1, $src2" %}
11598 
11599   ins_encode %{
11600     __ eonw(as_Register($dst$$reg),
11601               as_Register($src1$$reg),
11602               as_Register($src2$$reg),
11603               Assembler::LSL, 0);
11604   %}
11605 
11606   ins_pipe(ialu_reg_reg);
11607 %}
11608 
11609 instruct XorL_reg_not_reg(iRegLNoSp dst,
11610                          iRegL src1, iRegL src2, immL_M1 m1,
11611                          rFlagsReg cr) %{
11612   match(Set dst (XorL m1 (XorL src2 src1)));
11613   ins_cost(INSN_COST);
11614   format %{ "eon  $dst, $src1, $src2" %}
11615 
11616   ins_encode %{
11617     __ eon(as_Register($dst$$reg),
11618               as_Register($src1$$reg),
11619               as_Register($src2$$reg),
11620               Assembler::LSL, 0);
11621   %}
11622 
11623   ins_pipe(ialu_reg_reg);
11624 %}
11625 
// ---- Logical op with a shifted, complemented second operand ----------------
// Same NOT-folding as above, but the second source is first shifted by an
// immediate (src3).  AArch64 bic/orn/eon accept a shifted-register operand,
// so the whole (op src1, ~(src2 shift imm)) tree becomes one instruction.
// The shift amount is masked to the operand width — 0x1f for the 32-bit
// "w" forms, 0x3f for the 64-bit forms — matching the ISA encoding range.

// dst = src1 & ~(src2 >>> src3) (int) -> bicw with LSR
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 >>> src3) (long) -> bic with LSR
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 >> src3) (int) -> bicw with ASR
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 >> src3) (long) -> bic with ASR
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 << src3) (int) -> bicw with LSL
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 << src3) (long) -> bic with LSL
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 >>> src3) (int): matches -1 ^ ((src2 >>> src3) ^ src1)
// -> eonw with LSR
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 >>> src3) (long) -> eon with LSR
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 >> src3) (int) -> eonw with ASR
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 >> src3) (long) -> eon with ASR
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 << src3) (int) -> eonw with LSL
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 << src3) (long) -> eon with LSL
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >>> src3) (int) -> ornw with LSR
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >>> src3) (long) -> orn with LSR
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >> src3) (int) -> ornw with ASR
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >> src3) (long) -> orn with ASR
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 << src3) (int) -> ornw with LSL
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 << src3) (long) -> orn with LSL
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11949 
// ---- Logical operation with a shifted second register operand --------------
// AArch64 logical instructions accept an optional immediate shift on the
// second register, so (op src1 (shift src2 imm)) fuses into one instruction.
// Shift amount is masked to the operand width (0x1f int, 0x3f long).

// dst = src1 & (src2 >>> src3) (int) -> andw with LSR
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >>> src3) (long) -> andr with LSR
// (the assembler method is named andr because "and" is a C++ keyword)
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >> src3) (int) -> andw with ASR
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >> src3) (long) -> andr with ASR
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 << src3) (int) -> andw with LSL
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 << src3) (long) -> andr with LSL
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >>> src3) (int) -> eorw with LSR
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >>> src3) (long) -> eor with LSR
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >> src3) (int) -> eorw with ASR
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >> src3) (long) -> eor with ASR
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 << src3) (int) -> eorw with LSL
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 << src3) (long) -> eor with LSL
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >>> src3) (int) -> orrw with LSR
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >>> src3) (long) -> orr with LSR
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >> src3) (int) -> orrw with ASR
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >> src3) (long) -> orr with ASR
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 << src3) (int) -> orrw with LSL
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 << src3) (long) -> orr with LSL
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12291 
// ---- Add/subtract with a shifted second register operand -------------------
// add/sub also take a shifted-register form, so (add/sub src1 (shift src2
// imm)) fuses into a single instruction.  Shift amount is masked to the
// operand width (0x1f int, 0x3f long), as in the logical patterns above.

// dst = src1 + (src2 >>> src3) (int) -> addw with LSR
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >>> src3) (long) -> add with LSR
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >> src3) (int) -> addw with ASR
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >> src3) (long) -> add with ASR
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 << src3) (int) -> addw with LSL
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 << src3) (long) -> add with LSL
instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >>> src3) (int) -> subw with LSR
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >>> src3) (long) -> sub with LSR
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >> src3) (int) -> subw with ASR
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >> src3) (long) -> sub with ASR
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 << src3) (int) -> subw with LSL
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 << src3) (long) -> sub with LSL
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12519 
12520 
12521 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Fuses (src << lshift) >> rshift (arithmetic) into a single 64-bit SBFM.
// SBFM semantics: take bits <s:0> of src, rotate right by r, sign-extend.
// r = (rshift - lshift) mod 64 positions the field; s = 63 - lshift is the
// index of the highest source bit that survives the left shift.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of sbfmL: fuses (src << lshift) >> rshift (arithmetic)
// into a single SBFMW. r and s are computed as above but modulo 32.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Unsigned counterpart of sbfmL: fuses (src << lshift) >>> rshift into a
// single 64-bit UBFM (zero-extends the extracted field instead of
// sign-extending).
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of ubfmL: fuses (src << lshift) >>> rshift into UBFMW.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
// Bitfield extract with shift & mask

// Match (src >>> rshift) & mask as a 32-bit UBFXW. The field width is
// log2(mask+1); immI_bitmask guarantees mask+1 is a power of two, so
// exact_log2 is well-defined here.
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// 64-bit variant: (src >>> rshift) & mask as UBFX.
// NOTE(review): width uses exact_log2 on a long mask+1; presumably fine for
// the range immL_bitmask admits, but the 64-bit-safe exact_log2_long (used
// in the ubfizL predicate below) would be the consistent choice — confirm.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}

// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// The 64-bit UBFX zero-extends into the upper word, so the ConvI2L is free.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12663 
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immI_bitmask guarantees it.
// The predicate checks that the shifted field (width + lshift) still fits in
// 32 bits, which UBFIZW requires.
instruct ubfizwI(iRegINoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftI (AndI src mask) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2(n->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= (31+1));

  ins_cost(INSN_COST);
  format %{ "ubfizw $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfizw(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immL_bitmask guarantees it.
// 64-bit variant; the field (width + lshift) must fit in 64 bits.
// NOTE(review): the predicate uses exact_log2_long but the encode uses
// exact_log2 on the same long mask — presumably equivalent for admissible
// masks, but worth confirming / unifying on exact_log2_long.
instruct ubfizL(iRegLNoSp dst, iRegL src, immI lshift, immL_bitmask mask)
%{
  match(Set dst (LShiftL (AndL src mask) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 63 &&
    (exact_log2_long(n->in(1)->in(2)->get_long()+1) + (unsigned int)n->in(2)->get_int()) <= (63+1));

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfiz(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}

// If there is a convert I to L block between an AndI and a LShiftL, we can also match ubfiz
// The 64-bit UBFIZ zero-extends, so the ConvI2L needs no extra instruction.
instruct ubfizIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftL (ConvI2L(AndI src mask)) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2((unsigned int)n->in(1)->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= 32);

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfiz(as_Register($dst$$reg),
             as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12721 
// Rotations

// Match (src1 << lshift) | (src2 >>> rshift) where lshift + rshift == 64
// (enforced by the predicate) as a single 64-bit EXTR. When src1 == src2
// this is a rotate; the matcher accepts distinct sources as well.
instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// 32-bit variant: lshift + rshift == 32; emits EXTRW (the format string
// shows "extr" but the encoding uses the W form).
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// Same fusion as extrOrL but with AddL combining the two halves; valid
// because the shifted pieces cannot overlap when the shifts sum to 64,
// so Add and Or produce identical results.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// 32-bit variant of extrAddL; emits EXTRW.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12783 
12784 
// rol expander

// Rotate-left by a variable amount, 64-bit. AArch64 has no ROL instruction,
// so this negates the shift count into rscratch1 and uses RORV:
// rol(x, s) == ror(x, -s). No match rule — used only via expand below.
instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol expander

// 32-bit variant of rolL_rReg: negate the count, then RORVW.
instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Recognize the rotate-left idiom (x << s) | (x >>> (64 - s)) and expand
// to the rolL expander above.
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// Same idiom written with (0 - s); equivalent because shift counts are
// taken modulo 64 by the hardware.
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-left idiom (x << s) | (x >>> (32 - s)).
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-left idiom written with (0 - s).
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
12852 
// ror expander

// Rotate-right by a variable amount, 64-bit: maps directly onto RORV,
// so no scratch register or negation is needed (hence the lower cost
// versus the rol expander). No match rule — used only via expand below.
instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander

// 32-bit variant: RORVW.
instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Recognize the rotate-right idiom (x >>> s) | (x << (64 - s)) and expand
// to the rorL expander above.
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// Same idiom written with (0 - s); equivalent because shift counts are
// taken modulo 64 by the hardware.
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-right idiom (x >>> s) | (x << (32 - s)).
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-right idiom written with (0 - s).
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
12918 
// Add/subtract (extended)

// Fold the widening ConvI2L into the add: dst = src1 + sxtw(src2),
// using the ADD (extended register) form.
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};

// Fold the widening ConvI2L into the subtract: dst = src1 - sxtw(src2).
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
12946 
12947 
// The (x << k) >> k shift-pair idiom is how the IR expresses a narrowing
// sign/zero extension; the following rules fold it into the add's
// extended-register operand.

// dst = src1 + sxth(src2): (src2 << 16) >> 16 is a signed 16-bit extend.
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + sxtb(src2): shift pair of 24 extends a signed byte.
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + uxtb(src2): unsigned right shift of the pair gives a
// zero-extended byte.
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: dst = src1 + sxth(src2) via a shift pair of 48.
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: dst = src1 + sxtw(src2) via a shift pair of 32.
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: dst = src1 + sxtb(src2) via a shift pair of 56.
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: dst = src1 + uxtb(src2) via an unsigned shift pair of 56.
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
13038 
13039 
// Zero-extension expressed as an AND with 0xFF / 0xFFFF / 0xFFFFFFFF can
// likewise be folded into the add/sub extended-register operand.

// dst = src1 + uxtb(src2): (src2 & 0xFF) is a zero-extended byte.
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 + uxth(src2): (src2 & 0xFFFF).
instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: dst = src1 + uxtb(src2).
instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: dst = src1 + uxth(src2).
instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: dst = src1 + uxtw(src2), from (src2 & 0xFFFFFFFF).
instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Subtract counterparts of the rules above.

// dst = src1 - uxtb(src2).
instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// dst = src1 - uxth(src2).
instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: dst = src1 - uxtb(src2).
instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: dst = src1 - uxth(src2).
instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: dst = src1 - uxtw(src2).
instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
13169 
13170 
// Extended-register add/sub with an additional left shift: the pattern is
// src1 +/- ((extend(src2)) << lshift2), where the extension is expressed as
// a shift pair and lshift2 is constrained by immIExt to the 0..4 range the
// extended-register encoding allows.

// dst = src1 + (sxtb(src2) << lshift2), 64-bit.
instruct AddExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (sxth(src2) << lshift2), 64-bit.
instruct AddExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (sxtw(src2) << lshift2), 64-bit.
instruct AddExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (sxtb(src2) << lshift2), 64-bit.
instruct SubExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (sxth(src2) << lshift2), 64-bit.
instruct SubExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (sxtw(src2) << lshift2), 64-bit.
instruct SubExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (sxtb(src2) << lshift2), 32-bit (ADDW).
instruct AddExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (sxth(src2) << lshift2), 32-bit (ADDW).
instruct AddExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (sxtb(src2) << lshift2), 32-bit (SUBW).
instruct SubExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (sxth(src2) << lshift2), 32-bit (SUBW).
instruct SubExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13300 
13301 
// dst = src1 + (sxtw(src2) << lshift): fold ConvI2L plus a small left shift
// (immIExt-limited) into one ADD (extended register) — the common pattern
// for scaled array indexing with an int index.
instruct AddExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};

// dst = src1 - (sxtw(src2) << lshift): subtract counterpart of the above.
instruct SubExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};
13327 
13328 
// long add, operand zero-extended byte then shifted:
// dst = src1 + ((src2 & 0xff) << lshift), via add-extended uxtb.
instruct AddExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13341 
// long add, operand zero-extended halfword then shifted:
// dst = src1 + ((src2 & 0xffff) << lshift), via add-extended uxth.
instruct AddExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13354 
// long add, operand zero-extended word then shifted:
// dst = src1 + ((src2 & 0xffffffff) << lshift), via add-extended uxtw.
instruct AddExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13367 
// long sub, operand zero-extended byte then shifted:
// dst = src1 - ((src2 & 0xff) << lshift), via sub-extended uxtb.
instruct SubExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13380 
// long sub, operand zero-extended halfword then shifted:
// dst = src1 - ((src2 & 0xffff) << lshift), via sub-extended uxth.
instruct SubExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13393 
// long sub, operand zero-extended word then shifted:
// dst = src1 - ((src2 & 0xffffffff) << lshift), via sub-extended uxtw.
instruct SubExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13406 
// int add, operand zero-extended byte then shifted:
// dst = src1 + ((src2 & 0xff) << lshift), via addw-extended uxtb.
instruct AddExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13419 
// int add, operand zero-extended halfword then shifted:
// dst = src1 + ((src2 & 0xffff) << lshift), via addw-extended uxth.
instruct AddExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13432 
// int sub, operand zero-extended byte then shifted:
// dst = src1 - ((src2 & 0xff) << lshift), via subw-extended uxtb.
instruct SubExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13445 
// int sub, operand zero-extended halfword then shifted:
// dst = src1 - ((src2 & 0xffff) << lshift), via subw-extended uxth.
instruct SubExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13458 // END This section of the file is automatically generated. Do not edit --------------
13459 
13460 // ============================================================================
13461 // Floating Point Arithmetic Instructions
13462 
// Single-precision FP add: dst = src1 + src2 (fadds).
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}
13477 
// Double-precision FP add: dst = src1 + src2 (faddd).
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
13492 
// Single-precision FP subtract: dst = src1 - src2 (fsubs).
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}
13507 
// Double-precision FP subtract: dst = src1 - src2 (fsubd).
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
13522 
// Single-precision FP multiply: dst = src1 * src2 (fmuls).
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}
13537 
// Double-precision FP multiply: dst = src1 * src2 (fmuld).
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
13552 
13553 // src1 * src2 + src3
// Fused single-precision multiply-add: dst = src1 * src2 + src3 (fmadds).
// Only matched when UseFMA is on (fused rounding differs from mul+add).
instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary src1 src2)));

  format %{ "fmadds   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13569 
13570 // src1 * src2 + src3
// Fused double-precision multiply-add: dst = src1 * src2 + src3 (fmaddd).
// Only matched when UseFMA is on.
instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary src1 src2)));

  format %{ "fmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmaddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13586 
13587 // -src1 * src2 + src3
// Fused single-precision multiply-subtract: dst = -(src1 * src2) + src3
// (fmsubs). Two match rules cover the negation on either multiplicand.
instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));

  format %{ "fmsubs   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubs(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13604 
13605 // -src1 * src2 + src3
// Fused double-precision multiply-subtract: dst = -(src1 * src2) + src3
// (fmsubd). Two match rules cover the negation on either multiplicand.
instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));

  format %{ "fmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13622 
13623 // -src1 * src2 - src3
// Fused single-precision negated multiply-add: dst = -(src1 * src2) - src3
// (fnmadds). Both sign placements of the negated multiplicand are matched.
instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));

  format %{ "fnmadds  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmadds(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13640 
13641 // -src1 * src2 - src3
// Fused double-precision negated multiply-add: dst = -(src1 * src2) - src3
// (fnmaddd). Both sign placements of the negated multiplicand are matched.
instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));

  format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmaddd(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13658 
13659 // src1 * src2 - src3
// Fused single-precision: dst = src1 * src2 - src3 (fnmsubs).
// NOTE(review): the `zero` operand is not referenced by the match rule or
// the encoding — it appears vestigial; confirm and consider removing.
instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));

  format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmsubs(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13675 
13676 // src1 * src2 - src3
// Fused double-precision: dst = src1 * src2 - src3 (fnmsub, double form).
// NOTE(review): `zero` is unused by match/encode (see mnsubF) — presumably
// vestigial; verify. The assembler entry point is `fnmsub` here even though
// the sibling single form uses `fnmsubs` — see the in-body note below.
instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));

  format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
  // n.b. insn name should be fnmsubd
    __ fnmsub(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13693 
13694 
// Single-precision FP divide: dst = src1 / src2 (fdivs).
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}
13709 
// Double-precision FP divide: dst = src1 / src2 (fdivd).
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
13724 
// Single-precision FP negate: dst = -src (fnegs).
// Fix: format string said "fneg" but the encoding emits fnegs; now consistent
// with negD's "fnegd". Display-only change, no encoding change.
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
13738 
// Double-precision FP negate: dst = -src (fnegd).
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
13752 
// Single-precision FP absolute value: dst = |src| (fabss).
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
13765 
// Double-precision FP absolute value: dst = |src| (fabsd).
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
13778 
// Double-precision square root: dst = sqrt(src) (fsqrtd).
// Fix: this double-precision op was scheduled on fp_div_s (the
// single-precision divide/sqrt pipe) while sqrtF used fp_div_d — the two
// pipe classes were swapped. Scheduling-only change; encoding unchanged.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_d);
%}
13791 
// Single-precision square root via the D2F(SqrtD(F2D)) pattern C2 emits for
// Math.sqrt on float: dst = (float)sqrt((double)src), done directly with
// fsqrts (exact per IEEE-754, so the double round-trip is not needed).
// Fix: was scheduled on fp_div_d (double divide/sqrt pipe); use the
// single-precision pipe fp_div_s (swapped with sqrtD). Scheduling-only.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
              as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_s);
%}
13804 
13805 // ============================================================================
13806 // Logical Instructions
13807 
13808 // Integer Logical Instructions
13809 
13810 // And Instructions
13811 
13812 
// int bitwise AND, register-register: dst = src1 & src2 (andw).
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13827 
// int bitwise AND with a logical immediate: dst = src1 & imm (andw).
// immILog restricts src2 to values encodable as an AArch64 bitmask immediate.
// Fix: format said "andsw" but the encoding emits andw (flags are NOT set);
// display-only change.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13842 
13843 // Or Instructions
13844 
// int bitwise OR, register-register: dst = src1 | src2 (orrw).
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13859 
// int bitwise OR with a logical immediate: dst = src1 | imm (orrw).
// NOTE(review): (unsigned long) is 32-bit on LLP64 targets — consider the
// u_int64_t type used elsewhere in this file; verify against the andw/orrw
// assembler signatures.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13874 
13875 // Xor Instructions
13876 
// int bitwise XOR, register-register: dst = src1 ^ src2 (eorw).
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13891 
// int bitwise XOR with a logical immediate: dst = src1 ^ imm (eorw).
// NOTE(review): (unsigned long) is 32-bit on LLP64 targets — see orI_reg_imm.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13906 
13907 // Long Logical Instructions
13908 // TODO
13909 
// long bitwise AND, register-register: dst = src1 & src2 (64-bit andr).
// Fix: format comment said "# int" for a 64-bit (long) operation.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13924 
// long bitwise AND with a logical immediate: dst = src1 & imm (64-bit andr).
// immLLog restricts src2 to encodable bitmask immediates.
// Fix: format comment said "# int" for a 64-bit (long) operation.
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13939 
13940 // Or Instructions
13941 
// long bitwise OR, register-register: dst = src1 | src2 (64-bit orr).
// Fix: format comment said "# int" for a 64-bit (long) operation.
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13956 
// long bitwise OR with a logical immediate: dst = src1 | imm (64-bit orr).
// Fix: format comment said "# int" for a 64-bit (long) operation.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13971 
13972 // Xor Instructions
13973 
// long bitwise XOR, register-register: dst = src1 ^ src2 (64-bit eor).
// Fix: format comment said "# int" for a 64-bit (long) operation.
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13988 
// long bitwise XOR with a logical immediate: dst = src1 ^ imm (64-bit eor).
// Fix: format comment said "# int" for a 64-bit (long) operation.
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
14003 
// int -> long sign extension: sbfm with immr=0, imms=31 encodes sxtw.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}
14015 
14016 // this pattern occurs in bigmath arithmetic
// Unsigned int -> long: (long)src & 0xffffffff matched as one ubfm
// (zero-extend word). immL_32bits pins the mask to 0xffffffff.
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
14029 
// long -> int truncation: a 32-bit register move keeps the low word and
// zeroes the upper 32 bits.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
14042 
// int -> boolean: dst = (src != 0) ? 1 : 0, via compare-with-zero + cset.
// Clobbers flags, hence KILL cr.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
14060 
// pointer -> boolean: dst = (src != NULL) ? 1 : 0, 64-bit compare + cset.
// Clobbers flags, hence KILL cr.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
14078 
// double -> float narrowing conversion (fcvtd, i.e. fcvt from D register).
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2f);
%}
14091 
// float -> double widening conversion (fcvts, i.e. fcvt from S register).
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}
14104 
// float -> int: fcvtzs (word form) converts toward zero with saturation.
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}
14117 
// float -> long: fcvtzs (64-bit form), convert toward zero with saturation.
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}
14130 
// int -> float: signed convert from 32-bit GPR (scvtfws).
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}
14143 
// long -> float: signed convert from 64-bit GPR (scvtfs).
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}
14156 
// double -> int: fcvtzs (double source, word dest), toward zero, saturating.
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}
14169 
// double -> long: fcvtzs (double source, 64-bit dest), toward zero, saturating.
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}
14182 
// int -> double: signed convert from 32-bit GPR (scvtfwd).
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}
14195 
// long -> double: signed convert from 64-bit GPR (scvtfd).
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
14208 
14209 // stack <-> reg and reg <-> reg shuffles with no conversion
14210 
// Raw-bit move float -> int via a stack slot: load 32 bits from the
// spill slot into a GPR (no conversion).
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}
14228 
// Raw-bit move int -> float via a stack slot: load 32 bits from the
// spill slot into an FP register (no conversion).
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
14246 
// Raw-bit move double -> long via a stack slot: load 64 bits from the
// spill slot into a GPR (no conversion).
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}
14264 
// Raw-bit move long -> double via a stack slot: load 64 bits from the
// spill slot into an FP register (no conversion).
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
14282 
// Raw-bit move float -> int, FP register to stack slot: store 32 bits
// from the S register to the spill slot (no conversion).
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
14300 
// Raw-bit move int -> float, GPR to stack slot: store 32 bits from the
// GPR to the spill slot (no conversion).
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
14318 
// Raw-bit move double -> long, FP register to stack slot: store 64 bits
// from the D register to the spill slot (no conversion).
// Fix: format showed "strd $dst, $src" — operand order now matches the
// actual store (src register into dst slot) and the sibling
// MoveF2I_reg_stack/MoveL2D_reg_stack formats. Display-only change.
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
14336 
// Raw-bit move long -> double, GPR to stack slot: store 64 bits from the
// GPR to the spill slot (no conversion).
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
14354 
// Raw-bit move float -> int, register to register: fmovs copies the 32 bits
// of the S register into a GPR unchanged.
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}
14372 
// Raw-bit move int -> float, register to register: fmovs copies the 32 bits
// of the GPR into an S register unchanged.
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}
14390 
// Raw-bit move double -> long, register to register: fmovd copies the 64 bits
// of the D register into a GPR unchanged.
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}
14408 
// Raw-bit move long -> double, register to register: fmovd copies the 64 bits
// of the GPR into a D register unchanged.
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
14426 
14427 // ============================================================================
14428 // clearing of an array
14429 
// Zero an array whose length is in a register.  zero_words clobbers
// both the count and base registers, hence USE_KILL on both; the fixed
// r10/r11 operand classes pin the runtime stub's calling convention.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, $cnt$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}

// Zero an array whose length is a compile-time constant small enough to
// fall below BlockZeroingLowLimit (threshold scaled from bytes to
// words).  Only the base register is clobbered; the count is immediate.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  predicate((u_int64_t)n->in(2)->get_long()
            < (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
  %}

  ins_pipe(pipe_class_memory);
%}
14461 
14462 // ============================================================================
14463 // Overflow Math Instructions
14464 
// Overflow checks for add/sub/neg.  Each emits a flags-setting compare
// (cmn for add: op1 + op2 sets V on signed overflow; cmp for sub) and
// produces the condition register directly; the consumer tests VS/VC.

// int add overflow, register + register.
instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// int add overflow, register + add/sub-encodable immediate.
instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// long add overflow, register + register.
instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// long add overflow, register + add/sub-encodable immediate.
instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// int subtract overflow, register - register.
instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// int subtract overflow, register - add/sub-encodable immediate.
instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// long subtract overflow, register - register.
instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// long subtract overflow, register - add/sub-encodable immediate.
instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// int negation overflow: matched as OverflowSubI with a zero left
// operand, emitted as a compare of zr against op1.
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  format %{ "cmpw  zr, $op1\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}

// long negation overflow: OverflowSubL with a zero left operand.
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
14594 
// Multiply-overflow checks.  AArch64 multiplies do not set flags, so
// overflow is detected by widening: the full product is computed and
// its high part compared against the sign extension of the low part
// (NE => the product did not fit).  The non-branch forms then convert
// that NE/EQ result into the V flag by computing 0x80000000 - 1
// (which sets VS) only in the overflow case, so a generic cmpOp
// consumer testing overflow/no_overflow still works.

// int multiply overflow -> flags.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// int multiply overflow fused with the branch: the If consumes the
// check directly, so the V-flag materialization above is skipped and
// the NE/EQ test is branched on immediately.  Only overflow /
// no_overflow tests may match (see predicate).
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // VS (overflow requested) maps to NE, VC maps to EQ.
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}

// long multiply overflow -> flags.  Uses mul + smulh to obtain the
// 128-bit product; the high half must equal the low half shifted
// arithmetically by 63 (pure sign extension) for no overflow.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// long multiply overflow fused with the branch (see int form above).
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
14684 
14685 // ============================================================================
14686 // Compare Instructions
14687 
// Signed int compares.  immIAddSub immediates fit a single cmpw; a
// general immI needs a scratch materialization first, hence the
// doubled cost on that form.

instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compare against the constant zero.
instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, 0" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Compare against an immediate encodable in the cmpw instruction.
instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Compare against an arbitrary int immediate (costs two instructions).
instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14743 
14744 // Unsigned compare Instructions; really, same as signed compare
14745 // except it should only be used to feed an If or a CMovI which takes a
14746 // cmpOpU.
14747 
// Unsigned int compares: same encodings as the signed forms, but they
// define an rFlagsRegU so only unsigned condition consumers (cmpOpU)
// can use the result.

instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned compare against zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned compare against an arbitrary immediate (two instructions).
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14803 
// Signed long compares (64-bit cmp).

instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Long compare against zero.
instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Long compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Long compare against an arbitrary immediate (two instructions).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14859 
// Unsigned long compares: identical encodings to the signed long
// forms, but defining rFlagsRegU so only unsigned consumers match.

instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned long compare against zero.
instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpUL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare against an add/sub-encodable immediate.
instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare against an arbitrary immediate.
instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14915 
// Pointer and compressed-pointer compares.  Pointers compare unsigned
// (rFlagsRegU); the test* forms handle comparison against null.

instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compare two compressed (narrow) oops.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Test a pointer against null.
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Test a compressed oop against null.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
14971 
14972 // FP comparisons
14973 //
14974 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
14975 // using normal cmpOp. See declaration of rFlagsReg for details.
14976 
// Float compare, register-register: fcmps sets the normal flags reg.
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Float compare against the constant 0.0 (immediate form of fcmps).
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}
// Double compares mirror the float forms above, using fcmpd.

instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Double compare against the constant 0.0.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}
15033 
// Three-way FP compares (CmpF3/CmpD3): produce an int in $dst that is
// -1 if src1 < src2 or the operands are unordered, 0 if equal, and 1
// if greater.  csinv installs 0 on EQ else -1; csneg then keeps -1 on
// LT (which also covers unordered after fcmp) else negates to +1.
// NOTE(review): the `done` label is declared and bound but never
// branched to in these encodings — appears vestigial.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}

  ins_pipe(pipe_class_default);

%}

// Double variant of the three-way compare above.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);

%}

// Three-way float compare against the constant 0.0.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0D);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}

  ins_pipe(pipe_class_default);

%}

// Three-way double compare against the constant 0.0.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0D);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);

%}
15141 
// CmpLTMask: dst = (p < q) ? -1 : 0 (all-ones mask when less-than).
// csetw produces 0/1 from the LT flag; subtracting from zr turns the
// 1 into -1.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// CmpLTMask against zero collapses to a single arithmetic shift:
// asrw by 31 smears the sign bit, giving -1 for negative, 0 otherwise.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
15178 
15179 // ============================================================================
15180 // Max and Min
15181 
// Signed int minimum: compare then conditionally select src1 on LT.
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 lt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Signed int maximum: same shape as minI but selecting on GT.
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 gt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
15232 
15233 // ============================================================================
15234 // Branch Instructions
15235 
15236 // Direct Branch.
// Direct Branch.
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}

// Conditional Near Branch (signed conditions).
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}

// Conditional Near Branch Unsigned (consumes rFlagsRegU / cmpOpU).
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
15292 
15293 // Make use of CBZ and CBNZ.  These instructions, as well as being
15294 // shorter than (cmp; branch), have the additional benefit of not
15295 // killing the flags.
15296 
// Compare-with-zero fused branches: EQ emits cbz/cbzw, NE emits
// cbnz/cbnzw, so no flags are set (cr listed but not killed).

// int == 0 / != 0 branch.
instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// long == 0 / != 0 branch.
instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// pointer == null / != null branch.
instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// compressed oop == 0 / != 0 branch (32-bit cbzw/cbnzw).
instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// (DecodeN oop) == null branch: a narrow oop decodes to null iff the
// narrow value itself is zero, so test the undecoded 32-bit register.
instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Unsigned int vs 0 branch: unsigned <= 0 (LS) is the same as == 0,
// so EQ and LS both map to cbzw; NE/HI map to cbnzw.
instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Unsigned long vs 0 branch (see unsigned int form above).
instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpUL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15415 
15416 // Test bit and Branch
15417 
15418 // Patterns for short (< 32KiB) variants
// Sign test: (x < 0) / (x >= 0) is just a test of the sign bit, so LT
// maps to NE (bit set) and GE to EQ (bit clear) for tbr on bit 63/31.
instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// int sign-bit test and branch (bit 31).
instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// (x & single-bit-mask) == 0 / != 0 branch: the predicate requires a
// power-of-two mask, so the test collapses to tbz/tbnz on that bit.
instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// int single-bit test and branch (see long form above).
instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
15484 
15485 // And far variants
// Far variants of the sign-bit/bit-test branches above: identical match
// rules and encodings except tbr is told the target may be out of
// test-bit-branch range (far == true), and they are not marked as short
// branches.
instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    // LT -> sign bit set (NE), GE -> sign bit clear (EQ); see the short
    // variant above.
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  // Single-bit mask only, as in the short variant.
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15547 
15548 // Test bits
15549 
// Set flags from (op1 & imm) compared with zero, i.e. a 64-bit tst with
// an immediate mask.
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  // Only when the mask is encodable as a 64-bit logical immediate.
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15562 
// Set flags from (op1 & imm) compared with zero, i.e. a 32-bit tstw with
// an immediate mask.
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  // Only when the mask is encodable as a 32-bit logical immediate.
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  // Format fixed from "tst" to "tstw" so the printed form matches the
  // instruction actually emitted (and the cmpI_and_reg variant below).
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15575 
// Register-register forms of the mask tests: no immediate-encodability
// predicate is needed since the mask is in a register.
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
15597 
15598 
15599 // Conditional Far Branch
15600 // Conditional Far Branch Unsigned
15601 // TODO: fixme
15602 
15603 // counted loop end branch near
// Signed counted-loop back-branch on the flags in cr.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}

// Unsigned counted-loop back-branch; differs only in the condition-code
// operand class and the unsigned branch encoding.
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
15636 
15637 // counted loop end branch far
15638 // counted loop end branch far unsigned
15639 // TODO: fixme
15640 
15641 // ============================================================================
15642 // inlined locking and unlocking
15643 
// Inline monitor enter; result is communicated through the flags in cr.
// tmp and tmp2 are scratch registers clobbered by the encoding.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}

// Inline monitor exit, the counterpart of cmpFastLock above.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
15671 
15672 
15673 // ============================================================================
15674 // Safepoint Instructions
15675 
15676 // TODO
15677 // provide a near and far version of this code
15678 
// Safepoint poll: a read of the polling page; a GC-requested safepoint
// makes the page inaccessible and the resulting fault is handled by the
// runtime.
instruct safePoint(iRegP poll)
%{
  match(SafePoint poll);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
15691 
15692 
15693 // ============================================================================
15694 // Procedure Call/Return Instructions
15695 
15696 // Call Java Static Instruction
15697 
// Direct call to a statically-bound Java method.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}

// TO HERE

// Call Java Dynamic Instruction
// Dynamically-dispatched Java call (inline-cache based).
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction

instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Leaf Instruction

instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Leaf (no FP) Instruction

instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15783 
15784 // Tail Call; Jump from runtime stub to Java code.
15785 // Also known as an 'interprocedural jump'.
15786 // Target of jump will eventually return to caller.
15787 // TailJump below removes the return address.
// Interprocedural jump from a runtime stub into Java code; the target
// will eventually return to this frame's caller.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}

// Like TailCall but removes the return address first; ex_oop carries the
// exception oop to the target (exception dispatch path).
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
15813 
15814 // Create exception oop: created by stack-crawling runtime code.
15815 // Created exception is now available to this handler, and is setup
15816 // just prior to jumping to this handler. No code emitted.
15817 // TODO check
15818 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
// Bind the incoming exception oop (placed in r0 by stack-crawling runtime
// code) to the register allocator; emits no code (size 0).
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}

// Rethrow exception: The exception oop will come in the first
// argument position. Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
15844 
15845 
15846 // Return Instruction
15847 // epilog node loads ret address into lr as part of frame pop
// Method return; the epilog has already restored the return address
// into lr as part of the frame pop.
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}
15858 
15859 // Die now.
// Halt: emit a trapping instruction so execution can never proceed past
// this point.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // +1 so NativeInstruction::is_sigill_zombie_not_entrant() doesn't
    // return true
    // NOTE(review): 'dpcs1' is presumably this port's Assembler mnemonic
    // for the DCPS1 debug-trap instruction — verify the spelling against
    // the assembler header.
    __ dpcs1(0xdead + 1);
  %}

  ins_pipe(pipe_class_default);
%}
15874 
15875 // ============================================================================
// Partial Subtype Check
//
// Search the secondary-supers (superklass) array for an instance of the
// superklass.  Set a hidden internal cache on a hit (cache is checked
// with exposed code in gen_subtype_check()).  Return NZ for a miss or
// zero for a hit.  The encoding ALSO sets flags.
15882 
// Partial subtype check producing a result register (zero on hit); the
// encoding also sets flags.
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}

// Variant matched when only the flags (comparison with zero) are used;
// the result register need not be zeroed on a hit.
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
15912 
// String comparison intrinsics, one per encoding pair (U = UTF-16,
// L = Latin-1). Same-width compares (UU, LL) need no vector temps and
// pass fnoreg; mixed-width compares (UL, LU) take two vector temps.
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      fnoreg, fnoreg, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      fnoreg, fnoreg, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
15981 
// String indexOf intrinsics with the needle length in a register.
// The -1 passed as icnt2 apparently signals "length is in cnt2", in
// contrast to the constant-length variants below — confirm against
// MacroAssembler::string_indexof.
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct string_indexofLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
16057 
// Constant-needle-length indexOf variants: the needle length is an
// immediate (<= 4 for same-width, exactly 1 for mixed-width patterns)
// and is passed to string_indexof as a constant, with zr in place of a
// cnt2 register.
instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct string_indexof_conLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
16141 
// Single-character indexOf on a UTF-16 string.
instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
                              iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                              iRegINoSp tmp3, rFlagsReg cr)
%{
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}

  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
                           $result$$Register, $tmp1$$Register, $tmp2$$Register,
                           $tmp3$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
16159 
// String equality intrinsics; the trailing literal selects the element
// size passed to string_equals (1 = byte/Latin-1, 2 = char/UTF-16).
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
16191 
// byte[] equality intrinsic (element size 1).
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  // Format fixed: "$ary2" was written "ary2", so the second operand was
  // printed literally instead of being substituted.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}
16208 
// char[] equality intrinsic (element size 2).
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  // Format fixed: "$ary2" was written "ary2", so the second operand was
  // printed literally instead of being substituted.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
16225 
// Scan a byte array for any element with the high bit set.
instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(USE_KILL ary1, USE_KILL len, KILL cr);
  format %{ "has negatives byte[] $ary1,$len -> $result" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
16236 
16237 // fast char[] to byte[] compression
16238 instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
16239                          vRegD_V0 tmp1, vRegD_V1 tmp2,
16240                          vRegD_V2 tmp3, vRegD_V3 tmp4,
16241                          iRegI_R0 result, rFlagsReg cr)
16242 %{
16243   match(Set result (StrCompressedCopy src (Binary dst len)));
16244   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16245 
16246   format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
16247   ins_encode %{
16248     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16249                            $tmp1$$FloatRegister, $tmp2$$FloatRegister,
16250                            $tmp3$$FloatRegister, $tmp4$$FloatRegister,
16251                            $result$$Register);
16252   %}
16253   ins_pipe( pipe_slow );
16254 %}
16255 
16256 // fast byte[] to char[] inflation
// Inflate a byte[] region into a char[] region via
// MacroAssembler::byte_array_inflate.  Produces no value (Universe dummy);
// src/dst/len are pinned to R0/R1/R2 and clobbered, with vector temps
// tmp1..tmp3 and pointer temp tmp4 (R3).
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
                        vRegD tmp1, vRegD tmp2, vRegD tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
16270 
16271 // encode char[] to byte[] in ISO_8859_1
// Encode a char[] into a byte[] in ISO-8859-1 via
// MacroAssembler::encode_iso_array.  Register pinning mirrors
// string_compress (src=R2, dst=R1, len=R3, result=R0); the four vector
// temporaries V0..V3 are declared KILL rather than TEMP.
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
16289 
16290 // ============================================================================
16291 // This name is KNOWN by the ADLC and cannot be changed.
16292 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
16293 // for this guy.
// Materialize the current thread.  dst is constrained to the dedicated
// thread register operand class (thread_RegP), so no instruction is
// emitted at all: size(0), empty encoding, zero cost.
instruct tlsLoadP(thread_RegP dst)
%{
  match(Set dst (ThreadLocal));

  ins_cost(0);

  format %{ " -- \t// $dst=Thread::current(), empty" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
16308 
16309 // ====================VECTOR INSTRUCTIONS=====================================
16310 
16311 // Load vector (32 bits)
// Load vector (32 bits).  The predicate selects on the LoadVector's
// memory footprint; ldrs loads an S (single-word) FP/SIMD register.
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load vector (64 bits) — ldrd into a D register.
instruct loadV8(vecD dst, vmem8 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load Vector (128 bits) — ldrq into a Q register.
instruct loadV16(vecX dst, vmem16 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}
16343 
16344 // Store Vector (32 bits)
// Store Vector (32 bits) — strs from an S register; predicate selects on
// the StoreVector's memory footprint.
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (64 bits) — strd from a D register.
instruct storeV8(vecD src, vmem8 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (128 bits) — strq from a Q register.
instruct storeV16(vecX src, vmem16 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(vstore_reg_mem128);
%}
16376 
// Replicate a byte from a GP register across a 64-bit vector (DUP).
// Also matches length 4 (sub-register vectors use the 8B form).
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// Replicate a byte across a 128-bit vector.
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Replicate an immediate byte (low 8 bits of the constant) via MOVI.
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

// Replicate a short (H lane); length 2 uses the 4H form.
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Replicate an immediate short (low 16 bits of the constant).
instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

// Replicate an int (S lane) across 64-bit / 128-bit vectors.
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Replicate an immediate int; no masking needed for full S lanes.
instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

// Replicate a long (D lane) across a 128-bit vector.
instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
16536 
// Zero a 128-bit vector by XOR-ing it with itself (register-independent
// of the previous contents for the purpose of producing zero).
// NOTE(review): this rule matches ReplicateI with an immI0 operand even
// though it is named for 2L — presumably the matcher canonicalizes a
// zero ReplicateL through ReplicateI; confirm against the matcher before
// changing.  The format string shows "movi" but the encoding emits EOR
// (debug-output mismatch only).
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}
16550 
// Replicate a float from an FP register across a 64-bit vector (DUP from
// element 0 of the source SIMD register).
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}

// Replicate a float across a 128-bit vector.
instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}

// Replicate a double across a 128-bit vector.
instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
16589 
16590 // ====================REDUCTION ARITHMETIC====================================
16591 
// Add-reduce a 2-lane int vector into a scalar:
// dst = src1 + src2[0] + src2[1], extracting lanes with UMOV.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Add-reduce a 4-lane int vector: ADDV sums all lanes into lane 0,
// then the scalar result is src1 plus that sum.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16628 
// Multiply-reduce a 2-lane int vector:
// dst = src1 * src2[0] * src2[1], lane values extracted with UMOV.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i\n\t"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Multiply-reduce a 4-lane int vector.  First fold the high D half onto
// the low half (INS + 2S MULV gives lane-wise products src2[0]*src2[2]
// and src2[1]*src2[3]), then finish with two scalar multiplies.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "ins   $tmp, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i\n\t"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16672 
// Add-reduce a 2-lane float vector with scalar fadds in lane order
// (lane 1 is moved to lane 0 of tmp with INS before each add).
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Add-reduce a 4-lane float vector: scalar fadds over lanes 0..3 in
// order, each non-zero lane first moved into tmp lane 0 with INS.
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16724 
// Multiply-reduce a 2-lane float vector with scalar fmuls in lane order
// (lane 1 is moved to lane 0 of tmp with INS before the second multiply):
// dst = src1 * src2[0] * src2[1].
// Fix: the trailing format note previously read "add reduction4f"; this
// rule performs a 2-lane multiply reduction (debug output only).
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16744 
// Multiply-reduce a 4-lane float vector: scalar fmuls over lanes 0..3 in
// order, each non-zero lane first moved into tmp lane 0 with INS.
// Fix: the trailing format note previously read "add reduction4f"; this
// rule performs a multiply reduction (debug output only).
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16776 
// Add-reduce a 2-lane double vector with scalar faddd in lane order
// (lane 1 moved to tmp lane 0 with INS): dst = src1 + src2[0] + src2[1].
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16796 
// Multiply-reduce a 2-lane double vector with scalar fmuld in lane order
// (lane 1 moved to tmp lane 0 with INS): dst = src1 * src2[0] * src2[1].
// Fix: the trailing format note previously read "add reduction2d"; this
// rule performs a multiply reduction (debug output only).
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
  %}
  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16816 
16817 // ====================VECTOR ARITHMETIC=======================================
16818 
16819 // --------------------------------- ADD --------------------------------------
16820 
// Integer vector add, byte lanes, 64-bit form (also used for length 4).
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Integer vector add, byte lanes, 128-bit form.
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Integer vector add, short lanes (length 2 uses the 4H form).
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Integer vector add, int lanes.
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Integer vector add, long lanes.
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// FP vector add, float lanes.
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16948 
// FP vector add, double lanes.
// Fix: add the length-2 predicate that was missing here but present in
// every sibling 2D rule (vsub2D, vmul2D); without it this rule matched
// AddVD nodes unconditionally, inconsistent with the rest of the file.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16961 
16962 // --------------------------------- SUB --------------------------------------
16963 
// Integer vector subtract, byte lanes, 64-bit form (also length 4).
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Integer vector subtract, short lanes (length 2 uses the 4H form).
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Integer vector subtract, int lanes.
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Integer vector subtract, long lanes.
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// FP vector subtract, float lanes.
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}

// FP vector subtract, double lanes.
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
17105 
17106 // --------------------------------- MUL --------------------------------------
17107 
// Integer vector multiply, short lanes (length 2 uses the 4H form).
// Note: no MulVB/MulVL rules — NEON MUL does not support D lanes.
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// Integer vector multiply, int lanes.
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// FP vector multiply, float lanes.
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17192 
17193 instruct vmul2D(vecX dst, vecX src1, vecX src2)
17194 %{
17195   predicate(n->as_Vector()->length() == 2);
17196   match(Set dst (MulVD src1 src2));
17197   ins_cost(INSN_COST);
17198   format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
17199   ins_encode %{
17200     __ fmul(as_FloatRegister($dst$$reg), __ T2D,
17201             as_FloatRegister($src1$$reg),
17202             as_FloatRegister($src2$$reg));
17203   %}
17204   ins_pipe(vmuldiv_fp128);
17205 %}
17206 
17207 // --------------------------------- MLA --------------------------------------
17208 
// Integer multiply-accumulate: dst += src1 * src2 (shorts, 4H).
// The match rule requires dst to be both the accumulator input and
// the result, which is what mla needs. Length 2 reuses the 4H form.
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// dst += src1 * src2 (eight shorts, 8H).
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// dst += src1 * src2 (two ints, 2S).
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// dst += src1 * src2 (four ints, 4S).
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// Fused dst + src1 * src2 (two floats, 2S); only when UseFMA,
// since fmla does not round the intermediate product.
instruct vmla2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// Fused dst + src1 * src2 (four floats, 4S); requires UseFMA.
instruct vmla4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// Fused dst + src1 * src2 (two doubles, 2D); requires UseFMA.
instruct vmla2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17307 
17308 // --------------------------------- MLS --------------------------------------
17309 
// Integer multiply-subtract: dst -= src1 * src2 (shorts, 4H).
// Length 2 reuses the 4H encoding.
instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// dst -= src1 * src2 (eight shorts, 8H).
instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// dst -= src1 * src2 (two ints, 2S).
instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// dst -= src1 * src2 (four ints, 4S).
instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// dst - src1 * src2 (fused, two floats). Either operand of the
// product may carry the negation, hence the two match rules.
instruct vmls2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// dst - src1 * src2 (fused, four floats).
instruct vmls4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// dst - src1 * src2 (fused, two doubles).
instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
  match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17411 
17412 // --------------------------------- DIV --------------------------------------
17413 
// Vector divide: two floats, D-register operands, 2S arrangement.
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// Vector divide: four floats, Q-register operands, 4S arrangement.
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// Vector divide: two doubles, Q-register operands, 2D arrangement.
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17455 
17456 // --------------------------------- SQRT -------------------------------------
17457 
// Vector square root: two doubles, 2D arrangement. This is the only
// vector sqrt form defined here (no SqrtVF rule in this section).
instruct vsqrt2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
             as_FloatRegister($src$$reg));
  %}
  ins_pipe(vsqrt_fp128);
%}
17469 
17470 // --------------------------------- ABS --------------------------------------
17471 
// Vector absolute value: two floats, 2S arrangement.
instruct vabs2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}

// Vector absolute value: four floats, 4S arrangement.
instruct vabs4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}

// Vector absolute value: two doubles, 2D arrangement.
instruct vabs2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
17510 
17511 // --------------------------------- NEG --------------------------------------
17512 
// Vector negate: two floats, 2S arrangement.
instruct vneg2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}

// Vector negate: four floats, 4S arrangement.
instruct vneg4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}

// Vector negate: two doubles, 2D arrangement.
instruct vneg2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
17551 
17552 // --------------------------------- AND --------------------------------------
17553 
// Vector bitwise AND, D-register operands. The 8B form also covers
// 4-byte vectors (see length_in_bytes predicate); upper lanes are
// simply part of the D register. Assembler entry point is andr
// because "and" is reserved in C++.
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}

// Vector bitwise AND, Q-register operands, 16B arrangement.
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17582 
17583 // --------------------------------- OR ---------------------------------------
17584 
17585 instruct vor8B(vecD dst, vecD src1, vecD src2)
17586 %{
17587   predicate(n->as_Vector()->length_in_bytes() == 4 ||
17588             n->as_Vector()->length_in_bytes() == 8);
17589   match(Set dst (OrV src1 src2));
17590   ins_cost(INSN_COST);
17591   format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
17592   ins_encode %{
17593     __ orr(as_FloatRegister($dst$$reg), __ T8B,
17594             as_FloatRegister($src1$$reg),
17595             as_FloatRegister($src2$$reg));
17596   %}
17597   ins_pipe(vlogical64);
17598 %}
17599 
// Vector bitwise OR, Q-register operands, 16B arrangement.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17613 
17614 // --------------------------------- XOR --------------------------------------
17615 
// Vector bitwise XOR (AArch64 eor), D-register operands. The 8B form
// also covers 4-byte vectors (see length_in_bytes predicate).
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}

// Vector bitwise XOR (eor), Q-register operands, 16B arrangement.
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17644 
17645 // ------------------------------ Shift ---------------------------------------
17646 
// Materialize a left-shift count: broadcast the GP-register count
// into every byte lane of a Q register for use by sshl/ushl.
instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (LShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
// Materialize a right-shift count: broadcast, then negate every lane,
// so downstream sshl/ushl perform a right shift.
instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
    __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
17666 
// Variable shift of bytes, D register. Matches both LShiftVB and
// RShiftVB with one sshl: for right shifts the shift register holds a
// negated count (built by vshiftcntR above), and sshl with a negative
// count is an arithmetic right shift.
instruct vsll8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable left/arithmetic-right shift of 16 bytes, Q register.
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Variable logical right shift of bytes, D register: ushl with the
// negated count from vshiftcntR.
instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable logical right shift of 16 bytes, Q register.
instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Immediate left shift of bytes, D register. A count >= 8 shifts out
// every bit, so the result is zeroed with eor(src, src) instead of
// emitting shl with an out-of-range immediate.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate left shift of 16 bytes, Q register; count >= 8 zeroes.
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}

// Immediate arithmetic right shift of bytes, D register. A count
// >= 8 is clamped to 7: shifting in sign bits saturates, so sshr #7
// gives the same result as any larger count.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate arithmetic right shift of 16 bytes; count clamped to 7.
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}

// Immediate logical right shift of bytes, D register; count >= 8
// zeroes the result via eor(src, src).
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate logical right shift of 16 bytes; count >= 8 zeroes.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17829 
// Variable shift of shorts, D register, 4H. Matches both LShiftVS and
// RShiftVS with one sshl: for right shifts the shift register holds a
// negated count (see vshiftcntR). Length 2 reuses the 4H form.
instruct vsll4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable left/arithmetic-right shift of eight shorts, Q register.
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Variable logical right shift of shorts, D register (negated count).
instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Variable logical right shift of eight shorts, Q register.
instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Immediate left shift of shorts, D register. A count >= 16 shifts
// out every bit, so the result is zeroed with eor(src, src).
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate left shift of eight shorts, Q register; >= 16 zeroes.
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}

// Immediate arithmetic right shift of shorts, D register. A count
// >= 16 is clamped to 15 (sign-fill saturates at element width - 1).
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate arithmetic right shift of eight shorts; clamped to 15.
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}

// Immediate logical right shift of shorts, D register; >= 16 zeroes.
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// Immediate logical right shift of eight shorts; >= 16 zeroes.
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17992 
17993 instruct vsll2I(vecD dst, vecD src, vecX shift) %{
17994   predicate(n->as_Vector()->length() == 2);
17995   match(Set dst (LShiftVI src shift));
17996   match(Set dst (RShiftVI src shift));
17997   ins_cost(INSN_COST);
17998   format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
17999   ins_encode %{
18000     __ sshl(as_FloatRegister($dst$$reg), __ T2S,
18001             as_FloatRegister($src$$reg),
18002             as_FloatRegister($shift$$reg));
18003   %}
18004   ins_pipe(vshift64);
18005 %}
18006 
18007 instruct vsll4I(vecX dst, vecX src, vecX shift) %{
18008   predicate(n->as_Vector()->length() == 4);
18009   match(Set dst (LShiftVI src shift));
18010   match(Set dst (RShiftVI src shift));
18011   ins_cost(INSN_COST);
18012   format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
18013   ins_encode %{
18014     __ sshl(as_FloatRegister($dst$$reg), __ T4S,
18015             as_FloatRegister($src$$reg),
18016             as_FloatRegister($shift$$reg));
18017   %}
18018   ins_pipe(vshift128);
18019 %}
18020 
// Variable logical (unsigned) right shift of 2 int lanes. USHL shifts
// logically right for negative per-lane counts, so this presumably
// expects $shift to hold negated counts -- TODO confirm upstream.
instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
18033 
// Variable logical (unsigned) right shift of 4 int lanes. USHL shifts
// logically right for negative per-lane counts, so this presumably
// expects $shift to hold negated counts -- TODO confirm upstream.
instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
18046 
// Left shift of 2 int lanes by an immediate (SHL, 2S arrangement).
// Assumes the constant fits SHL's 0..31 range for 32-bit lanes;
// presumably guaranteed by Java's masking of int shift counts -- confirm.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}
18059 
// Left shift of 4 int lanes by an immediate (SHL, 4S arrangement).
// Assumes the constant fits SHL's 0..31 range for 32-bit lanes;
// presumably guaranteed by Java's masking of int shift counts -- confirm.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18072 
// Arithmetic (signed) right shift of 2 int lanes by an immediate (SSHR).
// NOTE(review): SSHR encodes counts 1..32 for 32-bit lanes; a zero-count
// node is presumably never generated (identity shift) -- confirm.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}
18085 
// Arithmetic (signed) right shift of 4 int lanes by an immediate (SSHR).
// NOTE(review): SSHR encodes counts 1..32 for 32-bit lanes; a zero-count
// node is presumably never generated (identity shift) -- confirm.
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18098 
// Logical (unsigned) right shift of 2 int lanes by an immediate (USHR).
// NOTE(review): unlike the 8H rule above, there is no >=32 zeroing path;
// presumably Java's masking keeps the count in 0..31 -- confirm.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}
18111 
// Logical (unsigned) right shift of 4 int lanes by an immediate (USHR).
// NOTE(review): no explicit >=32 zeroing path as in the short rules;
// presumably Java's masking keeps the count in 0..31 -- confirm.
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18124 
// Variable shift of 2 long lanes. SSHL shifts left for positive per-lane
// counts and arithmetically right for negative ones, so one rule covers
// both LShiftVL and RShiftVL.
// NOTE(review): RShiftVL here assumes negated counts in $shift -- verify
// against the rules that build the shift-count vector.
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
18138 
// Variable logical (unsigned) right shift of 2 long lanes. USHL shifts
// logically right for negative per-lane counts, so this presumably
// expects $shift to hold negated counts -- TODO confirm upstream.
instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
18151 
// Left shift of 2 long lanes by an immediate (SHL, 2D arrangement).
// Assumes the constant fits SHL's 0..63 range for 64-bit lanes;
// presumably guaranteed by Java's masking of long shift counts -- confirm.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18164 
// Arithmetic (signed) right shift of 2 long lanes by an immediate (SSHR).
// NOTE(review): SSHR encodes counts 1..64 for 64-bit lanes; a zero-count
// node is presumably never generated (identity shift) -- confirm.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18177 
// Logical (unsigned) right shift of 2 long lanes by an immediate (USHR).
// NOTE(review): no explicit >=64 zeroing path; presumably Java's masking
// keeps the count in 0..63 -- confirm.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18190 
18191 //----------PEEPHOLE RULES-----------------------------------------------------
18192 // These must follow all instruction definitions as they use the names
18193 // defined in the instructions definitions.
18194 //
18195 // peepmatch ( root_instr_name [preceding_instruction]* );
18196 //
18197 // peepconstraint %{
18198 // (instruction_number.operand_name relational_op instruction_number.operand_name
18199 //  [, ...] );
18200 // // instruction numbers are zero-based using left to right order in peepmatch
18201 //
18202 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
18203 // // provide an instruction_number.operand_name for each operand that appears
18204 // // in the replacement instruction's match rule
18205 //
18206 // ---------VM FLAGS---------------------------------------------------------
18207 //
18208 // All peephole optimizations can be turned off using -XX:-OptoPeephole
18209 //
18210 // Each peephole rule is given an identifying number starting with zero and
18211 // increasing by one in the order seen by the parser.  An individual peephole
18212 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
18213 // on the command-line.
18214 //
18215 // ---------CURRENT LIMITATIONS----------------------------------------------
18216 //
18217 // Only match adjacent instructions in same basic block
18218 // Only equality constraints
18219 // Only constraints between operands, not (0.dest_reg == RAX_enc)
18220 // Only one replacement instruction
18221 //
18222 // ---------EXAMPLE----------------------------------------------------------
18223 //
18224 // // pertinent parts of existing instructions in architecture description
18225 // instruct movI(iRegINoSp dst, iRegI src)
18226 // %{
18227 //   match(Set dst (CopyI src));
18228 // %}
18229 //
18230 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
18231 // %{
18232 //   match(Set dst (AddI dst src));
18233 //   effect(KILL cr);
18234 // %}
18235 //
18236 // // Change (inc mov) to lea
18237 // peephole %{
//   // increment preceded by register-register move
18239 //   peepmatch ( incI_iReg movI );
18240 //   // require that the destination register of the increment
18241 //   // match the destination register of the move
18242 //   peepconstraint ( 0.dst == 1.dst );
18243 //   // construct a replacement instruction that sets
18244 //   // the destination to ( move's source register + one )
18245 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
18246 // %}
18247 //
18248 
18249 // Implementation no longer uses movX instructions since
18250 // machine-independent system no longer uses CopyX nodes.
18251 //
18252 // peephole
18253 // %{
18254 //   peepmatch (incI_iReg movI);
18255 //   peepconstraint (0.dst == 1.dst);
18256 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18257 // %}
18258 
18259 // peephole
18260 // %{
18261 //   peepmatch (decI_iReg movI);
18262 //   peepconstraint (0.dst == 1.dst);
18263 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18264 // %}
18265 
18266 // peephole
18267 // %{
18268 //   peepmatch (addI_iReg_imm movI);
18269 //   peepconstraint (0.dst == 1.dst);
18270 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18271 // %}
18272 
18273 // peephole
18274 // %{
18275 //   peepmatch (incL_iReg movL);
18276 //   peepconstraint (0.dst == 1.dst);
18277 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18278 // %}
18279 
18280 // peephole
18281 // %{
18282 //   peepmatch (decL_iReg movL);
18283 //   peepconstraint (0.dst == 1.dst);
18284 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18285 // %}
18286 
18287 // peephole
18288 // %{
18289 //   peepmatch (addL_iReg_imm movL);
18290 //   peepconstraint (0.dst == 1.dst);
18291 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18292 // %}
18293 
18294 // peephole
18295 // %{
18296 //   peepmatch (addP_iReg_imm movP);
18297 //   peepconstraint (0.dst == 1.dst);
18298 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
18299 // %}
18300 
18301 // // Change load of spilled value to only a spill
18302 // instruct storeI(memory mem, iRegI src)
18303 // %{
18304 //   match(Set mem (StoreI mem src));
18305 // %}
18306 //
18307 // instruct loadI(iRegINoSp dst, memory mem)
18308 // %{
18309 //   match(Set dst (LoadI mem));
18310 // %}
18311 //
18312 
18313 //----------SMARTSPILL RULES---------------------------------------------------
18314 // These must follow all instruction definitions as they use the names
18315 // defined in the instructions definitions.
18316 
18317 // Local Variables:
18318 // mode: c++
18319 // End: