1 //
   2 // Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, Red Hat Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
// architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
//   r27-r31 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
  74 // as regards Java usage. we don't use any callee save registers
  75 // because this makes it difficult to de-optimise a frame (see comment
  76 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
// General Registers

// Each 64-bit integer register is declared as two 32-bit halves: Rn
// is the real lower half and Rn_H is the virtual upper half used
// only by the register allocator (see comment above).  r8 and r9
// are deliberately left undeclared so that they remain invisible to
// the allocator and can be used as scratch registers.

reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
// r19-r26: save-on-call for Java use (first column) but
// save-on-entry under the C calling convention (second column)
reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
// r27-r31 hold fixed roles (see trailing comments) and are never
// allocated for Java use (NS in the first column)
reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values up to 8 * 32
 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
 160 // for Java use float registers v0-v15 are always save on call whereas
 161 // the platform ABI treats v8-v15 as callee save). float registers
 162 // v16-v31 are SOC as per the platform spec
 163 
 164   reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
 165   reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
 166   reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
 167   reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );
 168 
 169   reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
 170   reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
 171   reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
 172   reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );
 173 
 174   reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
 175   reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
 176   reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
 177   reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );
 178 
 179   reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
 180   reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
 181   reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
 182   reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );
 183 
 184   reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
 185   reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
 186   reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
 187   reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );
 188 
 189   reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
 190   reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
 191   reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
 192   reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );
 193 
 194   reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
 195   reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
 196   reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
 197   reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );
 198 
 199   reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
 200   reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
 201   reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
 202   reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );
 203 
 204   reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
 205   reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
 206   reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
 207   reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );
 208 
 209   reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
 210   reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
 211   reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
 212   reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );
 213 
 214   reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
 215   reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
 216   reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
 217   reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
 218 
 219   reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
 220   reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
 221   reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
 222   reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
 223 
 224   reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
 225   reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
 226   reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
 227   reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
 228 
 229   reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
 230   reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
 231   reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
 232   reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
 233 
 234   reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
 235   reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
 236   reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
 237   reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
 238 
 239   reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
 240   reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
 241   reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
 242   reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
 243 
 244   reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
 245   reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
 246   reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
 247   reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
 248 
 249   reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
 250   reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
 251   reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
 252   reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
 253 
 254   reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
 255   reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
 256   reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
 257   reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
 258 
 259   reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
 260   reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
 261   reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
 262   reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
 263 
 264   reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
 265   reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
 266   reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
 267   reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
 268 
 269   reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
 270   reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
 271   reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
 272   reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
 273 
 274   reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
 275   reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
 276   reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
 277   reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
 278 
 279   reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
 280   reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
 281   reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
 282   reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
 283 
 284   reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
 285   reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
 286   reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
 287   reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
 288 
 289   reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
 290   reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
 291   reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
 292   reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
 293 
 294   reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
 295   reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
 296   reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
 297   reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
 298 
 299   reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
 300   reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
 301   reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
 302   reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
 303 
 304   reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
 305   reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
 306   reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
 307   reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
 308 
 309   reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
 310   reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
 311   reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
 312   reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
 313 
 314   reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
 315   reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
 316   reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
 317   reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
 318 
 319   reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
 320   reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
 321   reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
 322   reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
 328 // the AArch64 CSPR status flag register is not directly acessible as
 329 // instruction operand. the FPSR status flag register is a system
 330 // register which can be written/read using MSR/MRS but again does not
 331 // appear as an operand (a code identifying the FSPR occurs as an
 332 // immediate value in the instruction).
 333 
 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

// chunk0: the integer registers, listed in allocation priority order
// (volatile scratch registers first, argument registers next, then
// callee-saved, then the non-allocatable special registers)
alloc_class chunk0(
    // volatiles
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,

    // arg registers
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,

    // non-volatiles
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers

    R27, R27_H, // heapbase
    R28, R28_H, // thread
    R29, R29_H, // fp
    R30, R30_H, // lr
    R31, R31_H, // sp
);

// chunk1: the FP/SIMD registers, listed in allocation priority order
alloc_class chunk1(

    // no save
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,

    // arg registers
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,

    // non-volatiles
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
);

// chunk2: the flags register
alloc_class chunk2(RFLAGS);
 428 
//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Class for all 32 bit integer registers -- excludes SP which will
// never be used as an integer register
// (only the real lower halves are listed; R31/sp is omitted)
reg_class any_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26,
    R27,
    R28,
    R29,
    R30
);

// Singleton classes are used by instructions whose operand must live
// in one specific register.

// Singleton class for R0 int register
reg_class int_r0_reg(R0);

// Singleton class for R2 int register
reg_class int_r2_reg(R2);

// Singleton class for R3 int register
reg_class int_r3_reg(R3);

// Singleton class for R4 int register
reg_class int_r4_reg(R4);
 483 
// Class for all long integer registers (including SP, i.e. R31)
reg_class any_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 517 
// Class for all non-special integer registers
// (special registers r27-r31 are excluded -- see the commented-out
// entries below)
reg_class no_special_reg32_no_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
 /* R29, */                     // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);

// Same as no_special_reg32_no_fp except that R29 (fp) is allocatable
reg_class no_special_reg32_with_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
    R29,                        // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);

// Selects between the two classes above according to
// PreserveFramePointer: fp is only allocatable when the frame
// pointer does not need to be preserved.
reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
// Class for all non-special long integer registers
// (special registers r27-r31 are excluded -- see the commented-out
// entries below)
reg_class no_special_reg_no_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

// Same as no_special_reg_no_fp except that R29 (fp) is allocatable
reg_class no_special_reg_with_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
    R29, R29_H,                 // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

// Selects between the two classes above according to
// PreserveFramePointer: fp is only allocatable when the frame
// pointer does not need to be preserved.
reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
// Class for 64 bit register r0
reg_class r0_reg(
    R0, R0_H
);

// Class for 64 bit register r1
reg_class r1_reg(
    R1, R1_H
);

// Class for 64 bit register r2
reg_class r2_reg(
    R2, R2_H
);

// Class for 64 bit register r3
reg_class r3_reg(
    R3, R3_H
);

// Class for 64 bit register r4
reg_class r4_reg(
    R4, R4_H
);

// Class for 64 bit register r5
reg_class r5_reg(
    R5, R5_H
);

// Class for 64 bit register r10
reg_class r10_reg(
    R10, R10_H
);

// Class for 64 bit register r11
reg_class r11_reg(
    R11, R11_H
);

// Class for method register (r12, also annotated "rmethod" above)
reg_class method_reg(
    R12, R12_H
);

// Class for heapbase register (r27)
reg_class heapbase_reg(
    R27, R27_H
);

// Class for thread register (r28)
reg_class thread_reg(
    R28, R28_H
);

// Class for frame pointer register (r29)
reg_class fp_reg(
    R29, R29_H
);

// Class for link register (r30)
reg_class lr_reg(
    R30, R30_H
);

// Class for long sp register (r31)
reg_class sp_reg(
  R31, R31_H
);
 725 
// Class for all pointer registers (includes the special registers
// r27-r31)
reg_class ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);

// Class for all non_special pointer registers (as above, but the
// special registers r27-r31 are excluded)
reg_class no_special_ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 793 
// Class for all float registers
// (only the first 32-bit slot of each V register is listed)
reg_class float_reg(
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
    V17,
    V18,
    V19,
    V20,
    V21,
    V22,
    V23,
    V24,
    V25,
    V26,
    V27,
    V28,
    V29,
    V30,
    V31
);

// Double precision float registers have virtual `high halves' that
// are needed by the allocator.
// Class for all double registers (two 32-bit slots per register, as
// required by the ADL pairing rule above)
reg_class double_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 867 
// Class for all 64bit vector registers
// A 64-bit vector occupies the same two 32-bit slots per register
// (Vn, Vn_H) as a double.
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 903 
// Class for all 128bit vector registers
// A 128-bit vector needs four 32-bit slots per register
// (Vn, Vn_H, Vn_J, Vn_K).
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
// Class for 128 bit register v0
// NOTE(review): only two 32-bit slots (V0, V0_H) are listed although
// the comment says 128 bit; vectorx_reg uses four slots per 128-bit
// register -- confirm whether the _J/_K halves are needed here.
reg_class v0_reg(
    V0, V0_H
);
 944 
// Class for 128 bit register v1
// NOTE(review): only two 32-bit slots (V1, V1_H) are listed although
// the comment says 128 bit -- confirm whether _J/_K are needed.
reg_class v1_reg(
    V1, V1_H
);
 949 
// Class for 128 bit register v2
// NOTE(review): only two 32-bit slots (V2, V2_H) are listed although
// the comment says 128 bit -- confirm whether _J/_K are needed.
reg_class v2_reg(
    V2, V2_H
);
 954 
// Class for 128 bit register v3
// NOTE(review): only two 32-bit slots (V3, V3_H) are listed although
// the comment says 128 bit -- confirm whether _J/_K are needed.
reg_class v3_reg(
    V3, V3_H
);
 959 
// Singleton class for condition codes (the NZCV flags register)
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls rank as twice the cost of a register op.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile references are the most expensive memory operations.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "gc/shared/cardTableModRefBS.hpp"
 999 
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  // Zero because this platform does not emit call trampolines.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  // Zero for the same reason as above.
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1017 
class HandlerImpl {

 public:

  // emitters for the exception and deoptimization handler stubs;
  // both return the offset of the emitted code within cbuf
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // worst-case code size of the exception handler: one far branch
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // NOTE(review): the budget of 4 instructions assumes a far branch
    // needs at most 3 (cf. MacroAssembler::far_branch_size) -- confirm
    return 4 * NativeInstruction::instruction_size;
  }
};
1034 
  // graph traversal helpers

  // locate the membar linked to a node through Ctl and Mem ProjNodes,
  // searching up (parent) or down (child) the ideal graph
  MemBarNode *parent_membar(const Node *n);
  MemBarNode *child_membar(const MemBarNode *n);
  // does barrier qualify as the leading membar of a volatile
  // put/get/CAS signature?
  bool leading_membar(const MemBarNode *barrier);

  // is barrier the StoreLoad membar of a GC card mark sequence?
  bool is_card_mark_membar(const MemBarNode *barrier);
  // presumably true for the CompareAndSwapX ideal opcodes -- see the
  // definition in the source block
  bool is_CAS(int opcode);

  // membar-to-membar traversals across a volatile put or CAS subgraph
  MemBarNode *leading_to_normal(MemBarNode *leading);
  MemBarNode *normal_to_leading(const MemBarNode *barrier);
  MemBarNode *card_mark_to_trailing(const MemBarNode *barrier);
  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
  MemBarNode *trailing_to_leading(const MemBarNode *trailing);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);
1066 %}
1067 
1068 source %{
1069 
1070   // Optimizaton of volatile gets and puts
1071   // -------------------------------------
1072   //
1073   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1074   // use to implement volatile reads and writes. For a volatile read
1075   // we simply need
1076   //
1077   //   ldar<x>
1078   //
1079   // and for a volatile write we need
1080   //
1081   //   stlr<x>
1082   //
1083   // Alternatively, we can implement them by pairing a normal
1084   // load/store with a memory barrier. For a volatile read we need
1085   //
1086   //   ldr<x>
1087   //   dmb ishld
1088   //
1089   // for a volatile write
1090   //
1091   //   dmb ish
1092   //   str<x>
1093   //   dmb ish
1094   //
1095   // We can also use ldaxr and stlxr to implement compare and swap CAS
1096   // sequences. These are normally translated to an instruction
1097   // sequence like the following
1098   //
1099   //   dmb      ish
1100   // retry:
1101   //   ldxr<x>   rval raddr
1102   //   cmp       rval rold
1103   //   b.ne done
1104   //   stlxr<x>  rval, rnew, rold
1105   //   cbnz      rval retry
1106   // done:
1107   //   cset      r0, eq
1108   //   dmb ishld
1109   //
1110   // Note that the exclusive store is already using an stlxr
1111   // instruction. That is required to ensure visibility to other
1112   // threads of the exclusive write (assuming it succeeds) before that
1113   // of any subsequent writes.
1114   //
1115   // The following instruction sequence is an improvement on the above
1116   //
1117   // retry:
1118   //   ldaxr<x>  rval raddr
1119   //   cmp       rval rold
1120   //   b.ne done
1121   //   stlxr<x>  rval, rnew, rold
1122   //   cbnz      rval retry
1123   // done:
1124   //   cset      r0, eq
1125   //
1126   // We don't need the leading dmb ish since the stlxr guarantees
1127   // visibility of prior writes in the case that the swap is
1128   // successful. Crucially we don't have to worry about the case where
1129   // the swap is not successful since no valid program should be
1130   // relying on visibility of prior changes by the attempting thread
1131   // in the case where the CAS fails.
1132   //
1133   // Similarly, we don't need the trailing dmb ishld if we substitute
1134   // an ldaxr instruction since that will provide all the guarantees we
1135   // require regarding observation of changes made by other threads
1136   // before any change to the CAS address observed by the load.
1137   //
1138   // In order to generate the desired instruction sequence we need to
1139   // be able to identify specific 'signature' ideal graph node
  // sequences which i) occur as a translation of volatile reads or
1141   // writes or CAS operations and ii) do not occur through any other
1142   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1144   // sequences to the desired machine code sequences. Selection of the
1145   // alternative rules can be implemented by predicates which identify
1146   // the relevant node sequences.
1147   //
1148   // The ideal graph generator translates a volatile read to the node
1149   // sequence
1150   //
1151   //   LoadX[mo_acquire]
1152   //   MemBarAcquire
1153   //
1154   // As a special case when using the compressed oops optimization we
1155   // may also see this variant
1156   //
1157   //   LoadN[mo_acquire]
1158   //   DecodeN
1159   //   MemBarAcquire
1160   //
1161   // A volatile write is translated to the node sequence
1162   //
1163   //   MemBarRelease
1164   //   StoreX[mo_release] {CardMark}-optional
1165   //   MemBarVolatile
1166   //
1167   // n.b. the above node patterns are generated with a strict
1168   // 'signature' configuration of input and output dependencies (see
1169   // the predicates below for exact details). The card mark may be as
1170   // simple as a few extra nodes or, in a few GC configurations, may
1171   // include more complex control flow between the leading and
1172   // trailing memory barriers. However, whatever the card mark
1173   // configuration these signatures are unique to translated volatile
1174   // reads/stores -- they will not appear as a result of any other
1175   // bytecode translation or inlining nor as a consequence of
1176   // optimizing transforms.
1177   //
1178   // We also want to catch inlined unsafe volatile gets and puts and
1179   // be able to implement them using either ldar<x>/stlr<x> or some
1180   // combination of ldr<x>/stlr<x> and dmb instructions.
1181   //
1182   // Inlined unsafe volatiles puts manifest as a minor variant of the
1183   // normal volatile put node sequence containing an extra cpuorder
1184   // membar
1185   //
1186   //   MemBarRelease
1187   //   MemBarCPUOrder
1188   //   StoreX[mo_release] {CardMark}-optional
1189   //   MemBarVolatile
1190   //
1191   // n.b. as an aside, the cpuorder membar is not itself subject to
1192   // matching and translation by adlc rules.  However, the rule
1193   // predicates need to detect its presence in order to correctly
1194   // select the desired adlc rules.
1195   //
1196   // Inlined unsafe volatile gets manifest as a somewhat different
1197   // node sequence to a normal volatile get
1198   //
1199   //   MemBarCPUOrder
1200   //        ||       \\
1201   //   MemBarAcquire LoadX[mo_acquire]
1202   //        ||
1203   //   MemBarCPUOrder
1204   //
1205   // In this case the acquire membar does not directly depend on the
1206   // load. However, we can be sure that the load is generated from an
1207   // inlined unsafe volatile get if we see it dependent on this unique
1208   // sequence of membar nodes. Similarly, given an acquire membar we
1209   // can know that it was added because of an inlined unsafe volatile
1210   // get if it is fed and feeds a cpuorder membar and if its feed
1211   // membar also feeds an acquiring load.
1212   //
1213   // Finally an inlined (Unsafe) CAS operation is translated to the
1214   // following ideal graph
1215   //
1216   //   MemBarRelease
1217   //   MemBarCPUOrder
1218   //   CompareAndSwapX {CardMark}-optional
1219   //   MemBarCPUOrder
1220   //   MemBarAcquire
1221   //
1222   // So, where we can identify these volatile read and write
1223   // signatures we can choose to plant either of the above two code
1224   // sequences. For a volatile read we can simply plant a normal
1225   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1226   // also choose to inhibit translation of the MemBarAcquire and
1227   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1228   //
1229   // When we recognise a volatile store signature we can choose to
  // plant a dmb ish as a translation for the MemBarRelease, a
1231   // normal str<x> and then a dmb ish for the MemBarVolatile.
1232   // Alternatively, we can inhibit translation of the MemBarRelease
1233   // and MemBarVolatile and instead plant a simple stlr<x>
1234   // instruction.
1235   //
1236   // when we recognise a CAS signature we can choose to plant a dmb
1237   // ish as a translation for the MemBarRelease, the conventional
1238   // macro-instruction sequence for the CompareAndSwap node (which
1239   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1240   // Alternatively, we can elide generation of the dmb instructions
1241   // and plant the alternative CompareAndSwap macro-instruction
1242   // sequence (which uses ldaxr<x>).
1243   //
1244   // Of course, the above only applies when we see these signature
1245   // configurations. We still want to plant dmb instructions in any
1246   // other cases where we may see a MemBarAcquire, MemBarRelease or
1247   // MemBarVolatile. For example, at the end of a constructor which
1248   // writes final/volatile fields we will see a MemBarRelease
1249   // instruction and this needs a 'dmb ish' lest we risk the
1250   // constructed object being visible without making the
1251   // final/volatile field writes visible.
1252   //
1253   // n.b. the translation rules below which rely on detection of the
1254   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1255   // If we see anything other than the signature configurations we
1256   // always just translate the loads and stores to ldr<x> and str<x>
1257   // and translate acquire, release and volatile membars to the
1258   // relevant dmb instructions.
1259   //
1260 
1261   // graph traversal helpers used for volatile put/get and CAS
1262   // optimization
1263 
1264   // 1) general purpose helpers
1265 
1266   // if node n is linked to a parent MemBarNode by an intervening
1267   // Control and Memory ProjNode return the MemBarNode otherwise return
1268   // NULL.
1269   //
1270   // n may only be a Load or a MemBar.
1271 
1272   MemBarNode *parent_membar(const Node *n)
1273   {
1274     Node *ctl = NULL;
1275     Node *mem = NULL;
1276     Node *membar = NULL;
1277 
1278     if (n->is_Load()) {
1279       ctl = n->lookup(LoadNode::Control);
1280       mem = n->lookup(LoadNode::Memory);
1281     } else if (n->is_MemBar()) {
1282       ctl = n->lookup(TypeFunc::Control);
1283       mem = n->lookup(TypeFunc::Memory);
1284     } else {
1285         return NULL;
1286     }
1287 
1288     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
1289       return NULL;
1290     }
1291 
1292     membar = ctl->lookup(0);
1293 
1294     if (!membar || !membar->is_MemBar()) {
1295       return NULL;
1296     }
1297 
1298     if (mem->lookup(0) != membar) {
1299       return NULL;
1300     }
1301 
1302     return membar->as_MemBar();
1303   }
1304 
1305   // if n is linked to a child MemBarNode by intervening Control and
1306   // Memory ProjNodes return the MemBarNode otherwise return NULL.
1307 
1308   MemBarNode *child_membar(const MemBarNode *n)
1309   {
1310     ProjNode *ctl = n->proj_out(TypeFunc::Control);
1311     ProjNode *mem = n->proj_out(TypeFunc::Memory);
1312 
1313     // MemBar needs to have both a Ctl and Mem projection
1314     if (! ctl || ! mem)
1315       return NULL;
1316 
1317     MemBarNode *child = NULL;
1318     Node *x;
1319 
1320     for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1321       x = ctl->fast_out(i);
1322       // if we see a membar we keep hold of it. we may also see a new
1323       // arena copy of the original but it will appear later
1324       if (x->is_MemBar()) {
1325           child = x->as_MemBar();
1326           break;
1327       }
1328     }
1329 
1330     if (child == NULL) {
1331       return NULL;
1332     }
1333 
1334     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1335       x = mem->fast_out(i);
1336       // if we see a membar we keep hold of it. we may also see a new
1337       // arena copy of the original but it will appear later
1338       if (x == child) {
1339         return child;
1340       }
1341     }
1342     return NULL;
1343   }
1344 
1345   // helper predicate use to filter candidates for a leading memory
1346   // barrier
1347   //
1348   // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1349   // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1350 
1351   bool leading_membar(const MemBarNode *barrier)
1352   {
1353     int opcode = barrier->Opcode();
1354     // if this is a release membar we are ok
1355     if (opcode == Op_MemBarRelease) {
1356       return true;
1357     }
1358     // if its a cpuorder membar . . .
1359     if (opcode != Op_MemBarCPUOrder) {
1360       return false;
1361     }
1362     // then the parent has to be a release membar
1363     MemBarNode *parent = parent_membar(barrier);
1364     if (!parent) {
1365       return false;
1366     }
1367     opcode = parent->Opcode();
1368     return opcode == Op_MemBarRelease;
1369   }
1370 
1371   // 2) card mark detection helper
1372 
1373   // helper predicate which can be used to detect a volatile membar
1374   // introduced as part of a conditional card mark sequence either by
1375   // G1 or by CMS when UseCondCardMark is true.
1376   //
1377   // membar can be definitively determined to be part of a card mark
1378   // sequence if and only if all the following hold
1379   //
1380   // i) it is a MemBarVolatile
1381   //
1382   // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1383   // true
1384   //
1385   // iii) the node's Mem projection feeds a StoreCM node.
1386 
1387   bool is_card_mark_membar(const MemBarNode *barrier)
1388   {
1389     if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
1390       return false;
1391     }
1392 
1393     if (barrier->Opcode() != Op_MemBarVolatile) {
1394       return false;
1395     }
1396 
1397     ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1398 
1399     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1400       Node *y = mem->fast_out(i);
1401       if (y->Opcode() == Op_StoreCM) {
1402         return true;
1403       }
1404     }
1405 
1406     return false;
1407   }
1408 
1409 
1410   // 3) helper predicates to traverse volatile put or CAS graphs which
1411   // may contain GC barrier subgraphs
1412 
1413   // Preamble
1414   // --------
1415   //
1416   // for volatile writes we can omit generating barriers and employ a
  // releasing store when we see a node sequence with a
1418   // leading MemBarRelease and a trailing MemBarVolatile as follows
1419   //
1420   //   MemBarRelease
1421   //  {      ||      } -- optional
1422   //  {MemBarCPUOrder}
1423   //         ||     \\
1424   //         ||     StoreX[mo_release]
1425   //         | \     /
1426   //         | MergeMem
1427   //         | /
1428   //   MemBarVolatile
1429   //
1430   // where
1431   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1432   //  | \ and / indicate further routing of the Ctl and Mem feeds
1433   //
1434   // this is the graph we see for non-object stores. however, for a
1435   // volatile Object store (StoreN/P) we may see other nodes below the
1436   // leading membar because of the need for a GC pre- or post-write
1437   // barrier.
1438   //
  // with most GC configurations we will see this simple variant which
1440   // includes a post-write barrier card mark.
1441   //
1442   //   MemBarRelease______________________________
1443   //         ||    \\               Ctl \        \\
1444   //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
1445   //         | \     /                       . . .  /
1446   //         | MergeMem
1447   //         | /
1448   //         ||      /
1449   //   MemBarVolatile
1450   //
1451   // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1452   // the object address to an int used to compute the card offset) and
1453   // Ctl+Mem to a StoreB node (which does the actual card mark).
1454   //
1455   // n.b. a StoreCM node will only appear in this configuration when
1456   // using CMS. StoreCM differs from a normal card mark write (StoreB)
1457   // because it implies a requirement to order visibility of the card
1458   // mark (StoreCM) relative to the object put (StoreP/N) using a
1459   // StoreStore memory barrier (arguably this ought to be represented
1460   // explicitly in the ideal graph but that is not how it works). This
1461   // ordering is required for both non-volatile and volatile
1462   // puts. Normally that means we need to translate a StoreCM using
1463   // the sequence
1464   //
1465   //   dmb ishst
1466   //   stlrb
1467   //
1468   // However, in the case of a volatile put if we can recognise this
1469   // configuration and plant an stlr for the object write then we can
1470   // omit the dmb and just plant an strb since visibility of the stlr
1471   // is ordered before visibility of subsequent stores. StoreCM nodes
1472   // also arise when using G1 or using CMS with conditional card
1473   // marking. In these cases (as we shall see) we don't need to insert
1474   // the dmb when translating StoreCM because there is already an
1475   // intervening StoreLoad barrier between it and the StoreP/N.
1476   //
1477   // It is also possible to perform the card mark conditionally on it
1478   // currently being unmarked in which case the volatile put graph
1479   // will look slightly different
1480   //
1481   //   MemBarRelease____________________________________________
1482   //         ||    \\               Ctl \     Ctl \     \\  Mem \
1483   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
1484   //         | \     /                              \            |
1485   //         | MergeMem                            . . .      StoreB
1486   //         | /                                                /
1487   //         ||     /
1488   //   MemBarVolatile
1489   //
1490   // It is worth noting at this stage that both the above
1491   // configurations can be uniquely identified by checking that the
1492   // memory flow includes the following subgraph:
1493   //
1494   //   MemBarRelease
1495   //  {MemBarCPUOrder}
1496   //          |  \      . . .
1497   //          |  StoreX[mo_release]  . . .
1498   //          |   /
1499   //         MergeMem
1500   //          |
1501   //   MemBarVolatile
1502   //
1503   // This is referred to as a *normal* subgraph. It can easily be
1504   // detected starting from any candidate MemBarRelease,
1505   // StoreX[mo_release] or MemBarVolatile.
1506   //
1507   // A simple variation on this normal case occurs for an unsafe CAS
1508   // operation. The basic graph for a non-object CAS is
1509   //
1510   //   MemBarRelease
1511   //         ||
1512   //   MemBarCPUOrder
1513   //         ||     \\   . . .
1514   //         ||     CompareAndSwapX
1515   //         ||       |
1516   //         ||     SCMemProj
1517   //         | \     /
1518   //         | MergeMem
1519   //         | /
1520   //   MemBarCPUOrder
1521   //         ||
1522   //   MemBarAcquire
1523   //
1524   // The same basic variations on this arrangement (mutatis mutandis)
  // occur when a card mark is introduced. i.e. we see the same basic
  // shape but the StoreP/N is replaced with CompareAndSwapP/N and the
1527   // tail of the graph is a pair comprising a MemBarCPUOrder +
1528   // MemBarAcquire.
1529   //
1530   // So, in the case of a CAS the normal graph has the variant form
1531   //
1532   //   MemBarRelease
1533   //   MemBarCPUOrder
1534   //          |   \      . . .
1535   //          |  CompareAndSwapX  . . .
1536   //          |    |
1537   //          |   SCMemProj
1538   //          |   /  . . .
1539   //         MergeMem
1540   //          |
1541   //   MemBarCPUOrder
1542   //   MemBarAcquire
1543   //
1544   // This graph can also easily be detected starting from any
1545   // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire.
1546   //
1547   // the code below uses two helper predicates, leading_to_normal and
1548   // normal_to_leading to identify these normal graphs, one validating
1549   // the layout starting from the top membar and searching down and
1550   // the other validating the layout starting from the lower membar
1551   // and searching up.
1552   //
1553   // There are two special case GC configurations when a normal graph
1554   // may not be generated: when using G1 (which always employs a
1555   // conditional card mark); and when using CMS with conditional card
1556   // marking configured. These GCs are both concurrent rather than
  // stop-the-world GCs. So they introduce extra Ctl+Mem flow into the
1558   // graph between the leading and trailing membar nodes, in
  // particular enforcing stronger memory serialisation between the
1560   // object put and the corresponding conditional card mark. CMS
1561   // employs a post-write GC barrier while G1 employs both a pre- and
1562   // post-write GC barrier. Of course the extra nodes may be absent --
1563   // they are only inserted for object puts. This significantly
1564   // complicates the task of identifying whether a MemBarRelease,
1565   // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
1566   // when using these GC configurations (see below). It adds similar
1567   // complexity to the task of identifying whether a MemBarRelease,
1568   // CompareAndSwapX or MemBarAcquire forms part of a CAS.
1569   //
1570   // In both cases the post-write subtree includes an auxiliary
1571   // MemBarVolatile (StoreLoad barrier) separating the object put and
1572   // the read of the corresponding card. This poses two additional
1573   // problems.
1574   //
1575   // Firstly, a card mark MemBarVolatile needs to be distinguished
1576   // from a normal trailing MemBarVolatile. Resolving this first
1577   // problem is straightforward: a card mark MemBarVolatile always
1578   // projects a Mem feed to a StoreCM node and that is a unique marker
1579   //
1580   //      MemBarVolatile (card mark)
1581   //       C |    \     . . .
1582   //         |   StoreCM   . . .
1583   //       . . .
1584   //
1585   // The second problem is how the code generator is to translate the
1586   // card mark barrier? It always needs to be translated to a "dmb
1587   // ish" instruction whether or not it occurs as part of a volatile
1588   // put. A StoreLoad barrier is needed after the object put to ensure
1589   // i) visibility to GC threads of the object put and ii) visibility
1590   // to the mutator thread of any card clearing write by a GC
1591   // thread. Clearly a normal store (str) will not guarantee this
1592   // ordering but neither will a releasing store (stlr). The latter
1593   // guarantees that the object put is visible but does not guarantee
1594   // that writes by other threads have also been observed.
1595   //
1596   // So, returning to the task of translating the object put and the
1597   // leading/trailing membar nodes: what do the non-normal node graph
1598   // look like for these 2 special cases? and how can we determine the
1599   // status of a MemBarRelease, StoreX[mo_release] or MemBarVolatile
1600   // in both normal and non-normal cases?
1601   //
1602   // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
  // which selects conditional execution based on the value loaded
1604   // (LoadB) from the card. Ctl and Mem are fed to the If via an
1605   // intervening StoreLoad barrier (MemBarVolatile).
1606   //
1607   // So, with CMS we may see a node graph for a volatile object store
1608   // which looks like this
1609   //
1610   //   MemBarRelease
1611   //   MemBarCPUOrder_(leading)__________________
1612   //     C |    M \       \\                   C \
1613   //       |       \    StoreN/P[mo_release]  CastP2X
1614   //       |    Bot \    /
1615   //       |       MergeMem
1616   //       |         /
1617   //      MemBarVolatile (card mark)
1618   //     C |  ||    M |
1619   //       | LoadB    |
1620   //       |   |      |
1621   //       | Cmp      |\
1622   //       | /        | \
1623   //       If         |  \
1624   //       | \        |   \
1625   // IfFalse  IfTrue  |    \
1626   //       \     / \  |     \
1627   //        \   / StoreCM    |
1628   //         \ /      |      |
1629   //        Region   . . .   |
1630   //          | \           /
1631   //          |  . . .  \  / Bot
1632   //          |       MergeMem
1633   //          |          |
1634   //        MemBarVolatile (trailing)
1635   //
1636   // The first MergeMem merges the AliasIdxBot Mem slice from the
1637   // leading membar and the oopptr Mem slice from the Store into the
1638   // card mark membar. The trailing MergeMem merges the AliasIdxBot
1639   // Mem slice from the card mark membar and the AliasIdxRaw slice
1640   // from the StoreCM into the trailing membar (n.b. the latter
1641   // proceeds via a Phi associated with the If region).
1642   //
1643   // The graph for a CAS varies slightly, the obvious difference being
1644   // that the StoreN/P node is replaced by a CompareAndSwapP/N node
1645   // and the trailing MemBarVolatile by a MemBarCPUOrder +
1646   // MemBarAcquire pair. The other important difference is that the
1647   // CompareAndSwap node's SCMemProj is not merged into the card mark
1648   // membar - it still feeds the trailing MergeMem. This also means
1649   // that the card mark membar receives its Mem feed directly from the
1650   // leading membar rather than via a MergeMem.
1651   //
1652   //   MemBarRelease
1653   //   MemBarCPUOrder__(leading)_________________________
1654   //       ||                       \\                 C \
1655   //   MemBarVolatile (card mark)  CompareAndSwapN/P  CastP2X
1656   //     C |  ||    M |              |
1657   //       | LoadB    |       ______/|
1658   //       |   |      |      /       |
1659   //       | Cmp      |     /      SCMemProj
1660   //       | /        |    /         |
1661   //       If         |   /         /
1662   //       | \        |  /         /
1663   // IfFalse  IfTrue  | /         /
1664   //       \     / \  |/ prec    /
1665   //        \   / StoreCM       /
1666   //         \ /      |        /
1667   //        Region   . . .    /
1668   //          | \            /
1669   //          |  . . .  \   / Bot
1670   //          |       MergeMem
1671   //          |          |
1672   //        MemBarCPUOrder
1673   //        MemBarAcquire (trailing)
1674   //
1675   // This has a slightly different memory subgraph to the one seen
1676   // previously but the core of it is the same as for the CAS normal
  // subgraph
1678   //
1679   //   MemBarRelease
1680   //   MemBarCPUOrder____
1681   //      ||             \      . . .
1682   //   MemBarVolatile  CompareAndSwapX  . . .
1683   //      |  \            |
1684   //        . . .   SCMemProj
1685   //          |     /  . . .
1686   //         MergeMem
1687   //          |
1688   //   MemBarCPUOrder
1689   //   MemBarAcquire
1690   //
1691   //
1692   // G1 is quite a lot more complicated. The nodes inserted on behalf
1693   // of G1 may comprise: a pre-write graph which adds the old value to
1694   // the SATB queue; the releasing store itself; and, finally, a
1695   // post-write graph which performs a card mark.
1696   //
1697   // The pre-write graph may be omitted, but only when the put is
1698   // writing to a newly allocated (young gen) object and then only if
1699   // there is a direct memory chain to the Initialize node for the
1700   // object allocation. This will not happen for a volatile put since
1701   // any memory chain passes through the leading membar.
1702   //
1703   // The pre-write graph includes a series of 3 If tests. The outermost
1704   // If tests whether SATB is enabled (no else case). The next If tests
1705   // whether the old value is non-NULL (no else case). The third tests
1706   // whether the SATB queue index is > 0, if so updating the queue. The
1707   // else case for this third If calls out to the runtime to allocate a
1708   // new queue buffer.
1709   //
1710   // So with G1 the pre-write and releasing store subgraph looks like
1711   // this (the nested Ifs are omitted).
1712   //
1713   //  MemBarRelease (leading)____________
1714   //     C |  ||  M \   M \    M \  M \ . . .
1715   //       | LoadB   \  LoadL  LoadN   \
1716   //       | /        \                 \
1717   //       If         |\                 \
1718   //       | \        | \                 \
1719   //  IfFalse  IfTrue |  \                 \
1720   //       |     |    |   \                 |
1721   //       |     If   |   /\                |
1722   //       |     |          \               |
1723   //       |                 \              |
1724   //       |    . . .         \             |
1725   //       | /       | /       |            |
1726   //      Region  Phi[M]       |            |
1727   //       | \       |         |            |
1728   //       |  \_____ | ___     |            |
1729   //     C | C \     |   C \ M |            |
1730   //       | CastP2X | StoreN/P[mo_release] |
1731   //       |         |         |            |
1732   //     C |       M |       M |          M |
1733   //        \        |         |           /
1734   //                  . . .
1735   //          (post write subtree elided)
1736   //                    . . .
1737   //             C \         M /
1738   //         MemBarVolatile (trailing)
1739   //
1740   // n.b. the LoadB in this subgraph is not the card read -- it's a
1741   // read of the SATB queue active flag.
1742   //
1743   // Once again the CAS graph is a minor variant on the above with the
  // expected substitutions of CompareAndSwapX for StoreN/P and
1745   // MemBarCPUOrder + MemBarAcquire for trailing MemBarVolatile.
1746   //
1747   // The G1 post-write subtree is also optional, this time when the
1748   // new value being written is either null or can be identified as a
1749   // newly allocated (young gen) object with no intervening control
1750   // flow. The latter cannot happen but the former may, in which case
  // the card mark membar is omitted and the memory feeds from the
  // leading membar and the StoreN/P are merged direct into the
1753   // trailing membar as per the normal subgraph. So, the only special
1754   // case which arises is when the post-write subgraph is generated.
1755   //
1756   // The kernel of the post-write G1 subgraph is the card mark itself
1757   // which includes a card mark memory barrier (MemBarVolatile), a
1758   // card test (LoadB), and a conditional update (If feeding a
1759   // StoreCM). These nodes are surrounded by a series of nested Ifs
1760   // which try to avoid doing the card mark. The top level If skips if
1761   // the object reference does not cross regions (i.e. it tests if
1762   // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1763   // need not be recorded. The next If, which skips on a NULL value,
1764   // may be absent (it is not generated if the type of value is >=
1765   // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1766   // checking if card_val != young).  n.b. although this test requires
1767   // a pre-read of the card it can safely be done before the StoreLoad
1768   // barrier. However that does not bypass the need to reread the card
1769   // after the barrier.
1770   //
1771   //                (pre-write subtree elided)
1772   //        . . .                  . . .    . . .  . . .
1773   //        C |                    M |     M |    M |
1774   //       Region                  Phi[M] StoreN    |
1775   //          |                     / \      |      |
1776   //         / \_______            /   \     |      |
1777   //      C / C \      . . .            \    |      |
1778   //       If   CastP2X . . .            |   |      |
1779   //       / \                           |   |      |
1780   //      /   \                          |   |      |
1781   // IfFalse IfTrue                      |   |      |
1782   //   |       |                         |   |     /|
1783   //   |       If                        |   |    / |
1784   //   |      / \                        |   |   /  |
1785   //   |     /   \                        \  |  /   |
1786   //   | IfFalse IfTrue                   MergeMem  |
1787   //   |  . . .    / \                       /      |
1788   //   |          /   \                     /       |
1789   //   |     IfFalse IfTrue                /        |
1790   //   |      . . .    |                  /         |
1791   //   |               If                /          |
1792   //   |               / \              /           |
1793   //   |              /   \            /            |
1794   //   |         IfFalse IfTrue       /             |
1795   //   |           . . .   |         /              |
1796   //   |                    \       /               |
1797   //   |                     \     /                |
1798   //   |             MemBarVolatile__(card mark)    |
1799   //   |                ||   C |  M \  M \          |
1800   //   |               LoadB   If    |    |         |
1801   //   |                      / \    |    |         |
1802   //   |                     . . .   |    |         |
1803   //   |                          \  |    |        /
1804   //   |                        StoreCM   |       /
1805   //   |                          . . .   |      /
1806   //   |                        _________/      /
1807   //   |                       /  _____________/
1808   //   |   . . .       . . .  |  /            /
1809   //   |    |                 | /   _________/
1810   //   |    |               Phi[M] /        /
1811   //   |    |                 |   /        /
1812   //   |    |                 |  /        /
1813   //   |  Region  . . .     Phi[M]  _____/
1814   //   |    /                 |    /
1815   //   |                      |   /
1816   //   | . . .   . . .        |  /
1817   //   | /                    | /
1818   // Region           |  |  Phi[M]
1819   //   |              |  |  / Bot
1820   //    \            MergeMem
1821   //     \            /
1822   //     MemBarVolatile
1823   //
1824   // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
1825   // from the leading membar and the oopptr Mem slice from the Store
1826   // into the card mark membar i.e. the memory flow to the card mark
1827   // membar still looks like a normal graph.
1828   //
1829   // The trailing MergeMem merges an AliasIdxBot Mem slice with other
1830   // Mem slices (from the StoreCM and other card mark queue stores).
1831   // However in this case the AliasIdxBot Mem slice does not come
1832   // direct from the card mark membar. It is merged through a series
1833   // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
1834   // from the leading membar with the Mem feed from the card mark
1835   // membar. Each Phi corresponds to one of the Ifs which may skip
1836   // around the card mark membar. So when the If implementing the NULL
1837   // value check has been elided the total number of Phis is 2
1838   // otherwise it is 3.
1839   //
1840   // The CAS graph when using G1GC also includes a pre-write subgraph
  // and an optional post-write subgraph. The same variations are
  // introduced as for CMS with conditional card marking i.e. the
  // StoreP/N is swapped for a CompareAndSwapP/N, the trailing
1844   // MemBarVolatile for a MemBarCPUOrder + MemBarAcquire pair and the
1845   // Mem feed from the CompareAndSwapP/N includes a precedence
1846   // dependency feed to the StoreCM and a feed via an SCMemProj to the
1847   // trailing membar. So, as before the configuration includes the
1848   // normal CAS graph as a subgraph of the memory flow.
1849   //
1850   // So, the upshot is that in all cases the volatile put graph will
  // include a *normal* memory subgraph between the leading membar and
1852   // its child membar, either a volatile put graph (including a
1853   // releasing StoreX) or a CAS graph (including a CompareAndSwapX).
1854   // When that child is not a card mark membar then it marks the end
1855   // of the volatile put or CAS subgraph. If the child is a card mark
1856   // membar then the normal subgraph will form part of a volatile put
1857   // subgraph if and only if the child feeds an AliasIdxBot Mem feed
1858   // to a trailing barrier via a MergeMem. That feed is either direct
1859   // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier
1860   // memory flow (for G1).
1861   //
1862   // The predicates controlling generation of instructions for store
1863   // and barrier nodes employ a few simple helper functions (described
1864   // below) which identify the presence or absence of all these
1865   // subgraph configurations and provide a means of traversing from
1866   // one node in the subgraph to another.
1867 
1868   // is_CAS(int opcode)
1869   //
1870   // return true if opcode is one of the possible CompareAndSwapX
1871   // values otherwise false.
1872 
1873   bool is_CAS(int opcode)
1874   {
1875     return (opcode == Op_CompareAndSwapI ||
1876             opcode == Op_CompareAndSwapL ||
1877             opcode == Op_CompareAndSwapN ||
1878             opcode == Op_CompareAndSwapP);
1879   }
1880 
1881   // leading_to_normal
1882   //
  // graph traversal helper which detects the normal case Mem feed from
1884   // a release membar (or, optionally, its cpuorder child) to a
1885   // dependent volatile membar i.e. it ensures that one or other of
1886   // the following Mem flow subgraph is present.
1887   //
1888   //   MemBarRelease
1889   //   MemBarCPUOrder {leading}
1890   //          |  \      . . .
1891   //          |  StoreN/P[mo_release]  . . .
1892   //          |   /
1893   //         MergeMem
1894   //          |
1895   //   MemBarVolatile {trailing or card mark}
1896   //
1897   //   MemBarRelease
1898   //   MemBarCPUOrder {leading}
1899   //      |       \      . . .
1900   //      |     CompareAndSwapX  . . .
1901   //               |
1902   //     . . .    SCMemProj
1903   //           \   |
1904   //      |    MergeMem
1905   //      |       /
1906   //    MemBarCPUOrder
1907   //    MemBarAcquire {trailing}
1908   //
1909   // if the correct configuration is present returns the trailing
1910   // membar otherwise NULL.
1911   //
1912   // the input membar is expected to be either a cpuorder membar or a
1913   // release membar. in the latter case it should not have a cpu membar
1914   // child.
1915   //
1916   // the returned value may be a card mark or trailing membar
1917   //
1918 
  // walk forward from a leading (release/cpuorder) membar looking for
  // the normal volatile-put or CAS memory subgraph; returns the
  // trailing (or card mark) membar on success, NULL otherwise.
  MemBarNode *leading_to_normal(MemBarNode *leading)
  {
    assert((leading->Opcode() == Op_MemBarRelease ||
            leading->Opcode() == Op_MemBarCPUOrder),
           "expecting a volatile or cpuroder membar!");

    // check the mem flow
    ProjNode *mem = leading->proj_out(TypeFunc::Memory);

    if (!mem) {
      return NULL;
    }

    Node *x = NULL;
    StoreNode * st = NULL;
    LoadStoreNode *cas = NULL;
    MergeMemNode *mm = NULL;

    // scan the Mem users collecting at most one MergeMem and at most
    // one releasing store or CAS; any duplicate means this is not the
    // configuration we are looking for
    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
      x = mem->fast_out(i);
      if (x->is_MergeMem()) {
        if (mm != NULL) {
          return NULL;
        }
        // two merge mems is one too many
        mm = x->as_MergeMem();
      } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
        // two releasing stores/CAS nodes is one too many
        if (st != NULL || cas != NULL) {
          return NULL;
        }
        st = x->as_Store();
      } else if (is_CAS(x->Opcode())) {
        if (st != NULL || cas != NULL) {
          return NULL;
        }
        cas = x->as_LoadStore();
      }
    }

    // must have a store or a cas
    if (!st && !cas) {
      return NULL;
    }

    // must have a merge if we also have st
    if (st && !mm) {
      return NULL;
    }

    Node *y = NULL;
    if (cas) {
      // look for an SCMemProj
      for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
        x = cas->fast_out(i);
        if (x->is_Proj()) {
          y = x;
          break;
        }
      }
      if (y == NULL) {
        return NULL;
      }
      // the proj must feed a MergeMem
      for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
        x = y->fast_out(i);
        if (x->is_MergeMem()) {
          mm = x->as_MergeMem();
          break;
        }
      }
      if (mm == NULL)
        return NULL;
    } else {
      // ensure the store feeds the existing mergemem;
      for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
        if (st->fast_out(i) == mm) {
          y = st;
          break;
        }
      }
      if (y == NULL) {
        return NULL;
      }
    }

    MemBarNode *mbar = NULL;
    // ensure the merge feeds to the expected type of membar: a
    // volatile membar for a store, a cpuorder + acquire pair for a CAS
    for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
      x = mm->fast_out(i);
      if (x->is_MemBar()) {
        int opcode = x->Opcode();
        if (opcode == Op_MemBarVolatile && st) {
          mbar = x->as_MemBar();
        } else if (cas && opcode == Op_MemBarCPUOrder) {
          // n.b. this inner y intentionally shadows the outer y above
          MemBarNode *y =  x->as_MemBar();
          y = child_membar(y);
          if (y != NULL && y->Opcode() == Op_MemBarAcquire) {
            mbar = y;
          }
        }
        break;
      }
    }

    return mbar;
  }
2026 
2027   // normal_to_leading
2028   //
2029   // graph traversal helper which detects the normal case Mem feed
2030   // from either a card mark or a trailing membar to a preceding
2031   // release membar (optionally its cpuorder child) i.e. it ensures
2032   // that one or other of the following Mem flow subgraphs is present.
2033   //
2034   //   MemBarRelease
2035   //   MemBarCPUOrder {leading}
2036   //          |  \      . . .
2037   //          |  StoreN/P[mo_release]  . . .
2038   //          |   /
2039   //         MergeMem
2040   //          |
2041   //   MemBarVolatile {card mark or trailing}
2042   //
2043   //   MemBarRelease
2044   //   MemBarCPUOrder {leading}
2045   //      |       \      . . .
2046   //      |     CompareAndSwapX  . . .
2047   //               |
2048   //     . . .    SCMemProj
2049   //           \   |
2050   //      |    MergeMem
2051   //      |        /
2052   //    MemBarCPUOrder
2053   //    MemBarAcquire {trailing}
2054   //
2055   // this predicate checks for the same flow as the previous predicate
2056   // but starting from the bottom rather than the top.
2057   //
  // if the configuration is present returns the cpuorder membar for
2059   // preference or when absent the release membar otherwise NULL.
2060   //
2061   // n.b. the input membar is expected to be a MemBarVolatile but
2062   // need not be a card mark membar.
2063 
2064   MemBarNode *normal_to_leading(const MemBarNode *barrier)
2065   {
2066     // input must be a volatile membar
2067     assert((barrier->Opcode() == Op_MemBarVolatile ||
2068             barrier->Opcode() == Op_MemBarAcquire),
2069            "expecting a volatile or an acquire membar");
2070     Node *x;
2071     bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
2072 
2073     // if we have an acquire membar then it must be fed via a CPUOrder
2074     // membar
2075 
2076     if (is_cas) {
2077       // skip to parent barrier which must be a cpuorder
2078       x = parent_membar(barrier);
2079       if (x->Opcode() != Op_MemBarCPUOrder)
2080         return NULL;
2081     } else {
2082       // start from the supplied barrier
2083       x = (Node *)barrier;
2084     }
2085 
2086     // the Mem feed to the membar should be a merge
2087     x = x ->in(TypeFunc::Memory);
2088     if (!x->is_MergeMem())
2089       return NULL;
2090 
2091     MergeMemNode *mm = x->as_MergeMem();
2092 
2093     if (is_cas) {
2094       // the merge should be fed from the CAS via an SCMemProj node
2095       x = NULL;
2096       for (uint idx = 1; idx < mm->req(); idx++) {
2097         if (mm->in(idx)->Opcode() == Op_SCMemProj) {
2098           x = mm->in(idx);
2099           break;
2100         }
2101       }
2102       if (x == NULL) {
2103         return NULL;
2104       }
2105       // check for a CAS feeding this proj
2106       x = x->in(0);
2107       int opcode = x->Opcode();
2108       if (!is_CAS(opcode)) {
2109         return NULL;
2110       }
2111       // the CAS should get its mem feed from the leading membar
2112       x = x->in(MemNode::Memory);
2113     } else {
2114       // the merge should get its Bottom mem feed from the leading membar
2115       x = mm->in(Compile::AliasIdxBot);
2116     }
2117 
2118     // ensure this is a non control projection
2119     if (!x->is_Proj() || x->is_CFG()) {
2120       return NULL;
2121     }
2122     // if it is fed by a membar that's the one we want
2123     x = x->in(0);
2124 
2125     if (!x->is_MemBar()) {
2126       return NULL;
2127     }
2128 
2129     MemBarNode *leading = x->as_MemBar();
2130     // reject invalid candidates
2131     if (!leading_membar(leading)) {
2132       return NULL;
2133     }
2134 
2135     // ok, we have a leading membar, now for the sanity clauses
2136 
2137     // the leading membar must feed Mem to a releasing store or CAS
2138     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2139     StoreNode *st = NULL;
2140     LoadStoreNode *cas = NULL;
2141     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2142       x = mem->fast_out(i);
2143       if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2144         // two stores or CASes is one too many
2145         if (st != NULL || cas != NULL) {
2146           return NULL;
2147         }
2148         st = x->as_Store();
2149       } else if (is_CAS(x->Opcode())) {
2150         if (st != NULL || cas != NULL) {
2151           return NULL;
2152         }
2153         cas = x->as_LoadStore();
2154       }
2155     }
2156 
2157     // we should not have both a store and a cas
2158     if (st == NULL & cas == NULL) {
2159       return NULL;
2160     }
2161 
2162     if (st == NULL) {
2163       // nothing more to check
2164       return leading;
2165     } else {
2166       // we should not have a store if we started from an acquire
2167       if (is_cas) {
2168         return NULL;
2169       }
2170 
2171       // the store should feed the merge we used to get here
2172       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2173         if (st->fast_out(i) == mm) {
2174           return leading;
2175         }
2176       }
2177     }
2178 
2179     return NULL;
2180   }
2181 
2182   // card_mark_to_trailing
2183   //
2184   // graph traversal helper which detects extra, non-normal Mem feed
2185   // from a card mark volatile membar to a trailing membar i.e. it
2186   // ensures that one of the following three GC post-write Mem flow
2187   // subgraphs is present.
2188   //
2189   // 1)
2190   //     . . .
2191   //       |
2192   //   MemBarVolatile (card mark)
2193   //      |          |
2194   //      |        StoreCM
2195   //      |          |
2196   //      |        . . .
2197   //  Bot |  /
2198   //   MergeMem
2199   //      |
2200   //      |
2201   //    MemBarVolatile {trailing}
2202   //
2203   // 2)
2204   //   MemBarRelease/CPUOrder (leading)
2205   //    |
2206   //    |
2207   //    |\       . . .
2208   //    | \        |
2209   //    |  \  MemBarVolatile (card mark)
2210   //    |   \   |     |
2211   //     \   \  |   StoreCM    . . .
2212   //      \   \ |
2213   //       \  Phi
2214   //        \ /
2215   //        Phi  . . .
2216   //     Bot |   /
2217   //       MergeMem
2218   //         |
2219   //    MemBarVolatile {trailing}
2220   //
2221   //
2222   // 3)
2223   //   MemBarRelease/CPUOrder (leading)
2224   //    |
2225   //    |\
2226   //    | \
2227   //    |  \      . . .
2228   //    |   \       |
2229   //    |\   \  MemBarVolatile (card mark)
2230   //    | \   \   |     |
2231   //    |  \   \  |   StoreCM    . . .
2232   //    |   \   \ |
2233   //     \   \  Phi
2234   //      \   \ /
2235   //       \  Phi
2236   //        \ /
2237   //        Phi  . . .
2238   //     Bot |   /
2239   //       MergeMem
2240   //         |
2241   //         |
2242   //    MemBarVolatile {trailing}
2243   //
2244   // configuration 1 is only valid if UseConcMarkSweepGC &&
2245   // UseCondCardMark
2246   //
2247   // configurations 2 and 3 are only valid if UseG1GC.
2248   //
2249   // if a valid configuration is present returns the trailing membar
2250   // otherwise NULL.
2251   //
2252   // n.b. the supplied membar is expected to be a card mark
2253   // MemBarVolatile i.e. the caller must ensure the input node has the
2254   // correct operand and feeds Mem to a StoreCM node
2255 
2256   MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
2257   {
2258     // input must be a card mark volatile membar
2259     assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2260 
2261     Node *feed = barrier->proj_out(TypeFunc::Memory);
2262     Node *x;
2263     MergeMemNode *mm = NULL;
2264 
2265     const int MAX_PHIS = 3;     // max phis we will search through
2266     int phicount = 0;           // current search count
2267 
2268     bool retry_feed = true;
2269     while (retry_feed) {
2270       // see if we have a direct MergeMem feed
2271       for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2272         x = feed->fast_out(i);
2273         // the correct Phi will be merging a Bot memory slice
2274         if (x->is_MergeMem()) {
2275           mm = x->as_MergeMem();
2276           break;
2277         }
2278       }
2279       if (mm) {
2280         retry_feed = false;
2281       } else if (UseG1GC & phicount++ < MAX_PHIS) {
2282         // the barrier may feed indirectly via one or two Phi nodes
2283         PhiNode *phi = NULL;
2284         for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2285           x = feed->fast_out(i);
2286           // the correct Phi will be merging a Bot memory slice
2287           if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
2288             phi = x->as_Phi();
2289             break;
2290           }
2291         }
2292         if (!phi) {
2293           return NULL;
2294         }
2295         // look for another merge below this phi
2296         feed = phi;
2297       } else {
2298         // couldn't find a merge
2299         return NULL;
2300       }
2301     }
2302 
2303     // sanity check this feed turns up as the expected slice
2304     assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");
2305 
2306     MemBarNode *trailing = NULL;
2307     // be sure we have a trailing membar the merge
2308     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2309       x = mm->fast_out(i);
2310       if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
2311         trailing = x->as_MemBar();
2312         break;
2313       }
2314     }
2315 
2316     return trailing;
2317   }
2318 
2319   // trailing_to_card_mark
2320   //
2321   // graph traversal helper which detects extra, non-normal Mem feed
2322   // from a trailing volatile membar to a preceding card mark volatile
2323   // membar i.e. it identifies whether one of the three possible extra
2324   // GC post-write Mem flow subgraphs is present
2325   //
2326   // this predicate checks for the same flow as the previous predicate
2327   // but starting from the bottom rather than the top.
2328   //
2329   // if the configuration is present returns the card mark membar
2330   // otherwise NULL
2331   //
2332   // n.b. the supplied membar is expected to be a trailing
2333   // MemBarVolatile i.e. the caller must ensure the input node has the
2334   // correct opcode
2335 
  // walk backwards from a trailing volatile membar looking for a
  // preceding card mark membar, possibly traversing up to MAX_PHIS
  // intervening Bottom-slice Phi nodes (G1 only); returns the card
  // mark membar or NULL.
  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
  {
    assert(trailing->Opcode() == Op_MemBarVolatile,
           "expecting a volatile membar");
    assert(!is_card_mark_membar(trailing),
           "not expecting a card mark membar");

    // the Mem feed to the membar should be a merge
    Node *x = trailing->in(TypeFunc::Memory);
    if (!x->is_MergeMem()) {
      return NULL;
    }

    MergeMemNode *mm = x->as_MergeMem();

    x = mm->in(Compile::AliasIdxBot);
    // with G1 we may possibly see a Phi or two before we see a Memory
    // Proj from the card mark membar

    const int MAX_PHIS = 3;     // max phis we will search through
    int phicount = 0;           // current search count

    // if the Bot slice is already a Proj we can go straight to
    // checking its source membar below
    bool retry_feed = !x->is_Proj();

    while (retry_feed) {
      if (UseG1GC && x->is_Phi() && phicount++ < MAX_PHIS) {
        PhiNode *phi = x->as_Phi();
        ProjNode *proj = NULL;
        PhiNode *nextphi = NULL;
        bool found_leading = false;
        // scan the phi inputs for a volatile membar proj, a further
        // phi to recurse into, or evidence of the leading membar
        for (uint i = 1; i < phi->req(); i++) {
          x = phi->in(i);
          if (x->is_Phi()) {
            nextphi = x->as_Phi();
          } else if (x->is_Proj()) {
            int opcode = x->in(0)->Opcode();
            if (opcode == Op_MemBarVolatile) {
              proj = x->as_Proj();
            } else if (opcode == Op_MemBarRelease ||
                       opcode == Op_MemBarCPUOrder) {
              // probably a leading membar
              found_leading = true;
            }
          }
        }
        // if we found a correct looking proj then retry from there
        // otherwise we must see a leading membar and a phi or this is
        // the wrong config
        if (proj != NULL) {
          x = proj;
          retry_feed = false;
        } else if (found_leading && nextphi != NULL) {
          // retry from this phi to check phi2
          x = nextphi;
        } else {
          // not what we were looking for
          return NULL;
        }
      } else {
        return NULL;
      }
    }
    // the proj has to come from the card mark membar
    x = x->in(0);
    if (!x->is_MemBar()) {
      return NULL;
    }

    MemBarNode *card_mark_membar = x->as_MemBar();

    if (!is_card_mark_membar(card_mark_membar)) {
      return NULL;
    }

    return card_mark_membar;
  }
2412 
2413   // trailing_to_leading
2414   //
2415   // graph traversal helper which checks the Mem flow up the graph
2416   // from a (non-card mark) trailing membar attempting to locate and
2417   // return an associated leading membar. it first looks for a
2418   // subgraph in the normal configuration (relying on helper
2419   // normal_to_leading). failing that it then looks for one of the
2420   // possible post-write card mark subgraphs linking the trailing node
  // to the card mark membar (relying on helper
2422   // trailing_to_card_mark), and then checks that the card mark membar
2423   // is fed by a leading membar (once again relying on auxiliary
2424   // predicate normal_to_leading).
2425   //
  // if the configuration is valid returns the cpuorder membar for
2427   // preference or when absent the release membar otherwise NULL.
2428   //
2429   // n.b. the input membar is expected to be either a volatile or
2430   // acquire membar but in the former case must *not* be a card mark
2431   // membar.
2432 
2433   MemBarNode *trailing_to_leading(const MemBarNode *trailing)
2434   {
2435     assert((trailing->Opcode() == Op_MemBarAcquire ||
2436             trailing->Opcode() == Op_MemBarVolatile),
2437            "expecting an acquire or volatile membar");
2438     assert((trailing->Opcode() != Op_MemBarVolatile ||
2439             !is_card_mark_membar(trailing)),
2440            "not expecting a card mark membar");
2441 
2442     MemBarNode *leading = normal_to_leading(trailing);
2443 
2444     if (leading) {
2445       return leading;
2446     }
2447 
2448     // nothing more to do if this is an acquire
2449     if (trailing->Opcode() == Op_MemBarAcquire) {
2450       return NULL;
2451     }
2452 
2453     MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
2454 
2455     if (!card_mark_membar) {
2456       return NULL;
2457     }
2458 
2459     return normal_to_leading(card_mark_membar);
2460   }
2461 
2462   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
2463 
2464 bool unnecessary_acquire(const Node *barrier)
2465 {
2466   assert(barrier->is_MemBar(), "expecting a membar");
2467 
2468   if (UseBarriersForVolatile) {
2469     // we need to plant a dmb
2470     return false;
2471   }
2472 
2473   // a volatile read derived from bytecode (or also from an inlined
2474   // SHA field read via LibraryCallKit::load_field_from_object)
2475   // manifests as a LoadX[mo_acquire] followed by an acquire membar
  // with a bogus read dependency on its preceding load. so in those
2477   // cases we will find the load node at the PARMS offset of the
2478   // acquire membar.  n.b. there may be an intervening DecodeN node.
2479   //
2480   // a volatile load derived from an inlined unsafe field access
2481   // manifests as a cpuorder membar with Ctl and Mem projections
2482   // feeding both an acquire membar and a LoadX[mo_acquire]. The
2483   // acquire then feeds another cpuorder membar via Ctl and Mem
2484   // projections. The load has no output dependency on these trailing
2485   // membars because subsequent nodes inserted into the graph take
2486   // their control feed from the final membar cpuorder meaning they
2487   // are all ordered after the load.
2488 
2489   Node *x = barrier->lookup(TypeFunc::Parms);
2490   if (x) {
2491     // we are starting from an acquire and it has a fake dependency
2492     //
2493     // need to check for
2494     //
2495     //   LoadX[mo_acquire]
2496     //   {  |1   }
2497     //   {DecodeN}
2498     //      |Parms
2499     //   MemBarAcquire*
2500     //
2501     // where * tags node we were passed
2502     // and |k means input k
2503     if (x->is_DecodeNarrowPtr()) {
2504       x = x->in(1);
2505     }
2506 
2507     return (x->is_Load() && x->as_Load()->is_acquire());
2508   }
2509 
2510   // now check for an unsafe volatile get
2511 
2512   // need to check for
2513   //
2514   //   MemBarCPUOrder
2515   //        ||       \\
2516   //   MemBarAcquire* LoadX[mo_acquire]
2517   //        ||
2518   //   MemBarCPUOrder
2519   //
2520   // where * tags node we were passed
2521   // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes
2522 
2523   // check for a parent MemBarCPUOrder
2524   ProjNode *ctl;
2525   ProjNode *mem;
2526   MemBarNode *parent = parent_membar(barrier);
2527   if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
2528     return false;
2529   ctl = parent->proj_out(TypeFunc::Control);
2530   mem = parent->proj_out(TypeFunc::Memory);
2531   if (!ctl || !mem) {
2532     return false;
2533   }
2534   // ensure the proj nodes both feed a LoadX[mo_acquire]
2535   LoadNode *ld = NULL;
2536   for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
2537     x = ctl->fast_out(i);
2538     // if we see a load we keep hold of it and stop searching
2539     if (x->is_Load()) {
2540       ld = x->as_Load();
2541       break;
2542     }
2543   }
2544   // it must be an acquiring load
2545   if (ld && ld->is_acquire()) {
2546 
2547     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2548       x = mem->fast_out(i);
2549       // if we see the same load we drop it and stop searching
2550       if (x == ld) {
2551         ld = NULL;
2552         break;
2553       }
2554     }
2555     // we must have dropped the load
2556     if (ld == NULL) {
2557       // check for a child cpuorder membar
2558       MemBarNode *child  = child_membar(barrier->as_MemBar());
2559       if (child && child->Opcode() == Op_MemBarCPUOrder)
2560         return true;
2561     }
2562   }
2563 
2564   // final option for unnecessary mebar is that it is a trailing node
2565   // belonging to a CAS
2566 
2567   MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());
2568 
2569   return leading != NULL;
2570 }
2571 
2572 bool needs_acquiring_load(const Node *n)
2573 {
2574   assert(n->is_Load(), "expecting a load");
2575   if (UseBarriersForVolatile) {
2576     // we use a normal load and a dmb
2577     return false;
2578   }
2579 
2580   LoadNode *ld = n->as_Load();
2581 
2582   if (!ld->is_acquire()) {
2583     return false;
2584   }
2585 
2586   // check if this load is feeding an acquire membar
2587   //
2588   //   LoadX[mo_acquire]
2589   //   {  |1   }
2590   //   {DecodeN}
2591   //      |Parms
2592   //   MemBarAcquire*
2593   //
2594   // where * tags node we were passed
2595   // and |k means input k
2596 
2597   Node *start = ld;
2598   Node *mbacq = NULL;
2599 
2600   // if we hit a DecodeNarrowPtr we reset the start node and restart
2601   // the search through the outputs
2602  restart:
2603 
2604   for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
2605     Node *x = start->fast_out(i);
2606     if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
2607       mbacq = x;
2608     } else if (!mbacq &&
2609                (x->is_DecodeNarrowPtr() ||
2610                 (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
2611       start = x;
2612       goto restart;
2613     }
2614   }
2615 
2616   if (mbacq) {
2617     return true;
2618   }
2619 
2620   // now check for an unsafe volatile get
2621 
2622   // check if Ctl and Proj feed comes from a MemBarCPUOrder
2623   //
2624   //     MemBarCPUOrder
2625   //        ||       \\
2626   //   MemBarAcquire* LoadX[mo_acquire]
2627   //        ||
2628   //   MemBarCPUOrder
2629 
2630   MemBarNode *membar;
2631 
2632   membar = parent_membar(ld);
2633 
2634   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2635     return false;
2636   }
2637 
2638   // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
2639 
2640   membar = child_membar(membar);
2641 
2642   if (!membar || !membar->Opcode() == Op_MemBarAcquire) {
2643     return false;
2644   }
2645 
2646   membar = child_membar(membar);
2647 
2648   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2649     return false;
2650   }
2651 
2652   return true;
2653 }
2654 
2655 bool unnecessary_release(const Node *n)
2656 {
2657   assert((n->is_MemBar() &&
2658           n->Opcode() == Op_MemBarRelease),
2659          "expecting a release membar");
2660 
2661   if (UseBarriersForVolatile) {
2662     // we need to plant a dmb
2663     return false;
2664   }
2665 
2666   // if there is a dependent CPUOrder barrier then use that as the
2667   // leading
2668 
2669   MemBarNode *barrier = n->as_MemBar();
2670   // check for an intervening cpuorder membar
2671   MemBarNode *b = child_membar(barrier);
2672   if (b && b->Opcode() == Op_MemBarCPUOrder) {
2673     // ok, so start the check from the dependent cpuorder barrier
2674     barrier = b;
2675   }
2676 
2677   // must start with a normal feed
2678   MemBarNode *child_barrier = leading_to_normal(barrier);
2679 
2680   if (!child_barrier) {
2681     return false;
2682   }
2683 
2684   if (!is_card_mark_membar(child_barrier)) {
2685     // this is the trailing membar and we are done
2686     return true;
2687   }
2688 
2689   // must be sure this card mark feeds a trailing membar
2690   MemBarNode *trailing = card_mark_to_trailing(child_barrier);
2691   return (trailing != NULL);
2692 }
2693 
2694 bool unnecessary_volatile(const Node *n)
2695 {
2696   // assert n->is_MemBar();
2697   if (UseBarriersForVolatile) {
2698     // we need to plant a dmb
2699     return false;
2700   }
2701 
2702   MemBarNode *mbvol = n->as_MemBar();
2703 
2704   // first we check if this is part of a card mark. if so then we have
2705   // to generate a StoreLoad barrier
2706 
2707   if (is_card_mark_membar(mbvol)) {
2708       return false;
2709   }
2710 
2711   // ok, if it's not a card mark then we still need to check if it is
2712   // a trailing membar of a volatile put hgraph.
2713 
2714   return (trailing_to_leading(mbvol) != NULL);
2715 }
2716 
2717 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2718 
2719 bool needs_releasing_store(const Node *n)
2720 {
2721   // assert n->is_Store();
2722   if (UseBarriersForVolatile) {
2723     // we use a normal store and dmb combination
2724     return false;
2725   }
2726 
2727   StoreNode *st = n->as_Store();
2728 
2729   // the store must be marked as releasing
2730   if (!st->is_release()) {
2731     return false;
2732   }
2733 
2734   // the store must be fed by a membar
2735 
2736   Node *x = st->lookup(StoreNode::Memory);
2737 
2738   if (! x || !x->is_Proj()) {
2739     return false;
2740   }
2741 
2742   ProjNode *proj = x->as_Proj();
2743 
2744   x = proj->lookup(0);
2745 
2746   if (!x || !x->is_MemBar()) {
2747     return false;
2748   }
2749 
2750   MemBarNode *barrier = x->as_MemBar();
2751 
2752   // if the barrier is a release membar or a cpuorder mmebar fed by a
2753   // release membar then we need to check whether that forms part of a
2754   // volatile put graph.
2755 
2756   // reject invalid candidates
2757   if (!leading_membar(barrier)) {
2758     return false;
2759   }
2760 
2761   // does this lead a normal subgraph?
2762   MemBarNode *mbvol = leading_to_normal(barrier);
2763 
2764   if (!mbvol) {
2765     return false;
2766   }
2767 
2768   // all done unless this is a card mark
2769   if (!is_card_mark_membar(mbvol)) {
2770     return true;
2771   }
2772 
2773   // we found a card mark -- just make sure we have a trailing barrier
2774 
2775   return (card_mark_to_trailing(mbvol) != NULL);
2776 }
2777 
2778 // predicate controlling translation of CAS
2779 //
2780 // returns true if CAS needs to use an acquiring load otherwise false
2781 
// needs_acquiring_load_exclusive
//
// returns true when a CAS should use an acquiring ldaxr<x>; this is
// always the case unless volatile accesses are implemented with
// explicit barriers.  the ASSERT-only code merely validates that the
// CAS is embedded in the expected release/cpuorder ... acquire graph.
bool needs_acquiring_load_exclusive(const Node *n)
{
  assert(is_CAS(n->Opcode()), "expecting a compare and swap");
  if (UseBarriersForVolatile) {
    return false;
  }

  // CAS nodes only ought to turn up in inlined unsafe CAS operations
#ifdef ASSERT
  LoadStoreNode *st = n->as_LoadStore();

  // the store must be fed by a membar

  Node *x = st->lookup(StoreNode::Memory);

  assert (x && x->is_Proj(), "CAS not fed by memory proj!");

  ProjNode *proj = x->as_Proj();

  x = proj->lookup(0);

  assert (x && x->is_MemBar(), "CAS not fed by membar!");

  MemBarNode *barrier = x->as_MemBar();

  // the barrier must be a cpuorder membar fed by a release membar

  assert(barrier->Opcode() == Op_MemBarCPUOrder,
         "CAS not fed by cpuorder membar!");

  MemBarNode *b = parent_membar(barrier);
  assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
          "CAS not fed by cpuorder+release membar pair!");

  // does this lead a normal subgraph?
  MemBarNode *mbar = leading_to_normal(barrier);

  assert(mbar != NULL, "CAS not embedded in normal graph!");

  assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
#endif // ASSERT
  // so we can just return true here
  return true;
}
2826 
2827 // predicate controlling translation of StoreCM
2828 //
2829 // returns true if a StoreStore must precede the card write otherwise
2830 // false
2831 
2832 bool unnecessary_storestore(const Node *storecm)
2833 {
2834   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
2835 
2836   // we only ever need to generate a dmb ishst between an object put
2837   // and the associated card mark when we are using CMS without
2838   // conditional card marking
2839 
2840   if (!UseConcMarkSweepGC || UseCondCardMark) {
2841     return true;
2842   }
2843 
2844   // if we are implementing volatile puts using barriers then the
2845   // object put as an str so we must insert the dmb ishst
2846 
2847   if (UseBarriersForVolatile) {
2848     return false;
2849   }
2850 
2851   // we can omit the dmb ishst if this StoreCM is part of a volatile
2852   // put because in thta case the put will be implemented by stlr
2853   //
2854   // we need to check for a normal subgraph feeding this StoreCM.
2855   // that means the StoreCM must be fed Memory from a leading membar,
2856   // either a MemBarRelease or its dependent MemBarCPUOrder, and the
2857   // leading membar must be part of a normal subgraph
2858 
2859   Node *x = storecm->in(StoreNode::Memory);
2860 
2861   if (!x->is_Proj()) {
2862     return false;
2863   }
2864 
2865   x = x->in(0);
2866 
2867   if (!x->is_MemBar()) {
2868     return false;
2869   }
2870 
2871   MemBarNode *leading = x->as_MemBar();
2872 
2873   // reject invalid candidates
2874   if (!leading_membar(leading)) {
2875     return false;
2876   }
2877 
2878   // we can omit the StoreStore if it is the head of a normal subgraph
2879   return (leading_to_normal(leading) != NULL);
2880 }
2881 
2882 
2883 #define __ _masm.
2884 
// forward declarations for helper functions to convert register
// indices to register objects
2887 
2888 // the ad file has to provide implementations of certain methods
2889 // expected by the generic code
2890 //
2891 // REQUIRED FUNCTIONALITY
2892 
2893 //=============================================================================
2894 
2895 // !!!!! Special hack to get all types of calls to specify the byte offset
2896 //       from the start of the call to the point where the return address
2897 //       will point.
2898 
2899 int MachCallStaticJavaNode::ret_addr_offset()
2900 {
2901   // call should be a simple bl
2902   int off = 4;
2903   return off;
2904 }
2905 
int MachCallDynamicJavaNode::ret_addr_offset()
{
  // four 4-byte instructions precede the return address
  return 16; // movz, movk, movk, bl
}
2910 
2911 int MachCallRuntimeNode::ret_addr_offset() {
2912   // for generated stubs the call will be
2913   //   far_call(addr)
2914   // for real runtime callouts it will be six instructions
2915   // see aarch64_enc_java_to_runtime
2916   //   adr(rscratch2, retaddr)
2917   //   lea(rscratch1, RuntimeAddress(addr)
2918   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
2919   //   blrt rscratch1
2920   CodeBlob *cb = CodeCache::find_blob(_entry_point);
2921   if (cb) {
2922     return MacroAssembler::far_branch_size();
2923   } else {
2924     return 6 * NativeInstruction::instruction_size;
2925   }
2926 }
2927 
2928 // Indicate if the safepoint node needs the polling page as an input
2929 
2930 // the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
2932 // instruction itself. so we cannot plant a mov of the safepoint poll
2933 // address followed by a load. setting this to true means the mov is
2934 // scheduled as a prior instruction. that's better for scheduling
2935 // anyway.
2936 
bool SafePointNode::needs_polling_address_input()
{
  // see the comment block above: the poll page address must be
  // materialized by a separate mov so the oop map lands on the load
  return true;
}
2941 
2942 //=============================================================================
2943 
2944 #ifndef PRODUCT
// debug listing for the breakpoint node
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
2948 #endif
2949 
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  // emit a brk instruction with immediate 0 to trap into the debugger
  MacroAssembler _masm(&cbuf);
  __ brk(0);
}
2954 
uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  // delegate to the generic size computation
  return MachNode::size(ra_);
}
2958 
2959 //=============================================================================
2960 
2961 #ifndef PRODUCT
  // debug listing for nop padding
  // n.b. _count is the number of nops; each is 4 bytes (see size())
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
2965 #endif
2966 
2967   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2968     MacroAssembler _masm(&cbuf);
2969     for (int i = 0; i < _count; i++) {
2970       __ nop();
2971     }
2972   }
2973 
  uint MachNopNode::size(PhaseRegAlloc*) const {
    // each nop is one fixed-width instruction
    return _count * NativeInstruction::instruction_size;
  }
2977 
2978 //=============================================================================
// no output register is reserved for the constant table base -- the
// table is addressed absolutely (see calculate_table_base_offset)
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
2980 
// the constant table is addressed absolutely, so the base offset is
// always zero
int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}
2984 
// no post-register-allocation expansion is needed for the constant base
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  // never called: requires_postalloc_expand() returns false
  ShouldNotReachHere();
}
2989 
// nothing to emit: constants are reached by absolute addressing
// (see calculate_table_base_offset)
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}
2993 
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  // matches the empty encoding in emit()
  return 0;
}
2997 
2998 #ifndef PRODUCT
// debug listing: note that no code is emitted for this node
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
3002 #endif
3003 
3004 #ifndef PRODUCT
// debug listing of the prolog; intended to mirror the frame build
// performed by MachPrologNode::emit
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  // small frames fit in a single immediate subtract; larger frames
  // need the size materialized in rscratch1
  if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
3024 #endif
3025 
// emit the method prolog: patchable nop, optional stack bang, frame
// build, simulator notification and constant table base setup
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
3061 
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
3067 
int MachPrologNode::reloc() const
{
  // the prolog contains no relocatable values
  return 0;
}
3072 
3073 //=============================================================================
3074 
3075 #ifndef PRODUCT
// debug listing of the epilog; intended to mirror the frame teardown
// and safepoint poll performed by MachEpilogNode::emit
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  // three frame shapes: empty, small-immediate, and large (size
  // materialized in rscratch1) -- the inverse of the prolog cases
  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
3099 #endif
3100 
// emit the method epilog: frame teardown, simulator notification and
// return-type safepoint poll
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_reentry);
  }

  // touch the polling page so the VM can stop this thread at a return
  if (do_polling() && C->is_method_compilation()) {
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
3116 
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}
3121 
int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}
3126 
const Pipeline * MachEpilogNode::pipeline() const {
  // use the default pipeline class for scheduling
  return MachNode::pipeline_class();
}
3130 
3131 // This method seems to be obsolete. It is declared in machnode.hpp
3132 // and defined in all *.ad files, but it is never called. Should we
3133 // get rid of it?
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  // 4 == one instruction; see the "obsolete" note above -- this is
  // never actually called
  return 4;
}
3138 
3139 //=============================================================================
3140 
3141 // Figure out which register class each belongs in: rc_int, rc_float or
3142 // rc_stack.
// register classes returned by rc_class() below
enum RC { rc_bad, rc_int, rc_float, rc_stack };
3144 
3145 static enum RC rc_class(OptoReg::Name reg) {
3146 
3147   if (reg == OptoReg::Bad) {
3148     return rc_bad;
3149   }
3150 
3151   // we have 30 int registers * 2 halves
3152   // (rscratch1 and rscratch2 are omitted)
3153 
3154   if (reg < 60) {
3155     return rc_int;
3156   }
3157 
3158   // we have 32 float register * 2 halves
3159   if (reg < 60 + 128) {
3160     return rc_float;
3161   }
3162 
3163   // Between float regs & stack is the flags regs.
3164   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
3165 
3166   return rc_stack;
3167 }
3168 
3169 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
3170   Compile* C = ra_->C;
3171 
3172   // Get registers to move.
3173   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
3174   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
3175   OptoReg::Name dst_hi = ra_->get_reg_second(this);
3176   OptoReg::Name dst_lo = ra_->get_reg_first(this);
3177 
3178   enum RC src_hi_rc = rc_class(src_hi);
3179   enum RC src_lo_rc = rc_class(src_lo);
3180   enum RC dst_hi_rc = rc_class(dst_hi);
3181   enum RC dst_lo_rc = rc_class(dst_lo);
3182 
3183   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
3184 
3185   if (src_hi != OptoReg::Bad) {
3186     assert((src_lo&1)==0 && src_lo+1==src_hi &&
3187            (dst_lo&1)==0 && dst_lo+1==dst_hi,
3188            "expected aligned-adjacent pairs");
3189   }
3190 
3191   if (src_lo == dst_lo && src_hi == dst_hi) {
3192     return 0;            // Self copy, no move.
3193   }
3194 
3195   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
3196               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
3197   int src_offset = ra_->reg2offset(src_lo);
3198   int dst_offset = ra_->reg2offset(dst_lo);
3199 
3200   if (bottom_type()->isa_vect() != NULL) {
3201     uint ireg = ideal_reg();
3202     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
3203     if (cbuf) {
3204       MacroAssembler _masm(cbuf);
3205       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
3206       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
3207         // stack->stack
3208         assert((src_offset & 7) && (dst_offset & 7), "unaligned stack offset");
3209         if (ireg == Op_VecD) {
3210           __ unspill(rscratch1, true, src_offset);
3211           __ spill(rscratch1, true, dst_offset);
3212         } else {
3213           __ spill_copy128(src_offset, dst_offset);
3214         }
3215       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
3216         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3217                ireg == Op_VecD ? __ T8B : __ T16B,
3218                as_FloatRegister(Matcher::_regEncode[src_lo]));
3219       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
3220         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3221                        ireg == Op_VecD ? __ D : __ Q,
3222                        ra_->reg2offset(dst_lo));
3223       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
3224         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3225                        ireg == Op_VecD ? __ D : __ Q,
3226                        ra_->reg2offset(src_lo));
3227       } else {
3228         ShouldNotReachHere();
3229       }
3230     }
3231   } else if (cbuf) {
3232     MacroAssembler _masm(cbuf);
3233     switch (src_lo_rc) {
3234     case rc_int:
3235       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
3236         if (is64) {
3237             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
3238                    as_Register(Matcher::_regEncode[src_lo]));
3239         } else {
3240             MacroAssembler _masm(cbuf);
3241             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
3242                     as_Register(Matcher::_regEncode[src_lo]));
3243         }
3244       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
3245         if (is64) {
3246             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3247                      as_Register(Matcher::_regEncode[src_lo]));
3248         } else {
3249             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3250                      as_Register(Matcher::_regEncode[src_lo]));
3251         }
3252       } else {                    // gpr --> stack spill
3253         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3254         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
3255       }
3256       break;
3257     case rc_float:
3258       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
3259         if (is64) {
3260             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
3261                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3262         } else {
3263             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
3264                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3265         }
3266       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
3267           if (cbuf) {
3268             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3269                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3270         } else {
3271             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3272                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3273         }
3274       } else {                    // fpr --> stack spill
3275         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3276         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3277                  is64 ? __ D : __ S, dst_offset);
3278       }
3279       break;
3280     case rc_stack:
3281       if (dst_lo_rc == rc_int) {  // stack --> gpr load
3282         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
3283       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
3284         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3285                    is64 ? __ D : __ S, src_offset);
3286       } else {                    // stack --> stack copy
3287         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3288         __ unspill(rscratch1, is64, src_offset);
3289         __ spill(rscratch1, is64, dst_offset);
3290       }
3291       break;
3292     default:
3293       assert(false, "bad rc_class for spill");
3294       ShouldNotReachHere();
3295     }
3296   }
3297 
3298   if (st) {
3299     st->print("spill ");
3300     if (src_lo_rc == rc_stack) {
3301       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
3302     } else {
3303       st->print("%s -> ", Matcher::regName[src_lo]);
3304     }
3305     if (dst_lo_rc == rc_stack) {
3306       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
3307     } else {
3308       st->print("%s", Matcher::regName[dst_lo]);
3309     }
3310     if (bottom_type()->isa_vect() != NULL) {
3311       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
3312     } else {
3313       st->print("\t# spill size = %d", is64 ? 64:32);
3314     }
3315   }
3316 
3317   return 0;
3318 
3319 }
3320 
3321 #ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  // without register allocation info we can only print node numbers;
  // otherwise delegate to implementation() in print-only mode
  if (!ra_)
    st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
  else
    implementation(NULL, ra_, false, st);
}
3328 #endif
3329 
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  // delegate to implementation() in emit mode (non-NULL code buffer)
  implementation(&cbuf, ra_, false, NULL);
}
3333 
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  // variable size; delegate to the generic size computation
  return MachNode::size(ra_);
}
3337 
3338 //=============================================================================
3339 
3340 #ifndef PRODUCT
3341 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3342   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3343   int reg = ra_->get_reg_first(this);
3344   st->print("add %s, rsp, #%d]\t# box lock",
3345             Matcher::regName[reg], offset);
3346 }
3347 #endif
3348 
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);

  // compute the stack address of the box lock slot into the allocated
  // register: reg = sp + offset
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg    = ra_->get_encode(this);

  if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
    __ add(as_Register(reg), sp, offset);
  } else {
    // size() reports a fixed 4 bytes, so a multi-instruction
    // materialization of the offset cannot be emitted here
    ShouldNotReachHere();
  }
}
3361 
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  // emit() produces exactly one add instruction
  return 4;
}
3366 
3367 //=============================================================================
3368 
3369 #ifndef PRODUCT
3370 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
3371 {
3372   st->print_cr("# MachUEPNode");
3373   if (UseCompressedClassPointers) {
3374     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3375     if (Universe::narrow_klass_shift() != 0) {
3376       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
3377     }
3378   } else {
3379    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3380   }
3381   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
3382   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
3383 }
3384 #endif
3385 
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  // inline cache check: compare the receiver's klass (loaded from
  // j_rarg0) and jump to the ic miss stub on mismatch -- register
  // roles per MacroAssembler::cmp_klass
  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}
3399 
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  // variable size; delegate to the generic size computation
  return MachNode::size(ra_);
}
3404 
3405 // REQUIRED EMIT CODE
3406 
3407 //=============================================================================
3408 
3409 // Emit exception handler code.
3410 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
3411 {
3412   // mov rscratch1 #exception_blob_entry_point
3413   // br rscratch1
3414   // Note that the code buffer's insts_mark is always relative to insts.
3415   // That's why we must use the macroassembler to generate a handler.
3416   MacroAssembler _masm(&cbuf);
3417   address base = __ start_a_stub(size_exception_handler());
3418   if (base == NULL) {
3419     ciEnv::current()->record_failure("CodeCache is full");
3420     return 0;  // CodeBuffer::expand failed
3421   }
3422   int offset = __ offset();
3423   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
3424   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
3425   __ end_a_stub();
3426   return offset;
3427 }
3428 
3429 // Emit deopt handler code.
3430 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
3431 {
3432   // Note that the code buffer's insts_mark is always relative to insts.
3433   // That's why we must use the macroassembler to generate a handler.
3434   MacroAssembler _masm(&cbuf);
3435   address base = __ start_a_stub(size_deopt_handler());
3436   if (base == NULL) {
3437     ciEnv::current()->record_failure("CodeCache is full");
3438     return 0;  // CodeBuffer::expand failed
3439   }
3440   int offset = __ offset();
3441 
3442   __ adr(lr, __ pc());
3443   __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
3444 
3445   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
3446   __ end_a_stub();
3447   return offset;
3448 }
3449 
3450 // REQUIRED MATCHER CODE
3451 
3452 //=============================================================================
3453 
3454 const bool Matcher::match_rule_supported(int opcode) {
3455 
3456   // TODO
3457   // identify extra cases that we might want to provide match rules for
3458   // e.g. Op_StrEquals and other intrinsics
3459   if (!has_match_rule(opcode)) {
3460     return false;
3461   }
3462 
3463   return true;  // Per default match rules are supported.
3464 }
3465 
3466 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
3467 
3468   // TODO
3469   // identify extra cases that we might want to provide match rules for
3470   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
3471   bool ret_value = match_rule_supported(opcode);
3472   // Add rules here.
3473 
3474   return ret_value;  // Per default match rules are supported.
3475 }
3476 
// Use the platform-independent register pressure threshold unchanged.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}

// x87-style FPU stack offsets do not apply here; never called on this port.
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}

// Short-branch replacement is not used on this port (Unimplemented).
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset)
{
  Unimplemented();
  return false;
}

const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}

// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
3503 
3504 // Vector width in bytes.
3505 const int Matcher::vector_width_in_bytes(BasicType bt) {
3506   int size = MIN2(16,(int)MaxVectorSize);
3507   // Minimum 2 values in vector
3508   if (size < 2*type2aelembytes(bt)) size = 0;
3509   // But never < 4
3510   if (size < 4) size = 0;
3511   return size;
3512 }
3513 
3514 // Limits on vector size (number of elements) loaded into vector.
3515 const int Matcher::max_vector_size(const BasicType bt) {
3516   return vector_width_in_bytes(bt)/type2aelembytes(bt);
3517 }
3518 const int Matcher::min_vector_size(const BasicType bt) {
3519 //  For the moment limit the vector size to 8 bytes
3520     int size = 8 / type2aelembytes(bt);
3521     if (size < 2) size = 2;
3522     return size;
3523 }
3524 
3525 // Vector ideal reg.
3526 const int Matcher::vector_ideal_reg(int len) {
3527   switch(len) {
3528     case  8: return Op_VecD;
3529     case 16: return Op_VecX;
3530   }
3531   ShouldNotReachHere();
3532   return 0;
3533 }
3534 
// Vector shift counts are always held in a full Q register.
const int Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecX;
}

// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// AArch64 supports misaligned vector store/load (unless the AlignVector
// flag forces alignment).
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}
3548 
// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray.
const int Matcher::init_array_short_size = 18 * BytesPerLong;

// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Complex addressing is only profitable when the oop shift is zero.
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}

// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// A no-op on amd64; should never be called on AArch64 (Unimplemented).
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return true; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
3629 
// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers not be
// available to the callee.
bool Matcher::can_be_java_arg(int reg)
{
  // r0-r7 and v0-v7 (both halves of each) are the Java argument registers.
  return
    reg ==  R0_num || reg == R0_H_num ||
    reg ==  R1_num || reg == R1_H_num ||
    reg ==  R2_num || reg == R2_H_num ||
    reg ==  R3_num || reg == R3_H_num ||
    reg ==  R4_num || reg == R4_H_num ||
    reg ==  R5_num || reg == R5_H_num ||
    reg ==  R6_num || reg == R6_H_num ||
    reg ==  R7_num || reg == R7_H_num ||
    reg ==  V0_num || reg == V0_H_num ||
    reg ==  V1_num || reg == V1_H_num ||
    reg ==  V2_num || reg == V2_H_num ||
    reg ==  V3_num || reg == V3_H_num ||
    reg ==  V4_num || reg == V4_H_num ||
    reg ==  V5_num || reg == V5_H_num ||
    reg ==  V6_num || reg == V6_H_num ||
    reg ==  V7_num || reg == V7_H_num;
}

// Any Java-argument register may also be spilled.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}

// No special assembly sequence for long division by constant on this port.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}

// Register for DIVI projection of divmodI.
// divmodI/divmodL nodes are not generated on this port, so none of the
// projection masks below should ever be requested.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// SP is saved in the frame pointer register across a method-handle invoke.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
3691 
3692 // helper for encoding java_to_runtime calls on sim
3693 //
3694 // this is needed to compute the extra arguments required when
3695 // planting a call to the simulator blrt instruction. the TypeFunc
3696 // can be queried to identify the counts for integral, and floating
3697 // arguments and the return type
3698 
3699 static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
3700 {
3701   int gps = 0;
3702   int fps = 0;
3703   const TypeTuple *domain = tf->domain();
3704   int max = domain->cnt();
3705   for (int i = TypeFunc::Parms; i < max; i++) {
3706     const Type *t = domain->field_at(i);
3707     switch(t->basic_type()) {
3708     case T_FLOAT:
3709     case T_DOUBLE:
3710       fps++;
3711     default:
3712       gps++;
3713     }
3714   }
3715   gpcnt = gps;
3716   fpcnt = fps;
3717   BasicType rt = tf->return_type();
3718   switch (rt) {
3719   case T_VOID:
3720     rtype = MacroAssembler::ret_type_void;
3721     break;
3722   default:
3723     rtype = MacroAssembler::ret_type_integral;
3724     break;
3725   case T_FLOAT:
3726     rtype = MacroAssembler::ret_type_float;
3727     break;
3728   case T_DOUBLE:
3729     rtype = MacroAssembler::ret_type_double;
3730     break;
3731   }
3732 }
3733 
// Emit a volatile memory access (used by the ldar/stlr encodings below).
// These instructions only support a bare [base] addressing mode, so any
// index, displacement or scale on the matched operand is rejected.
// NOTE(review): the SCRATCH parameter is currently unused.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }

// Member-function-pointer types for the MacroAssembler load/store
// emitters used by the loadStore() helpers below.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
3747 
  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  static void loadStore(MacroAssembler masm, mem_insn insn,
                         Register reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDOFFSETI2L:
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDOFFSETI2LN:
    case INDINDEXSCALEDI2LN:
    case INDINDEXOFFSETI2L:
    case INDINDEXOFFSETI2LN:
      // I2L patterns: the index is an int widened to long, so it must
      // be sign-extended (sxtw) while being scaled.
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    if (index == -1) {
      // No index register: plain base + displacement addressing.
      (masm.*insn)(reg, Address(base, disp));
    } else {
      if (disp == 0) {
        (masm.*insn)(reg, Address(base, as_Register(index), scale));
      } else {
        // base + index + disp is not a single AArch64 addressing mode:
        // fold the displacement into rscratch1 first.
        masm.lea(rscratch1, Address(base, disp));
        (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
      }
    }
  }
3784 
  // Variant of loadStore for FP-register loads and stores.
  static void loadStore(MacroAssembler masm, mem_float_insn insn,
                         FloatRegister reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    switch (opcode) {
    case INDINDEXSCALEDOFFSETI2L:
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDOFFSETI2LN:
    case INDINDEXSCALEDI2LN:
      // NOTE(review): unlike the integer variant above, the unscaled
      // INDINDEXOFFSETI2L/INDINDEXOFFSETI2LN cases are not listed here
      // -- confirm those modes can never reach an FP access.
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

     if (index == -1) {
      (masm.*insn)(reg, Address(base, disp));
    } else {
      if (disp == 0) {
        (masm.*insn)(reg, Address(base, as_Register(index), scale));
      } else {
        // Fold the displacement into rscratch1; base + index + disp is
        // not a single addressing mode.
        masm.lea(rscratch1, Address(base, disp));
        (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
      }
    }
  }
3813 
3814   static void loadStore(MacroAssembler masm, mem_vector_insn insn,
3815                          FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
3816                          int opcode, Register base, int index, int size, int disp)
3817   {
3818     if (index == -1) {
3819       (masm.*insn)(reg, T, Address(base, disp));
3820     } else {
3821       assert(disp == 0, "unsupported address mode");
3822       (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
3823     }
3824   }
3825 
3826 %}
3827 
3828 
3829 
3830 //----------ENCODING BLOCK-----------------------------------------------------
3831 // This block specifies the encoding classes used by the compiler to
3832 // output byte streams.  Encoding classes are parameterized macros
3833 // used by Machine Instruction Nodes in order to generate the bit
3834 // encoding of the instruction.  Operands specify their base encoding
3835 // interface with the interface keyword.  There are currently
3836 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
3837 // COND_INTER.  REG_INTER causes an operand to generate a function
3838 // which returns its register number when queried.  CONST_INTER causes
3839 // an operand to generate a function which returns the value of the
3840 // constant when queried.  MEMORY_INTER causes an operand to generate
3841 // four functions which return the Base Register, the Index Register,
3842 // the Scale Value, and the Offset Value of the operand when queried.
3843 // COND_INTER causes an operand to generate six functions which return
3844 // the encoding code (ie - encoding bits for the instruction)
3845 // associated with each basic boolean condition for a conditional
3846 // instruction.
3847 //
3848 // Instructions specify two basic values for encoding.  Again, a
3849 // function is available to check if the constant displacement is an
3850 // oop. They use the ins_encode keyword to specify their encoding
3851 // classes (which must be a sequence of enc_class names, and their
3852 // parameters, specified in the encoding block), and they use the
3853 // opcode keyword to specify, in order, their primary, secondary, and
3854 // tertiary opcode.  Only the opcode sections which a particular
3855 // instruction needs for encoding need to be specified.
3856 encode %{
3857   // Build emit functions for each basic byte or larger field in the
3858   // intel encoding scheme (opcode, rm, sib, immediate), and call them
3859   // from C++ code in the enc_class source block.  Emit functions will
3860   // live in the main source block for now.  In future, we can
3861   // generalize this by adding a syntax that specifies the sizes of
3862   // fields in an order, so that the adlc can build the emit functions
3863   // automagically
3864 
  // catch all for unimplemented encodings
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    // Flags the instruction as unimplemented if this encoding is reached.
    __ unimplemented("C2 catch all");
  %}
3870 
3871   // BEGIN Non-volatile memory access
3872 
  // Integer loads.  Every encoding defers to loadStore(), which picks
  // the addressing mode (and any sign-extension of the index) from the
  // matched memory operand's opcode.
  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Scalar FP loads (S and D registers).
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector loads: the SIMD_RegVariant selects the access width.
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
3974 
  // Integer stores; the *0 variants store the zero register.
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    // StoreStore barrier so earlier stores are ordered before this one.
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      // Copy sp into rscratch2 and store that instead.
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Scalar FP stores.
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector stores: the SIMD_RegVariant selects the access width.
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
4067 
4068   // END Non-volatile memory access
4069 
  // volatile loads and stores
  // These emit acquire/release instructions (ldar*/stlr*) through the
  // MOV_VOLATILE macro, which only permits a bare [base] address.

  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
  %}

  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
  %}

  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}


  // There is no sign-extending load-acquire, so the signed variants load
  // with ldarb/ldarh and then sign-extend the result explicitly.
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}

  // FP volatile loads go through rscratch1 and a bitwise fmov, since
  // there is no FP-register form of ldar.
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}

  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
        MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      // Copy sp into rscratch2 and store that instead.
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}

  // FP volatile stores: bitwise-move the value into rscratch2 first,
  // then release-store it with stlrw/stlr.
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}

  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
4196 
4197   // synchronized read/update encodings
4198 
  // Load-acquire-exclusive of a 64 bit value.  The memory operand is
  // flattened to base/index/scale/disp; ldaxr itself only takes a bare
  // base register, so any non-trivial address is first computed into
  // rscratch1 with lea.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;    // -1 means no index register
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // Fold the displacement first, then the scaled index.
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}
4227 
  // Store-release-exclusive of a 64 bit value, with the same address
  // flattening as aarch64_enc_ldaxr.  rscratch2 holds any computed
  // address; rscratch1 receives the store-exclusive status word.
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;    // -1 means no index register
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // Fold the displacement first, then the scaled index.
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    // Fold the 0 (success) / 1 (failure) status into the flags so users
    // can branch on EQ (stored) / NE (lost the exclusive).
    __ cmpw(rscratch1, zr);
  %}
4257 
4258   enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
4259     MacroAssembler _masm(&cbuf);
4260     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4261     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4262                &Assembler::ldxr, &MacroAssembler::cmp, &Assembler::stlxr);
4263   %}
4264 
4265   enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
4266     MacroAssembler _masm(&cbuf);
4267     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4268     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4269                &Assembler::ldxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
4270   %}
4271 
4272 
4273   // The only difference between aarch64_enc_cmpxchg and
4274   // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
4275   // CompareAndSwap sequence to serve as a barrier on acquiring a
4276   // lock.
4277   enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
4278     MacroAssembler _masm(&cbuf);
4279     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4280     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4281                &Assembler::ldaxr, &MacroAssembler::cmp, &Assembler::stlxr);
4282   %}
4283 
4284   enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
4285     MacroAssembler _masm(&cbuf);
4286     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4287     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4288                &Assembler::ldaxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
4289   %}
4290 
4291 
4292   // auxiliary used for CompareAndSwapX to set result register
4293   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
4294     MacroAssembler _masm(&cbuf);
4295     Register res_reg = as_Register($res$$reg);
4296     __ cset(res_reg, Assembler::EQ);
4297   %}
4298 
4299   // prefetch encodings
4300 
  // Prefetch-for-write (PSTL1KEEP) of the flattened memory operand.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;    // -1 means no index register
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
      // NOTE(review): purpose of this nop is unclear from here — possibly
      // padding the encoding to a fixed size; confirm before removing.
      __ nop();
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        // prfm has no base+index+disp form, so fold the displacement first.
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
4320 
  // Zero cnt words starting at base.  cnt_reg is consumed; base_reg is
  // advanced past the zeroed region (plus one final unroll-sized bump).
  enc_class aarch64_enc_clear_array_reg_reg(iRegL_R11 cnt, iRegP_R10 base) %{
    MacroAssembler _masm(&cbuf);
    Register cnt_reg = as_Register($cnt$$reg);
    Register base_reg = as_Register($base$$reg);
    // base is word aligned
    // cnt is count of words

    Label loop;
    Label entry;

//  Algorithm (a Duff's-device style unrolled fill):
//
//    scratch1 = cnt & 7;
//    cnt -= scratch1;
//    p += scratch1;
//    switch (scratch1) {
//      do {
//        cnt -= 8;
//          p[-8] = 0;
//        case 7:
//          p[-7] = 0;
//        case 6:
//          p[-6] = 0;
//          // ...
//        case 1:
//          p[-1] = 0;
//        case 0:
//          p += 8;
//      } while (cnt);
//    }

    const int unroll = 8; // Number of str(zr) instructions we'll unroll

    __ andr(rscratch1, cnt_reg, unroll - 1);  // tmp1 = cnt % unroll
    __ sub(cnt_reg, cnt_reg, rscratch1);      // cnt -= (cnt % unroll)
    // base_reg always points to the end of the region we're about to zero
    __ add(base_reg, base_reg, rscratch1, Assembler::LSL, exact_log2(wordSize));
    // Each str below is one 4-byte instruction, so branching to
    // (entry - 4 * remainder) executes exactly 'remainder' trailing
    // stores on the first pass.
    __ adr(rscratch2, entry);
    __ sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
    __ br(rscratch2);
    __ bind(loop);
    __ sub(cnt_reg, cnt_reg, unroll);
    for (int i = -unroll; i < 0; i++)
      __ str(zr, Address(base_reg, i * wordSize));
    __ bind(entry);
    __ add(base_reg, base_reg, unroll * wordSize);
    __ cbnz(cnt_reg, loop);
  %}
4369 
  // mov encodings
4371 
4372   enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
4373     MacroAssembler _masm(&cbuf);
4374     u_int32_t con = (u_int32_t)$src$$constant;
4375     Register dst_reg = as_Register($dst$$reg);
4376     if (con == 0) {
4377       __ movw(dst_reg, zr);
4378     } else {
4379       __ movw(dst_reg, con);
4380     }
4381   %}
4382 
4383   enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
4384     MacroAssembler _masm(&cbuf);
4385     Register dst_reg = as_Register($dst$$reg);
4386     u_int64_t con = (u_int64_t)$src$$constant;
4387     if (con == 0) {
4388       __ mov(dst_reg, zr);
4389     } else {
4390       __ mov(dst_reg, con);
4391     }
4392   %}
4393 
  // Materialize a general pointer constant.  NULL (immP0) and one
  // (immP_1) are matched by dedicated rules, so they must not reach here.
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          // Small non-relocated constants are moved directly.
          __ mov(dst_reg, con);
        } else {
          // Otherwise build the address pc-relatively with adrp+add.
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}
4418 
4419   enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
4420     MacroAssembler _masm(&cbuf);
4421     Register dst_reg = as_Register($dst$$reg);
4422     __ mov(dst_reg, zr);
4423   %}
4424 
4425   enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
4426     MacroAssembler _masm(&cbuf);
4427     Register dst_reg = as_Register($dst$$reg);
4428     __ mov(dst_reg, (u_int64_t)1);
4429   %}
4430 
4431   enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
4432     MacroAssembler _masm(&cbuf);
4433     address page = (address)$src$$constant;
4434     Register dst_reg = as_Register($dst$$reg);
4435     unsigned long off;
4436     __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
4437     assert(off == 0, "assumed offset == 0");
4438   %}
4439 
4440   enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
4441     MacroAssembler _masm(&cbuf);
4442     address page = (address)$src$$constant;
4443     Register dst_reg = as_Register($dst$$reg);
4444     unsigned long off;
4445     __ adrp(dst_reg, ExternalAddress(page), off);
4446     assert(off == 0, "assumed offset == 0");
4447   %}
4448 
4449   enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
4450     MacroAssembler _masm(&cbuf);
4451     Register dst_reg = as_Register($dst$$reg);
4452     address con = (address)$src$$constant;
4453     if (con == NULL) {
4454       ShouldNotReachHere();
4455     } else {
4456       relocInfo::relocType rtype = $src->constant_reloc();
4457       assert(rtype == relocInfo::oop_type, "unexpected reloc type");
4458       __ set_narrow_oop(dst_reg, (jobject)con);
4459     }
4460   %}
4461 
4462   enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
4463     MacroAssembler _masm(&cbuf);
4464     Register dst_reg = as_Register($dst$$reg);
4465     __ mov(dst_reg, zr);
4466   %}
4467 
4468   enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
4469     MacroAssembler _masm(&cbuf);
4470     Register dst_reg = as_Register($dst$$reg);
4471     address con = (address)$src$$constant;
4472     if (con == NULL) {
4473       ShouldNotReachHere();
4474     } else {
4475       relocInfo::relocType rtype = $src->constant_reloc();
4476       assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
4477       __ set_narrow_klass(dst_reg, (Klass *)con);
4478     }
4479   %}
4480 
4481   // arithmetic encodings
4482 
4483   enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
4484     MacroAssembler _masm(&cbuf);
4485     Register dst_reg = as_Register($dst$$reg);
4486     Register src_reg = as_Register($src1$$reg);
4487     int32_t con = (int32_t)$src2$$constant;
4488     // add has primary == 0, subtract has primary == 1
4489     if ($primary) { con = -con; }
4490     if (con < 0) {
4491       __ subw(dst_reg, src_reg, -con);
4492     } else {
4493       __ addw(dst_reg, src_reg, con);
4494     }
4495   %}
4496 
4497   enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
4498     MacroAssembler _masm(&cbuf);
4499     Register dst_reg = as_Register($dst$$reg);
4500     Register src_reg = as_Register($src1$$reg);
4501     int32_t con = (int32_t)$src2$$constant;
4502     // add has primary == 0, subtract has primary == 1
4503     if ($primary) { con = -con; }
4504     if (con < 0) {
4505       __ sub(dst_reg, src_reg, -con);
4506     } else {
4507       __ add(dst_reg, src_reg, con);
4508     }
4509   %}
4510 
4511   enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
4512     MacroAssembler _masm(&cbuf);
4513    Register dst_reg = as_Register($dst$$reg);
4514    Register src1_reg = as_Register($src1$$reg);
4515    Register src2_reg = as_Register($src2$$reg);
4516     __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
4517   %}
4518 
4519   enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
4520     MacroAssembler _masm(&cbuf);
4521    Register dst_reg = as_Register($dst$$reg);
4522    Register src1_reg = as_Register($src1$$reg);
4523    Register src2_reg = as_Register($src2$$reg);
4524     __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
4525   %}
4526 
4527   enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
4528     MacroAssembler _masm(&cbuf);
4529    Register dst_reg = as_Register($dst$$reg);
4530    Register src1_reg = as_Register($src1$$reg);
4531    Register src2_reg = as_Register($src2$$reg);
4532     __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
4533   %}
4534 
4535   enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
4536     MacroAssembler _masm(&cbuf);
4537    Register dst_reg = as_Register($dst$$reg);
4538    Register src1_reg = as_Register($src1$$reg);
4539    Register src2_reg = as_Register($src2$$reg);
4540     __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
4541   %}
4542 
4543   // compare instruction encodings
4544 
4545   enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
4546     MacroAssembler _masm(&cbuf);
4547     Register reg1 = as_Register($src1$$reg);
4548     Register reg2 = as_Register($src2$$reg);
4549     __ cmpw(reg1, reg2);
4550   %}
4551 
4552   enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
4553     MacroAssembler _masm(&cbuf);
4554     Register reg = as_Register($src1$$reg);
4555     int32_t val = $src2$$constant;
4556     if (val >= 0) {
4557       __ subsw(zr, reg, val);
4558     } else {
4559       __ addsw(zr, reg, -val);
4560     }
4561   %}
4562 
4563   enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
4564     MacroAssembler _masm(&cbuf);
4565     Register reg1 = as_Register($src1$$reg);
4566     u_int32_t val = (u_int32_t)$src2$$constant;
4567     __ movw(rscratch1, val);
4568     __ cmpw(reg1, rscratch1);
4569   %}
4570 
4571   enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
4572     MacroAssembler _masm(&cbuf);
4573     Register reg1 = as_Register($src1$$reg);
4574     Register reg2 = as_Register($src2$$reg);
4575     __ cmp(reg1, reg2);
4576   %}
4577 
4578   enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
4579     MacroAssembler _masm(&cbuf);
4580     Register reg = as_Register($src1$$reg);
4581     int64_t val = $src2$$constant;
4582     if (val >= 0) {
4583       __ subs(zr, reg, val);
4584     } else if (val != -val) {
4585       __ adds(zr, reg, -val);
4586     } else {
4587     // aargh, Long.MIN_VALUE is a special case
4588       __ orr(rscratch1, zr, (u_int64_t)val);
4589       __ subs(zr, reg, rscratch1);
4590     }
4591   %}
4592 
4593   enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
4594     MacroAssembler _masm(&cbuf);
4595     Register reg1 = as_Register($src1$$reg);
4596     u_int64_t val = (u_int64_t)$src2$$constant;
4597     __ mov(rscratch1, val);
4598     __ cmp(reg1, rscratch1);
4599   %}
4600 
4601   enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
4602     MacroAssembler _masm(&cbuf);
4603     Register reg1 = as_Register($src1$$reg);
4604     Register reg2 = as_Register($src2$$reg);
4605     __ cmp(reg1, reg2);
4606   %}
4607 
4608   enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
4609     MacroAssembler _masm(&cbuf);
4610     Register reg1 = as_Register($src1$$reg);
4611     Register reg2 = as_Register($src2$$reg);
4612     __ cmpw(reg1, reg2);
4613   %}
4614 
4615   enc_class aarch64_enc_testp(iRegP src) %{
4616     MacroAssembler _masm(&cbuf);
4617     Register reg = as_Register($src$$reg);
4618     __ cmp(reg, zr);
4619   %}
4620 
4621   enc_class aarch64_enc_testn(iRegN src) %{
4622     MacroAssembler _masm(&cbuf);
4623     Register reg = as_Register($src$$reg);
4624     __ cmpw(reg, zr);
4625   %}
4626 
4627   enc_class aarch64_enc_b(label lbl) %{
4628     MacroAssembler _masm(&cbuf);
4629     Label *L = $lbl$$label;
4630     __ b(*L);
4631   %}
4632 
4633   enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
4634     MacroAssembler _masm(&cbuf);
4635     Label *L = $lbl$$label;
4636     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
4637   %}
4638 
4639   enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
4640     MacroAssembler _masm(&cbuf);
4641     Label *L = $lbl$$label;
4642     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
4643   %}
4644 
  // Slow-path subtype check.  The helper falls through on a hit and
  // branches to 'miss' otherwise, with condition codes set for callers.
  // When $primary is set, the result register is zeroed on the hit path
  // (the miss path leaves it as the helper produced it).
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
4662 
  // Direct Java or runtime call.  All calls go through a trampoline so
  // the target may be anywhere in the address space; trampoline_call
  // returns NULL when the code cache is full, which must bail out the
  // compile rather than emit a broken call site.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else if (_optimized_virtual) {
      // Statically bound virtual call.
      call = __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
    } else {
      call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }

    if (_method) {
      // Emit stub for static call (re-resolution / interpreter entry).
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
  %}
4690 
4691   enc_class aarch64_enc_java_dynamic_call(method meth) %{
4692     MacroAssembler _masm(&cbuf);
4693     address call = __ ic_call((address)$meth$$method);
4694     if (call == NULL) {
4695       ciEnv::current()->record_failure("CodeCache is full");
4696       return;
4697     }
4698   %}
4699 
  // Code emitted after a Java call returns.
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      // (not implemented on AArch64 — traps if the flag is enabled).
      __ call_Unimplemented();
    }
  %}
4707 
  // Call from compiled Java code into the runtime.
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blrt
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // Target is inside the code cache: a trampoline call reaches it.
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      // Native entry point outside the code cache: classify the signature
      // so blrt can marshal GP/FP argument registers correctly.
      int gpcnt;
      int fpcnt;
      int rtype;
      getCallInfo(tf(), gpcnt, fpcnt, rtype);
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaThread::pd_last_frame().
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
      __ bind(retaddr);
      // Pop the breadcrumb frame.
      __ add(sp, sp, 2 * wordSize);
    }
  %}
4738 
  // Jump to the shared rethrow stub (may be out of branch range, hence
  // far_jump).
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}
4743 
  // Return to the caller through the link register.
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}
4748 
4749   enc_class aarch64_enc_tail_call(iRegP jump_target) %{
4750     MacroAssembler _masm(&cbuf);
4751     Register target_reg = as_Register($jump_target$$reg);
4752     __ br(target_reg);
4753   %}
4754 
4755   enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
4756     MacroAssembler _masm(&cbuf);
4757     Register target_reg = as_Register($jump_target$$reg);
4758     // exception oop should be in r0
4759     // ret addr has been popped into lr
4760     // callee expects it in r3
4761     __ mov(r3, lr);
4762     __ br(target_reg);
4763   %}
4764 
  // Fast path for inline monitor enter (C2 FastLock).  On exit the flags
  // encode the outcome: EQ = locked, NE = take the runtime slow path.
  // tmp/tmp2 are scratch; 'box' is the on-stack BasicLock.
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Load markOop from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      // oop cannot be null here, so this forces NE (slow path).
      __ cmp(oop, zr);
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
    }

    // Handle existing monitor
    if ((EmitSync & 0x02) == 0) {
      // we can use AArch64's bit test and branch here but
      // markoopDesc does not define a bit index just the bit value
      // so assert in case the bit pos changes
#     define __monitor_value_log2 1
      assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
      __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
#     undef __monitor_value_log2
    }

    // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
    __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);

    // Load Compare Value application register.

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markOop with mark and if equal exchange scratch1
    // with object markOop.
    {
      Label retry_load;
      __ bind(retry_load);
      __ ldaxr(tmp, oop);
      __ cmp(tmp, disp_hdr);
      // On the success branch to 'cont' the flags are EQ from this cmp.
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, box, oop);
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    // Formerly:
    // __ cmpxchgptr(/*oldv=*/disp_hdr,
    //               /*newv=*/box,
    //               /*addr=*/oop,
    //               /*tmp=*/tmp,
    //               cont,
    //               /*fail*/NULL);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object, have now locked it, and will continue at label cont.

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markOop of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    // The ands also leaves the flags EQ exactly on the recursive-lock hit.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      // The object's monitor m is unlocked iff m->owner == NULL,
      // otherwise m->owner may contain a thread or a stack address.
      //
      // Try to CAS m->owner from NULL to current thread.
      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
      __ mov(disp_hdr, zr);

      {
        Label retry_load, fail;
        __ bind(retry_load);
        __ ldaxr(rscratch1, tmp);
        // Flags from this cmp (EQ = we took ownership) survive to 'cont'.
        __ cmp(disp_hdr, rscratch1);
        __ br(Assembler::NE, fail);
        // use stlxr to ensure update is immediately visible
        __ stlxr(rscratch1, rthread, tmp);
        __ cbnzw(rscratch1, retry_load);
        __ bind(fail);
      }

      // Label next;
      // __ cmpxchgptr(/*oldv=*/disp_hdr,
      //               /*newv=*/rthread,
      //               /*addr=*/tmp,
      //               /*tmp=*/rscratch1,
      //               /*succeed*/next,
      //               /*fail*/NULL);
      // __ bind(next);

      // store a non-null value into the box.
      __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));

      // PPC port checks the following invariants
      // #ifdef ASSERT
      // bne(flag, cont);
      // We have acquired the monitor, check some invariants.
      // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
      // Invariant 1: _recursions should be 0.
      // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
      // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
      //                        "monitor->_recursions should be 0", -1);
      // Invariant 2: OwnerIsThread shouldn't be 0.
      // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
      //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
      //                           "monitor->OwnerIsThread shouldn't be 0", -1);
      // #endif
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure

  %}
4906 
  // TODO
  // reimplement this with custom cmpxchgptr code
  // which avoids some of the unnecessary branching
  //
  // Fast path for inline monitor exit (C2 FastUnlock).  On exit the flags
  // encode the outcome: EQ = unlocked, NE = take the runtime slow path.
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);


    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      // NOTE(review): this tests the monitor bit in disp_hdr (the box's
      // displaced header) rather than in tmp, the mark word just loaded
      // above.  If the branch is never taken for inflated locks, control
      // falls through to the CAS below, which fails to the slow path —
      // correct but slower.  Later JDK versions test tmp here; confirm.
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a lightweight lock; this is true if we see
    // the stack address of the basicLock in the markOop of the object.

      {
        Label retry_load;
        __ bind(retry_load);
        __ ldxr(tmp, oop);
        // Flags from this cmp (EQ on success) survive to 'cont'.
        __ cmp(box, tmp);
        __ br(Assembler::NE, cas_failed);
        // use stlxr to ensure update is immediately visible
        __ stlxr(tmp, disp_hdr, oop);
        __ cbzw(tmp, cont);
        __ b(retry_load);
      }

    // __ cmpxchgptr(/*compare_value=*/box,
    //               /*exchange_value=*/disp_hdr,
    //               /*where=*/oop,
    //               /*result=*/tmp,
    //               cont,
    //               /*cas_failed*/NULL);
    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // Falls through with NE set: the slow path is taken at 'cont'.
    __ bind(cas_failed);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      // Not owner or recursively locked: NE here routes to the slow path.
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);

      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      // Waiters present (non-zero): leave with NE and let the runtime
      // handle succession.
      __ cmp(rscratch1, zr);
      __ cbnz(rscratch1, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(rscratch1, tmp); // rscratch1 is zero
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
4999 
5000 %}
5001 
5002 //----------FRAME--------------------------------------------------------------
5003 // Definition of frame structure and management information.
5004 //
5005 //  S T A C K   L A Y O U T    Allocators stack-slot number
5006 //                             |   (to get allocators register number
5007 //  G  Owned by    |        |  v    add OptoReg::stack0())
5008 //  r   CALLER     |        |
5009 //  o     |        +--------+      pad to even-align allocators stack-slot
5010 //  w     V        |  pad0  |        numbers; owned by CALLER
5011 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
5012 //  h     ^        |   in   |  5
5013 //        |        |  args  |  4   Holes in incoming args owned by SELF
5014 //  |     |        |        |  3
5015 //  |     |        +--------+
5016 //  V     |        | old out|      Empty on Intel, window on Sparc
5017 //        |    old |preserve|      Must be even aligned.
5018 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
5019 //        |        |   in   |  3   area for Intel ret address
5020 //     Owned by    |preserve|      Empty on Sparc.
5021 //       SELF      +--------+
5022 //        |        |  pad2  |  2   pad to align old SP
5023 //        |        +--------+  1
5024 //        |        | locks  |  0
5025 //        |        +--------+----> OptoReg::stack0(), even aligned
5026 //        |        |  pad1  | 11   pad to align new SP
5027 //        |        +--------+
5028 //        |        |        | 10
5029 //        |        | spills |  9   spills
5030 //        V        |        |  8   (pad0 slot for callee)
5031 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
5032 //        ^        |  out   |  7
5033 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
5034 //     Owned by    +--------+
5035 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
5036 //        |    new |preserve|      Must be even-aligned.
5037 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
5038 //        |        |        |
5039 //
5040 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
5041 //         known from SELF's arguments and the Java calling convention.
5042 //         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
5050 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
5051 //         even aligned with pad0 as needed.
5052 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
5053 //           (the latter is true on Intel but is it false on AArch64?)
5054 //         region 6-11 is even aligned; it may be padded out more so that
5055 //         the region from SP to FP meets the minimum stack alignment.
5056 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
5057 //         alignment.  Region 11, pad1, may be dynamically extended so that
5058 //         SP meets the minimum alignment.
5059 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  // NOTE(review): 2 x 32-bit stack slots = one 64-bit word per lock;
  // confirm this matches the runtime's per-monitor stack footprint.
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  // NOTE(review): R31 in this file's register naming — presumably the
  // slot aliasing the hardware stack pointer; confirm against the
  // register definition block at the top of the file.
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  // Integer/pointer results come back in R0 (with R0_H supplying the
  // high half for 64-bit and pointer values); floating-point results
  // come back in V0.  hi[] holds OptoReg::Bad where the value has no
  // high half (32-bit ints/floats).
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    // NOTE(review): the assert below excludes Op_RegN even though both
    // tables have an Op_RegN entry — confirm narrow-oop returns cannot
    // reach here.
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                       // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
5163 
5164 //----------ATTRIBUTES---------------------------------------------------------
5165 //----------Operand Attributes-------------------------------------------------
5166 op_attrib op_cost(1);        // Required cost attribute
5167 
5168 //----------Instruction Attributes---------------------------------------------
5169 ins_attrib ins_cost(INSN_COST); // Required cost attribute
5170 ins_attrib ins_size(32);        // Required size attribute (in bits)
5171 ins_attrib ins_short_branch(0); // Required flag: is this instruction
5172                                 // a non-matching short branch variant
5173                                 // of some long branch?
5174 ins_attrib ins_alignment(4);    // Required alignment attribute (must
5175                                 // be a power of 2) specifies the
5176                                 // alignment that some part of the
5177                                 // instruction (not necessarily the
5178                                 // start) requires.  If > 1, a
5179                                 // compute_padding() function must be
5180                                 // provided for the instruction
5181 
5182 //----------OPERANDS-----------------------------------------------------------
5183 // Operand definitions must precede instruction definitions for correct parsing
5184 // in the ADLC because operands constitute user defined types which are used in
5185 // instruction definitions.
5186 
5187 //----------Simple Operands----------------------------------------------------
5188 
5189 // Integer operands 32 bit
5190 // 32 bit immediate
5191 operand immI()
5192 %{
5193   match(ConI);
5194 
5195   op_cost(0);
5196   format %{ %}
5197   interface(CONST_INTER);
5198 %}
5199 
5200 // 32 bit zero
5201 operand immI0()
5202 %{
5203   predicate(n->get_int() == 0);
5204   match(ConI);
5205 
5206   op_cost(0);
5207   format %{ %}
5208   interface(CONST_INTER);
5209 %}
5210 
5211 // 32 bit unit increment
5212 operand immI_1()
5213 %{
5214   predicate(n->get_int() == 1);
5215   match(ConI);
5216 
5217   op_cost(0);
5218   format %{ %}
5219   interface(CONST_INTER);
5220 %}
5221 
5222 // 32 bit unit decrement
5223 operand immI_M1()
5224 %{
5225   predicate(n->get_int() == -1);
5226   match(ConI);
5227 
5228   op_cost(0);
5229   format %{ %}
5230   interface(CONST_INTER);
5231 %}
5232 
5233 operand immI_le_4()
5234 %{
5235   predicate(n->get_int() <= 4);
5236   match(ConI);
5237 
5238   op_cost(0);
5239   format %{ %}
5240   interface(CONST_INTER);
5241 %}
5242 
5243 operand immI_31()
5244 %{
5245   predicate(n->get_int() == 31);
5246   match(ConI);
5247 
5248   op_cost(0);
5249   format %{ %}
5250   interface(CONST_INTER);
5251 %}
5252 
5253 operand immI_8()
5254 %{
5255   predicate(n->get_int() == 8);
5256   match(ConI);
5257 
5258   op_cost(0);
5259   format %{ %}
5260   interface(CONST_INTER);
5261 %}
5262 
5263 operand immI_16()
5264 %{
5265   predicate(n->get_int() == 16);
5266   match(ConI);
5267 
5268   op_cost(0);
5269   format %{ %}
5270   interface(CONST_INTER);
5271 %}
5272 
5273 operand immI_24()
5274 %{
5275   predicate(n->get_int() == 24);
5276   match(ConI);
5277 
5278   op_cost(0);
5279   format %{ %}
5280   interface(CONST_INTER);
5281 %}
5282 
5283 operand immI_32()
5284 %{
5285   predicate(n->get_int() == 32);
5286   match(ConI);
5287 
5288   op_cost(0);
5289   format %{ %}
5290   interface(CONST_INTER);
5291 %}
5292 
5293 operand immI_48()
5294 %{
5295   predicate(n->get_int() == 48);
5296   match(ConI);
5297 
5298   op_cost(0);
5299   format %{ %}
5300   interface(CONST_INTER);
5301 %}
5302 
5303 operand immI_56()
5304 %{
5305   predicate(n->get_int() == 56);
5306   match(ConI);
5307 
5308   op_cost(0);
5309   format %{ %}
5310   interface(CONST_INTER);
5311 %}
5312 
5313 operand immI_64()
5314 %{
5315   predicate(n->get_int() == 64);
5316   match(ConI);
5317 
5318   op_cost(0);
5319   format %{ %}
5320   interface(CONST_INTER);
5321 %}
5322 
5323 operand immI_255()
5324 %{
5325   predicate(n->get_int() == 255);
5326   match(ConI);
5327 
5328   op_cost(0);
5329   format %{ %}
5330   interface(CONST_INTER);
5331 %}
5332 
5333 operand immI_65535()
5334 %{
5335   predicate(n->get_int() == 65535);
5336   match(ConI);
5337 
5338   op_cost(0);
5339   format %{ %}
5340   interface(CONST_INTER);
5341 %}
5342 
// Immediate 63 — the shift-count mask for 64-bit shifts.
// NOTE(review): despite the immL name this matches ConI and reads
// get_int(); shift counts are int constants in the ideal graph, which
// is presumably why — confirm, or consider renaming to immI_63 for
// consistency with the immI_* family above.
operand immL_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5352 
// Immediate 255.
// NOTE(review): same naming oddity as immL_63 — matches a 32-bit ConI
// via get_int() despite the immL prefix; contrast with the genuinely
// long immL_65535 below which matches ConL/get_long().
operand immL_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5362 
5363 operand immL_65535()
5364 %{
5365   predicate(n->get_long() == 65535L);
5366   match(ConL);
5367 
5368   op_cost(0);
5369   format %{ %}
5370   interface(CONST_INTER);
5371 %}
5372 
5373 operand immL_4294967295()
5374 %{
5375   predicate(n->get_long() == 4294967295L);
5376   match(ConL);
5377 
5378   op_cost(0);
5379   format %{ %}
5380   interface(CONST_INTER);
5381 %}
5382 
// Long immediate of the form 2^k - 1: a contiguous run of low-order
// one bits, with the top two bits required clear (so k <= 62).
// is_power_of_2(n + 1) establishes the contiguous-mask shape.
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5393 
// Int immediate of the form 2^k - 1: a contiguous run of low-order
// one bits, with the top two bits required clear (so k <= 30).
// 32-bit counterpart of immL_bitmask above.
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5404 
5405 // Scale values for scaled offset addressing modes (up to long but not quad)
5406 operand immIScale()
5407 %{
5408   predicate(0 <= n->get_int() && (n->get_int() <= 3));
5409   match(ConI);
5410 
5411   op_cost(0);
5412   format %{ %}
5413   interface(CONST_INTER);
5414 %}
5415 
5416 // 26 bit signed offset -- for pc-relative branches
5417 operand immI26()
5418 %{
5419   predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
5420   match(ConI);
5421 
5422   op_cost(0);
5423   format %{ %}
5424   interface(CONST_INTER);
5425 %}
5426 
5427 // 19 bit signed offset -- for pc-relative loads
5428 operand immI19()
5429 %{
5430   predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
5431   match(ConI);
5432 
5433   op_cost(0);
5434   format %{ %}
5435   interface(CONST_INTER);
5436 %}
5437 
5438 // 12 bit unsigned offset -- for base plus immediate loads
5439 operand immIU12()
5440 %{
5441   predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
5442   match(ConI);
5443 
5444   op_cost(0);
5445   format %{ %}
5446   interface(CONST_INTER);
5447 %}
5448 
5449 operand immLU12()
5450 %{
5451   predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
5452   match(ConL);
5453 
5454   op_cost(0);
5455   format %{ %}
5456   interface(CONST_INTER);
5457 %}
5458 
5459 // Offset for scaled or unscaled immediate loads and stores
5460 operand immIOffset()
5461 %{
5462   predicate(Address::offset_ok_for_immed(n->get_int()));
5463   match(ConI);
5464 
5465   op_cost(0);
5466   format %{ %}
5467   interface(CONST_INTER);
5468 %}
5469 
5470 operand immLoffset()
5471 %{
5472   predicate(Address::offset_ok_for_immed(n->get_long()));
5473   match(ConL);
5474 
5475   op_cost(0);
5476   format %{ %}
5477   interface(CONST_INTER);
5478 %}
5479 
5480 // 32 bit integer valid for add sub immediate
5481 operand immIAddSub()
5482 %{
5483   predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
5484   match(ConI);
5485   op_cost(0);
5486   format %{ %}
5487   interface(CONST_INTER);
5488 %}
5489 
5490 // 32 bit unsigned integer valid for logical immediate
5491 // TODO -- check this is right when e.g the mask is 0x80000000
5492 operand immILog()
5493 %{
5494   predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
5495   match(ConI);
5496 
5497   op_cost(0);
5498   format %{ %}
5499   interface(CONST_INTER);
5500 %}
5501 
5502 // Integer operands 64 bit
5503 // 64 bit immediate
5504 operand immL()
5505 %{
5506   match(ConL);
5507 
5508   op_cost(0);
5509   format %{ %}
5510   interface(CONST_INTER);
5511 %}
5512 
5513 // 64 bit zero
5514 operand immL0()
5515 %{
5516   predicate(n->get_long() == 0);
5517   match(ConL);
5518 
5519   op_cost(0);
5520   format %{ %}
5521   interface(CONST_INTER);
5522 %}
5523 
5524 // 64 bit unit increment
5525 operand immL_1()
5526 %{
5527   predicate(n->get_long() == 1);
5528   match(ConL);
5529 
5530   op_cost(0);
5531   format %{ %}
5532   interface(CONST_INTER);
5533 %}
5534 
5535 // 64 bit unit decrement
5536 operand immL_M1()
5537 %{
5538   predicate(n->get_long() == -1);
5539   match(ConL);
5540 
5541   op_cost(0);
5542   format %{ %}
5543   interface(CONST_INTER);
5544 %}
5545 
5546 // 32 bit offset of pc in thread anchor
5547 
5548 operand immL_pc_off()
5549 %{
5550   predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
5551                              in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
5552   match(ConL);
5553 
5554   op_cost(0);
5555   format %{ %}
5556   interface(CONST_INTER);
5557 %}
5558 
5559 // 64 bit integer valid for add sub immediate
5560 operand immLAddSub()
5561 %{
5562   predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
5563   match(ConL);
5564   op_cost(0);
5565   format %{ %}
5566   interface(CONST_INTER);
5567 %}
5568 
5569 // 64 bit integer valid for logical immediate
5570 operand immLLog()
5571 %{
5572   predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
5573   match(ConL);
5574   op_cost(0);
5575   format %{ %}
5576   interface(CONST_INTER);
5577 %}
5578 
5579 // Long Immediate: low 32-bit mask
5580 operand immL_32bits()
5581 %{
5582   predicate(n->get_long() == 0xFFFFFFFFL);
5583   match(ConL);
5584   op_cost(0);
5585   format %{ %}
5586   interface(CONST_INTER);
5587 %}
5588 
5589 // Pointer operands
5590 // Pointer Immediate
5591 operand immP()
5592 %{
5593   match(ConP);
5594 
5595   op_cost(0);
5596   format %{ %}
5597   interface(CONST_INTER);
5598 %}
5599 
5600 // NULL Pointer Immediate
5601 operand immP0()
5602 %{
5603   predicate(n->get_ptr() == 0);
5604   match(ConP);
5605 
5606   op_cost(0);
5607   format %{ %}
5608   interface(CONST_INTER);
5609 %}
5610 
5611 // Pointer Immediate One
5612 // this is used in object initialization (initial object header)
5613 operand immP_1()
5614 %{
5615   predicate(n->get_ptr() == 1);
5616   match(ConP);
5617 
5618   op_cost(0);
5619   format %{ %}
5620   interface(CONST_INTER);
5621 %}
5622 
5623 // Polling Page Pointer Immediate
5624 operand immPollPage()
5625 %{
5626   predicate((address)n->get_ptr() == os::get_polling_page());
5627   match(ConP);
5628 
5629   op_cost(0);
5630   format %{ %}
5631   interface(CONST_INTER);
5632 %}
5633 
5634 // Card Table Byte Map Base
5635 operand immByteMapBase()
5636 %{
5637   // Get base of card map
5638   predicate((jbyte*)n->get_ptr() ==
5639         ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
5640   match(ConP);
5641 
5642   op_cost(0);
5643   format %{ %}
5644   interface(CONST_INTER);
5645 %}
5646 
5647 // Pointer Immediate Minus One
5648 // this is used when we want to write the current PC to the thread anchor
5649 operand immP_M1()
5650 %{
5651   predicate(n->get_ptr() == -1);
5652   match(ConP);
5653 
5654   op_cost(0);
5655   format %{ %}
5656   interface(CONST_INTER);
5657 %}
5658 
5659 // Pointer Immediate Minus Two
5660 // this is used when we want to write the current PC to the thread anchor
5661 operand immP_M2()
5662 %{
5663   predicate(n->get_ptr() == -2);
5664   match(ConP);
5665 
5666   op_cost(0);
5667   format %{ %}
5668   interface(CONST_INTER);
5669 %}
5670 
5671 // Float and Double operands
5672 // Double Immediate
5673 operand immD()
5674 %{
5675   match(ConD);
5676   op_cost(0);
5677   format %{ %}
5678   interface(CONST_INTER);
5679 %}
5680 
5681 // Double Immediate: +0.0d
5682 operand immD0()
5683 %{
5684   predicate(jlong_cast(n->getd()) == 0);
5685   match(ConD);
5686 
5687   op_cost(0);
5688   format %{ %}
5689   interface(CONST_INTER);
5690 %}
5691 
// Double Immediate: value encodable as a floating-point move immediate.
5693 operand immDPacked()
5694 %{
5695   predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
5696   match(ConD);
5697   op_cost(0);
5698   format %{ %}
5699   interface(CONST_INTER);
5700 %}
5701 
5702 // Float Immediate
5703 operand immF()
5704 %{
5705   match(ConF);
5706   op_cost(0);
5707   format %{ %}
5708   interface(CONST_INTER);
5709 %}
5710 
5711 // Float Immediate: +0.0f.
5712 operand immF0()
5713 %{
5714   predicate(jint_cast(n->getf()) == 0);
5715   match(ConF);
5716 
5717   op_cost(0);
5718   format %{ %}
5719   interface(CONST_INTER);
5720 %}
5721 
5722 //
5723 operand immFPacked()
5724 %{
5725   predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
5726   match(ConF);
5727   op_cost(0);
5728   format %{ %}
5729   interface(CONST_INTER);
5730 %}
5731 
5732 // Narrow pointer operands
5733 // Narrow Pointer Immediate
5734 operand immN()
5735 %{
5736   match(ConN);
5737 
5738   op_cost(0);
5739   format %{ %}
5740   interface(CONST_INTER);
5741 %}
5742 
5743 // Narrow NULL Pointer Immediate
5744 operand immN0()
5745 %{
5746   predicate(n->get_narrowcon() == 0);
5747   match(ConN);
5748 
5749   op_cost(0);
5750   format %{ %}
5751   interface(CONST_INTER);
5752 %}
5753 
5754 operand immNKlass()
5755 %{
5756   match(ConNKlass);
5757 
5758   op_cost(0);
5759   format %{ %}
5760   interface(CONST_INTER);
5761 %}
5762 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
5765 operand iRegI()
5766 %{
5767   constraint(ALLOC_IN_RC(any_reg32));
5768   match(RegI);
5769   match(iRegINoSp);
5770   op_cost(0);
5771   format %{ %}
5772   interface(REG_INTER);
5773 %}
5774 
5775 // Integer 32 bit Register not Special
5776 operand iRegINoSp()
5777 %{
5778   constraint(ALLOC_IN_RC(no_special_reg32));
5779   match(RegI);
5780   op_cost(0);
5781   format %{ %}
5782   interface(REG_INTER);
5783 %}
5784 
5785 // Integer 64 bit Register Operands
5786 // Integer 64 bit Register (includes SP)
5787 operand iRegL()
5788 %{
5789   constraint(ALLOC_IN_RC(any_reg));
5790   match(RegL);
5791   match(iRegLNoSp);
5792   op_cost(0);
5793   format %{ %}
5794   interface(REG_INTER);
5795 %}
5796 
5797 // Integer 64 bit Register not Special
5798 operand iRegLNoSp()
5799 %{
5800   constraint(ALLOC_IN_RC(no_special_reg));
5801   match(RegL);
5802   format %{ %}
5803   interface(REG_INTER);
5804 %}
5805 
5806 // Pointer Register Operands
5807 // Pointer Register
5808 operand iRegP()
5809 %{
5810   constraint(ALLOC_IN_RC(ptr_reg));
5811   match(RegP);
5812   match(iRegPNoSp);
5813   match(iRegP_R0);
5814   //match(iRegP_R2);
5815   //match(iRegP_R4);
5816   //match(iRegP_R5);
5817   match(thread_RegP);
5818   op_cost(0);
5819   format %{ %}
5820   interface(REG_INTER);
5821 %}
5822 
5823 // Pointer 64 bit Register not Special
5824 operand iRegPNoSp()
5825 %{
5826   constraint(ALLOC_IN_RC(no_special_ptr_reg));
5827   match(RegP);
5828   // match(iRegP);
5829   // match(iRegP_R0);
5830   // match(iRegP_R2);
5831   // match(iRegP_R4);
5832   // match(iRegP_R5);
5833   // match(thread_RegP);
5834   op_cost(0);
5835   format %{ %}
5836   interface(REG_INTER);
5837 %}
5838 
5839 // Pointer 64 bit Register R0 only
5840 operand iRegP_R0()
5841 %{
5842   constraint(ALLOC_IN_RC(r0_reg));
5843   match(RegP);
5844   // match(iRegP);
5845   match(iRegPNoSp);
5846   op_cost(0);
5847   format %{ %}
5848   interface(REG_INTER);
5849 %}
5850 
5851 // Pointer 64 bit Register R1 only
5852 operand iRegP_R1()
5853 %{
5854   constraint(ALLOC_IN_RC(r1_reg));
5855   match(RegP);
5856   // match(iRegP);
5857   match(iRegPNoSp);
5858   op_cost(0);
5859   format %{ %}
5860   interface(REG_INTER);
5861 %}
5862 
5863 // Pointer 64 bit Register R2 only
5864 operand iRegP_R2()
5865 %{
5866   constraint(ALLOC_IN_RC(r2_reg));
5867   match(RegP);
5868   // match(iRegP);
5869   match(iRegPNoSp);
5870   op_cost(0);
5871   format %{ %}
5872   interface(REG_INTER);
5873 %}
5874 
5875 // Pointer 64 bit Register R3 only
5876 operand iRegP_R3()
5877 %{
5878   constraint(ALLOC_IN_RC(r3_reg));
5879   match(RegP);
5880   // match(iRegP);
5881   match(iRegPNoSp);
5882   op_cost(0);
5883   format %{ %}
5884   interface(REG_INTER);
5885 %}
5886 
5887 // Pointer 64 bit Register R4 only
5888 operand iRegP_R4()
5889 %{
5890   constraint(ALLOC_IN_RC(r4_reg));
5891   match(RegP);
5892   // match(iRegP);
5893   match(iRegPNoSp);
5894   op_cost(0);
5895   format %{ %}
5896   interface(REG_INTER);
5897 %}
5898 
5899 // Pointer 64 bit Register R5 only
5900 operand iRegP_R5()
5901 %{
5902   constraint(ALLOC_IN_RC(r5_reg));
5903   match(RegP);
5904   // match(iRegP);
5905   match(iRegPNoSp);
5906   op_cost(0);
5907   format %{ %}
5908   interface(REG_INTER);
5909 %}
5910 
5911 // Pointer 64 bit Register R10 only
5912 operand iRegP_R10()
5913 %{
5914   constraint(ALLOC_IN_RC(r10_reg));
5915   match(RegP);
5916   // match(iRegP);
5917   match(iRegPNoSp);
5918   op_cost(0);
5919   format %{ %}
5920   interface(REG_INTER);
5921 %}
5922 
5923 // Long 64 bit Register R11 only
5924 operand iRegL_R11()
5925 %{
5926   constraint(ALLOC_IN_RC(r11_reg));
5927   match(RegL);
5928   match(iRegLNoSp);
5929   op_cost(0);
5930   format %{ %}
5931   interface(REG_INTER);
5932 %}
5933 
5934 // Pointer 64 bit Register FP only
5935 operand iRegP_FP()
5936 %{
5937   constraint(ALLOC_IN_RC(fp_reg));
5938   match(RegP);
5939   // match(iRegP);
5940   op_cost(0);
5941   format %{ %}
5942   interface(REG_INTER);
5943 %}
5944 
5945 // Register R0 only
5946 operand iRegI_R0()
5947 %{
5948   constraint(ALLOC_IN_RC(int_r0_reg));
5949   match(RegI);
5950   match(iRegINoSp);
5951   op_cost(0);
5952   format %{ %}
5953   interface(REG_INTER);
5954 %}
5955 
5956 // Register R2 only
5957 operand iRegI_R2()
5958 %{
5959   constraint(ALLOC_IN_RC(int_r2_reg));
5960   match(RegI);
5961   match(iRegINoSp);
5962   op_cost(0);
5963   format %{ %}
5964   interface(REG_INTER);
5965 %}
5966 
5967 // Register R3 only
5968 operand iRegI_R3()
5969 %{
5970   constraint(ALLOC_IN_RC(int_r3_reg));
5971   match(RegI);
5972   match(iRegINoSp);
5973   op_cost(0);
5974   format %{ %}
5975   interface(REG_INTER);
5976 %}
5977 
// Register R4 only
5980 operand iRegI_R4()
5981 %{
5982   constraint(ALLOC_IN_RC(int_r4_reg));
5983   match(RegI);
5984   match(iRegINoSp);
5985   op_cost(0);
5986   format %{ %}
5987   interface(REG_INTER);
5988 %}
5989 
5990 
5991 // Pointer Register Operands
5992 // Narrow Pointer Register
5993 operand iRegN()
5994 %{
5995   constraint(ALLOC_IN_RC(any_reg32));
5996   match(RegN);
5997   match(iRegNNoSp);
5998   op_cost(0);
5999   format %{ %}
6000   interface(REG_INTER);
6001 %}
6002 
// Narrow Pointer Register not Special
6004 operand iRegNNoSp()
6005 %{
6006   constraint(ALLOC_IN_RC(no_special_reg32));
6007   match(RegN);
6008   op_cost(0);
6009   format %{ %}
6010   interface(REG_INTER);
6011 %}
6012 
6013 // heap base register -- used for encoding immN0
6014 
6015 operand iRegIHeapbase()
6016 %{
6017   constraint(ALLOC_IN_RC(heapbase_reg));
6018   match(RegI);
6019   op_cost(0);
6020   format %{ %}
6021   interface(REG_INTER);
6022 %}
6023 
6024 // Float Register
6025 // Float register operands
6026 operand vRegF()
6027 %{
6028   constraint(ALLOC_IN_RC(float_reg));
6029   match(RegF);
6030 
6031   op_cost(0);
6032   format %{ %}
6033   interface(REG_INTER);
6034 %}
6035 
6036 // Double Register
6037 // Double register operands
6038 operand vRegD()
6039 %{
6040   constraint(ALLOC_IN_RC(double_reg));
6041   match(RegD);
6042 
6043   op_cost(0);
6044   format %{ %}
6045   interface(REG_INTER);
6046 %}
6047 
6048 operand vecD()
6049 %{
6050   constraint(ALLOC_IN_RC(vectord_reg));
6051   match(VecD);
6052 
6053   op_cost(0);
6054   format %{ %}
6055   interface(REG_INTER);
6056 %}
6057 
6058 operand vecX()
6059 %{
6060   constraint(ALLOC_IN_RC(vectorx_reg));
6061   match(VecX);
6062 
6063   op_cost(0);
6064   format %{ %}
6065   interface(REG_INTER);
6066 %}
6067 
6068 operand vRegD_V0()
6069 %{
6070   constraint(ALLOC_IN_RC(v0_reg));
6071   match(RegD);
6072   op_cost(0);
6073   format %{ %}
6074   interface(REG_INTER);
6075 %}
6076 
6077 operand vRegD_V1()
6078 %{
6079   constraint(ALLOC_IN_RC(v1_reg));
6080   match(RegD);
6081   op_cost(0);
6082   format %{ %}
6083   interface(REG_INTER);
6084 %}
6085 
6086 operand vRegD_V2()
6087 %{
6088   constraint(ALLOC_IN_RC(v2_reg));
6089   match(RegD);
6090   op_cost(0);
6091   format %{ %}
6092   interface(REG_INTER);
6093 %}
6094 
6095 operand vRegD_V3()
6096 %{
6097   constraint(ALLOC_IN_RC(v3_reg));
6098   match(RegD);
6099   op_cost(0);
6100   format %{ %}
6101   interface(REG_INTER);
6102 %}
6103 
6104 // Flags register, used as output of signed compare instructions
6105 
6106 // note that on AArch64 we also use this register as the output for
6107 // for floating point compare instructions (CmpF CmpD). this ensures
6108 // that ordered inequality tests use GT, GE, LT or LE none of which
6109 // pass through cases where the result is unordered i.e. one or both
6110 // inputs to the compare is a NaN. this means that the ideal code can
6111 // replace e.g. a GT with an LE and not end up capturing the NaN case
6112 // (where the comparison should always fail). EQ and NE tests are
6113 // always generated in ideal code so that unordered folds into the NE
6114 // case, matching the behaviour of AArch64 NE.
6115 //
6116 // This differs from x86 where the outputs of FP compares use a
6117 // special FP flags registers and where compares based on this
6118 // register are distinguished into ordered inequalities (cmpOpUCF) and
6119 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
6120 // to explicitly handle the unordered case in branches. x86 also has
6121 // to include extra CMoveX rules to accept a cmpOpUCF input.
6122 
6123 operand rFlagsReg()
6124 %{
6125   constraint(ALLOC_IN_RC(int_flags));
6126   match(RegFlags);
6127 
6128   op_cost(0);
6129   format %{ "RFLAGS" %}
6130   interface(REG_INTER);
6131 %}
6132 
6133 // Flags register, used as output of unsigned compare instructions
6134 operand rFlagsRegU()
6135 %{
6136   constraint(ALLOC_IN_RC(int_flags));
6137   match(RegFlags);
6138 
6139   op_cost(0);
6140   format %{ "RFLAGSU" %}
6141   interface(REG_INTER);
6142 %}
6143 
6144 // Special Registers
6145 
6146 // Method Register
6147 operand inline_cache_RegP(iRegP reg)
6148 %{
6149   constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
6150   match(reg);
6151   match(iRegPNoSp);
6152   op_cost(0);
6153   format %{ %}
6154   interface(REG_INTER);
6155 %}
6156 
6157 operand interpreter_method_oop_RegP(iRegP reg)
6158 %{
6159   constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
6160   match(reg);
6161   match(iRegPNoSp);
6162   op_cost(0);
6163   format %{ %}
6164   interface(REG_INTER);
6165 %}
6166 
6167 // Thread Register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg (comment previously said link_reg — copy/paste from lr_RegP below)
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6176 
6177 operand lr_RegP(iRegP reg)
6178 %{
6179   constraint(ALLOC_IN_RC(lr_reg)); // link_reg
6180   match(reg);
6181   op_cost(0);
6182   format %{ %}
6183   interface(REG_INTER);
6184 %}
6185 
6186 //----------Memory Operands----------------------------------------------------
6187 
6188 operand indirect(iRegP reg)
6189 %{
6190   constraint(ALLOC_IN_RC(ptr_reg));
6191   match(reg);
6192   op_cost(0);
6193   format %{ "[$reg]" %}
6194   interface(MEMORY_INTER) %{
6195     base($reg);
6196     index(0xffffffff);
6197     scale(0x0);
6198     disp(0x0);
6199   %}
6200 %}
6201 
6202 operand indIndexScaledOffsetI(iRegP reg, iRegL lreg, immIScale scale, immIU12 off)
6203 %{
6204   constraint(ALLOC_IN_RC(ptr_reg));
6205   match(AddP (AddP reg (LShiftL lreg scale)) off);
6206   op_cost(INSN_COST);
6207   format %{ "$reg, $lreg lsl($scale), $off" %}
6208   interface(MEMORY_INTER) %{
6209     base($reg);
6210     index($lreg);
6211     scale($scale);
6212     disp($off);
6213   %}
6214 %}
6215 
6216 operand indIndexScaledOffsetL(iRegP reg, iRegL lreg, immIScale scale, immLU12 off)
6217 %{
6218   constraint(ALLOC_IN_RC(ptr_reg));
6219   match(AddP (AddP reg (LShiftL lreg scale)) off);
6220   op_cost(INSN_COST);
6221   format %{ "$reg, $lreg lsl($scale), $off" %}
6222   interface(MEMORY_INTER) %{
6223     base($reg);
6224     index($lreg);
6225     scale($scale);
6226     disp($off);
6227   %}
6228 %}
6229 
6230 operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off)
6231 %{
6232   constraint(ALLOC_IN_RC(ptr_reg));
6233   match(AddP (AddP reg (ConvI2L ireg)) off);
6234   op_cost(INSN_COST);
6235   format %{ "$reg, $ireg, $off I2L" %}
6236   interface(MEMORY_INTER) %{
6237     base($reg);
6238     index($ireg);
6239     scale(0x0);
6240     disp($off);
6241   %}
6242 %}
6243 
6244 operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off)
6245 %{
6246   constraint(ALLOC_IN_RC(ptr_reg));
6247   match(AddP (AddP reg (LShiftL (ConvI2L ireg) scale)) off);
6248   op_cost(INSN_COST);
6249   format %{ "$reg, $ireg sxtw($scale), $off I2L" %}
6250   interface(MEMORY_INTER) %{
6251     base($reg);
6252     index($ireg);
6253     scale($scale);
6254     disp($off);
6255   %}
6256 %}
6257 
// Memory operand: base pointer + (sign-extended int index << scale),
// no displacement.
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Memory operand: base pointer + (long index << scale), no displacement.
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}
6285 
// Memory operand: base pointer + long index register, no scaling and
// no displacement.
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
6299 
// Memory operand: base pointer + int immediate offset, no index register
// (index 0xffffffff is the ADLC encoding for "no index").
operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// As indOffI but with a long immediate offset.
operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6327 
6328 
// Narrow-oop (compressed pointer) base with no offset. Legal only when
// narrow_oop_shift() == 0, i.e. when decoding a narrow oop requires no
// shift, so the compressed register value can be used directly as a base.
operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}
6343 
// Narrow-oop variant of indIndexScaledOffsetI: decoded narrow base +
// (long index << scale) + unsigned 12-bit int offset. Only legal when
// narrow_oop_shift() == 0 (decode is a no-op).
// NOTE(review): op_cost(0) here vs. INSN_COST on the non-narrow twin and
// on the LN variant below — looks inconsistent; confirm against upstream.
operand indIndexScaledOffsetIN(iRegN reg, iRegL lreg, immIScale scale, immIU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// As above but with an unsigned 12-bit long offset.
operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}
6373 
// Narrow-oop variant of indIndexOffsetI2L: decoded narrow base +
// sign-extended int index + unsigned 12-bit long offset.
operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $ireg, $off I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Narrow-oop variant of indIndexScaledOffsetI2L: decoded narrow base +
// (sign-extended int index << scale) + unsigned 12-bit long offset.
operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $ireg sxtw($scale), $off I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}
6403 
// Narrow-oop variant of indIndexScaledI2L: decoded narrow base +
// (sign-extended int index << scale), no displacement.
operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Narrow-oop variant of indIndexScaled: decoded narrow base +
// (long index << scale), no displacement.
operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}
6433 
// Narrow-oop variant of indIndex: decoded narrow base + long index
// register, no scaling and no displacement.
operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
6448 
// Narrow-oop variant of indOffI: decoded narrow base + int immediate
// offset, no index register.
operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Narrow-oop variant of indOffL: decoded narrow base + long immediate
// offset, no index register.
operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6478 
6479 
6480 
6481 // AArch64 opto stubs need to write to the pc slot in the thread anchor
// Memory operand addressing the pc slot of the JavaThread's frame anchor:
// thread register base + the fixed pc-slot offset (immL_pc_off).
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6495 
6496 //----------Special Memory Operands--------------------------------------------
6497 // Stack Slot Operand - This operand is used for loading and storing temporary
6498 //                      values on the stack where a match requires a value to
6499 //                      flow through memory.
// Stack-slot operands: address values spilled to the stack as
// [SP + slot offset]. base(0x1e) is the matcher encoding of the stack
// pointer; the "// RSP" comments are naming inherited from the x86 AD
// file. The $reg stack register supplies the displacement.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding an int value.
operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a float value.
operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a double value.
operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a long value.
operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
6570 
6571 // Operands for expressing Control Flow
6572 // NOTE: Label is a predefined operand which should not be redefined in
6573 //       the AD file. It is generically handled within the ADLC.
6574 
6575 //----------Conditional Branch Operands----------------------------------------
6576 // Comparison Op  - This is the operation of the comparison, and is limited to
6577 //                  the following set of codes:
6578 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6579 //
6580 // Other attributes of the comparison, such as unsignedness, are specified
6581 // by the comparison instruction that sets a condition code flags register.
6582 // That result is represented by a flags operand whose subtype is appropriate
6583 // to the unsignedness (etc.) of the comparison.
6584 //
6585 // Later, the instruction which matches both the Comparison Op (a Bool) and
6586 // the flags (produced by the Cmp) specifies the coding of the comparison op
6587 // by matching a specific subtype of Bool operand below, such as cmpOpU.
6588 
6589 // used for signed integral comparisons and fp comparisons
6590 
// Signed/FP comparison operand. The hex values are the ARMv8 condition
// code encodings for the listed mnemonics (eq=0x0, ne=0x1, lt=0xb,
// ge=0xa, le=0xd, gt=0xc, vs=0x6, vc=0x7).
operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6607 
6608 // used for unsigned integral comparisons
6609 
// Unsigned comparison operand: same as cmpOp but the ordering relations
// map to the unsigned ARMv8 conditions (lo=0x3, hs=0x2, ls=0x9, hi=0x8).
operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6626 
6627 // Special operand allowing long args to int ops to be truncated for free
6628 
// Matches (ConvL2I reg) as a plain register operand so a 32-bit
// instruction can consume the low half of a long for free (no explicit
// truncation instruction is emitted).
operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  interface(REG_INTER)
%}
6639 
// Restricted memory-operand class: direct, register-index, and
// immediate-offset forms only (presumably for vector load/store rules
// that cannot use scaled addressing — verify against uses of vmem).
opclass vmem(indirect, indIndex, indOffI, indOffL);
6641 
6642 //----------OPERAND CLASSES----------------------------------------------------
6643 // Operand Classes are groups of operands that are used as to simplify
6644 // instruction definitions by not requiring the AD writer to specify
6645 // separate instructions for every form of operand when the
6646 // instruction accepts multiple operand types with the same basic
6647 // encoding and format. The classic case of this is memory operands.
6648 
6649 // memory is used to define read/write location for load/store
6650 // instruction defs. we can turn a memory op into an Address
6651 
// All plain-pointer addressing forms plus their narrow-oop (DecodeN base)
// equivalents, one per line group.
opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
6654 
6655 
6656 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
6657 // operations. it allows the src to be either an iRegI or a (ConvL2I
6658 // iRegL). in the latter case the l2i normally planted for a ConvL2I
6659 // can be elided because the 32-bit instruction will just employ the
6660 // lower 32 bits anyway.
6661 //
6662 // n.b. this does not elide all L2I conversions. if the truncated
6663 // value is consumed by more than one operation then the ConvL2I
6664 // cannot be bundled into the consuming nodes so an l2i gets planted
6665 // (actually a movw $dst $src) and the downstream instructions consume
6666 // the result of the l2i as an iRegI input. That's a shame since the
6667 // movw is actually redundant but its not too costly.
6668 
// Accepts either a plain int register or a free long-to-int truncation.
opclass iRegIorL2I(iRegI, iRegL2I);
6670 
6671 //----------PIPELINE-----------------------------------------------------------
6672 // Rules which define the behavior of the target architectures pipeline.
6673 // Integer ALU reg operation
6674 pipeline %{
6675 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // Fixed size instructions
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
6688 
6689 // We don't use an actual pipeline model so don't care about resources
6690 // or description. we do use pipeline classes to introduce fixed
6691 // latencies
6692 
6693 //----------RESOURCES----------------------------------------------------------
6694 // Resources are the functional units available to the machine
6695 
// INS01 = either issue slot; ALU = either ALU; MAC = multiply-accumulate,
// DIV = divider, LDST = load/store unit, NEON_FP = SIMD/FP unit.
resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);
6703 
6704 //----------PIPELINE DESCRIPTION-----------------------------------------------
6705 // Pipeline Description specifies the stages in the machine's pipeline
6706 
// Four stages: issue (ISS), two execute stages (EX1, EX2), write-back (WR).
pipe_desc(ISS, EX1, EX2, WR);
6708 
6709 //----------PIPELINE CLASSES---------------------------------------------------
6710 // Pipeline Classes describe the stages in which input and output are
6711 // referenced by the hardware pipeline.
6712 
6713 //------- Integer ALU operations --------------------------
6714 
6715 // Integer ALU reg-reg operation
6716 // Operands needed in EX1, result generated in EX2
6717 // Eg.  ADD     x0, x1, x2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : EX1(read);
  INS01  : ISS; // Dual issue as instruction 0 or 1
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with constant shift
// Shifted register must be available in LATE_ISS instead of EX1
// Eg.  ADD     x0, x1, x2, LSL #2
pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : ISS(read); // shifted operand needed earlier than src1
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg operation with constant shift
// Eg.  LSL     x0, x1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}
6751 
6752 // Integer ALU reg-reg operation with variable shift
6753 // Both operands must be available in LATE_ISS instead of EX1
6754 // Result is available in EX1 instead of EX2
6755 // Eg.  LSLV    x0, x1, x2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX1(write); // variable shift result ready one stage early
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX1;
%}

// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  ALU    : EX1;
%}
6778 
6779 // Integer ALU reg operation
6780 // Eg.  NEG     x0, x1
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX2(write);
  src    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg immediate operation
// Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU immediate operation (no source operands)
// Eg.  MOV     x0, #N
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX1(write);
  INS01  : ISS;
  ALU    : EX1;
%}
6810 
6811 //------- Compare operation -------------------------------
6812 
6813 // Compare reg-reg
6814 // Eg.  CMP     x0, x1
pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  op2    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Compare reg-immediate
// Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}
6837 
6838 //------- Conditional instructions ------------------------
6839 
6840 // Conditional no operands
6841 // Eg.  CSINC   x0, zr, zr, <cond>
pipe_class icond_none(iRegI dst, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 2 operand
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src1   : EX1(read);
  src2   : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 1 operand
// EG.  CSINC   X0, X1, zr, <cond>
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
6875 
6876 //------- Multiply pipeline operations --------------------
6877 
6878 // Multiply reg-reg
6879 // Eg.  MUL     w0, w1, w2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate
// Eg.  MADD    w0, w1, w2, w3
pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long (64 bit) multiply reg-reg
// Eg.  MUL     x0, x1, x2
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long (64 bit) multiply accumulate
// Eg.  MADD    x0, x1, x2, x3
pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}
6928 
6929 //------- Divide pipeline operations --------------------
6930 
6931 // Eg.  SDIV    w0, w1, w2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

// Eg.  SDIV    x0, x1, x2
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}
6954 
6955 //------- Load pipeline operations ------------------------
6956 
6957 // Load - prefetch
6958 // Eg.  PFRM    <mem>
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, mem
// Eg.  LDR     x0, <mem>
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, reg
// Eg.  LDR     x0, [sp, x1]
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}
6988 
6989 //------- Store pipeline operations -----------------------
6990 
6991 // Store - zr, mem
6992 // Eg.  STR     zr, <mem>
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, mem
// Eg.  STR     x0, <mem>
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, reg
// Eg. STR      x0, [sp, x1]
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read); // dst is the address register, read at issue
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}
7022 
//------- Branch pipeline operations ----------------------
7024 
7025 // Branch
pipe_class pipe_branch()
%{
  single_instruction;
  INS01  : ISS;
  BRANCH : EX1;
%}

// Conditional branch
pipe_class pipe_branch_cond(rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

// Compare & Branch
// EG.  CBZ/CBNZ
pipe_class pipe_cmp_branch(iRegI op1)
%{
  single_instruction;
  op1    : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}
7051 
7052 //------- Synchronisation operations ----------------------
7053 
7054 // Any operation requiring serialization.
7055 // EG.  DMB/Atomic Ops/Load Acquire/Str Release
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Generic big/slow expanded idiom - also serialized
pipe_class pipe_slow()
%{
  instruction_count(10); // estimate for a multi-instruction expansion
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}
7075 
7076 // Empty pipeline class
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);
%}

// Default pipeline class.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);
%}
7110 
7111 // Define the class for the Nop node.
define %{
   MachNop = pipe_class_empty; // nops are single instructions with zero latency
%}
7115 
7116 %}
7117 //----------INSTRUCTIONS-------------------------------------------------------
7118 //
7119 // match      -- States which machine-independent subtree may be replaced
7120 //               by this instruction.
7121 // ins_cost   -- The estimated cost of this instruction is used by instruction
7122 //               selection to identify a minimum cost tree of machine
7123 //               instructions that matches a tree of machine-independent
7124 //               instructions.
7125 // format     -- A string providing the disassembly for this instruction.
7126 //               The value of an instruction's operand may be inserted
7127 //               by referring to it with a '$' prefix.
7128 // opcode     -- Three instruction opcodes may be provided.  These are referred
7129 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
7131 //               indicate the type of machine instruction, while secondary
7132 //               and tertiary are often used for prefix options or addressing
7133 //               modes.
7134 // ins_encode -- A list of encode classes with parameters. The encode class
7135 //               name must have been defined in an 'enc_class' specification
7136 //               in the encode section of the architecture description.
7137 
7138 // ============================================================================
7139 // Memory (Load/Store) Instructions
7140 
7141 // Load Instructions
7142 
7143 // Load Byte (8 bit signed)
// Sign-extending byte load into an int register (ldrsbw). The predicate
// excludes acquiring loads, which are handled by separate volatile rules.
instruct loadB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsbw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));
  // the load is n->in(1) because the match root is the ConvI2L
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsb(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7170 
7171 // Load Byte (8 bit unsigned)
// Zero-extending byte load into an int register (ldrbw).
instruct loadUB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7198 
7199 // Load Short (16 bit signed)
// Sign-extending halfword load into an int register (ldrshw).
instruct loadS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrshw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed) into long
instruct loadS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrsh(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7226 
7227 // Load Char (16 bit unsigned)
// Zero-extending halfword (char) load into an int register (ldrh).
instruct loadUS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7254 
7255 // Load Integer (32 bit signed)
// 32-bit word load into an int register (ldrw).
instruct loadI(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadI mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed) into long
instruct loadI2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrsw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7282 
7283 // Load Integer (32 bit unsigned) into long
// Zero-extending int load into a long: (ConvI2L (LoadI)) masked with
// 0xFFFFFFFF collapses to a plain ldrw, which zero-extends to 64 bits.
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  // the load is two levels down: AndL -> ConvI2L -> LoadI
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7296 
7297 // Load Long (64 bit signed)
// 64-bit load into a long register (ldr).
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7310 
7311 // Load Range
// Load of an array length (LoadRange) — a 32-bit word load; no acquiring
// predicate because array lengths are immutable.
instruct loadRange(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7323 
7324 // Load Pointer
// 64-bit pointer load (ldr).
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7337 
7338 // Load Compressed Pointer
// 32-bit compressed-oop load (ldrw); the value stays narrow in the
// destination register.
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7365 
7366 // Load Narrow Klass Pointer
7367 instruct loadNKlass(iRegNNoSp dst, memory mem)
7368 %{
7369   match(Set dst (LoadNKlass mem));
7370   predicate(!needs_acquiring_load(n));
7371 
7372   ins_cost(4 * INSN_COST);
7373   format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}
7374 
7375   ins_encode(aarch64_enc_ldrw(dst, mem));
7376 
7377   ins_pipe(iload_reg_mem);
7378 %}
7379 
7380 // Load Float
7381 instruct loadF(vRegF dst, memory mem)
7382 %{
7383   match(Set dst (LoadF mem));
7384   predicate(!needs_acquiring_load(n));
7385 
7386   ins_cost(4 * INSN_COST);
7387   format %{ "ldrs  $dst, $mem\t# float" %}
7388 
7389   ins_encode( aarch64_enc_ldrs(dst, mem) );
7390 
7391   ins_pipe(pipe_class_memory);
7392 %}
7393 
7394 // Load Double
7395 instruct loadD(vRegD dst, memory mem)
7396 %{
7397   match(Set dst (LoadD mem));
7398   predicate(!needs_acquiring_load(n));
7399 
7400   ins_cost(4 * INSN_COST);
7401   format %{ "ldrd  $dst, $mem\t# double" %}
7402 
7403   ins_encode( aarch64_enc_ldrd(dst, mem) );
7404 
7405   ins_pipe(pipe_class_memory);
7406 %}
7407 
7408 
7409 // Load Int Constant
7410 instruct loadConI(iRegINoSp dst, immI src)
7411 %{
7412   match(Set dst src);
7413 
7414   ins_cost(INSN_COST);
7415   format %{ "mov $dst, $src\t# int" %}
7416 
7417   ins_encode( aarch64_enc_movw_imm(dst, src) );
7418 
7419   ins_pipe(ialu_imm);
7420 %}
7421 
7422 // Load Long Constant
7423 instruct loadConL(iRegLNoSp dst, immL src)
7424 %{
7425   match(Set dst src);
7426 
7427   ins_cost(INSN_COST);
7428   format %{ "mov $dst, $src\t# long" %}
7429 
7430   ins_encode( aarch64_enc_mov_imm(dst, src) );
7431 
7432   ins_pipe(ialu_imm);
7433 %}
7434 
7435 // Load Pointer Constant
7436 
7437 instruct loadConP(iRegPNoSp dst, immP con)
7438 %{
7439   match(Set dst con);
7440 
7441   ins_cost(INSN_COST * 4);
7442   format %{
7443     "mov  $dst, $con\t# ptr\n\t"
7444   %}
7445 
7446   ins_encode(aarch64_enc_mov_p(dst, con));
7447 
7448   ins_pipe(ialu_imm);
7449 %}
7450 
7451 // Load Null Pointer Constant
7452 
7453 instruct loadConP0(iRegPNoSp dst, immP0 con)
7454 %{
7455   match(Set dst con);
7456 
7457   ins_cost(INSN_COST);
7458   format %{ "mov  $dst, $con\t# NULL ptr" %}
7459 
7460   ins_encode(aarch64_enc_mov_p0(dst, con));
7461 
7462   ins_pipe(ialu_imm);
7463 %}
7464 
// Load Pointer Constant One
// Used for the markOop of biased-locking displaced headers.
instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  // This rule loads the pointer constant 1, not NULL (that is loadConP0).
  format %{ "mov  $dst, $con\t# 1, ptr" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
7478 
7479 // Load Poll Page Constant
7480 
7481 instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
7482 %{
7483   match(Set dst con);
7484 
7485   ins_cost(INSN_COST);
7486   format %{ "adr  $dst, $con\t# Poll Page Ptr" %}
7487 
7488   ins_encode(aarch64_enc_mov_poll_page(dst, con));
7489 
7490   ins_pipe(ialu_imm);
7491 %}
7492 
7493 // Load Byte Map Base Constant
7494 
7495 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
7496 %{
7497   match(Set dst con);
7498 
7499   ins_cost(INSN_COST);
7500   format %{ "adr  $dst, $con\t# Byte Map Base" %}
7501 
7502   ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
7503 
7504   ins_pipe(ialu_imm);
7505 %}
7506 
7507 // Load Narrow Pointer Constant
7508 
7509 instruct loadConN(iRegNNoSp dst, immN con)
7510 %{
7511   match(Set dst con);
7512 
7513   ins_cost(INSN_COST * 4);
7514   format %{ "mov  $dst, $con\t# compressed ptr" %}
7515 
7516   ins_encode(aarch64_enc_mov_n(dst, con));
7517 
7518   ins_pipe(ialu_imm);
7519 %}
7520 
7521 // Load Narrow Null Pointer Constant
7522 
7523 instruct loadConN0(iRegNNoSp dst, immN0 con)
7524 %{
7525   match(Set dst con);
7526 
7527   ins_cost(INSN_COST);
7528   format %{ "mov  $dst, $con\t# compressed NULL ptr" %}
7529 
7530   ins_encode(aarch64_enc_mov_n0(dst, con));
7531 
7532   ins_pipe(ialu_imm);
7533 %}
7534 
7535 // Load Narrow Klass Constant
7536 
7537 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
7538 %{
7539   match(Set dst con);
7540 
7541   ins_cost(INSN_COST);
7542   format %{ "mov  $dst, $con\t# compressed klass ptr" %}
7543 
7544   ins_encode(aarch64_enc_mov_nk(dst, con));
7545 
7546   ins_pipe(ialu_imm);
7547 %}
7548 
7549 // Load Packed Float Constant
7550 
7551 instruct loadConF_packed(vRegF dst, immFPacked con) %{
7552   match(Set dst con);
7553   ins_cost(INSN_COST * 4);
7554   format %{ "fmovs  $dst, $con"%}
7555   ins_encode %{
7556     __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
7557   %}
7558 
7559   ins_pipe(pipe_class_default);
7560 %}
7561 
7562 // Load Float Constant
7563 
7564 instruct loadConF(vRegF dst, immF con) %{
7565   match(Set dst con);
7566 
7567   ins_cost(INSN_COST * 4);
7568 
7569   format %{
7570     "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
7571   %}
7572 
7573   ins_encode %{
7574     __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
7575   %}
7576 
7577   ins_pipe(pipe_class_default);
7578 %}
7579 
7580 // Load Packed Double Constant
7581 
7582 instruct loadConD_packed(vRegD dst, immDPacked con) %{
7583   match(Set dst con);
7584   ins_cost(INSN_COST);
7585   format %{ "fmovd  $dst, $con"%}
7586   ins_encode %{
7587     __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
7588   %}
7589 
7590   ins_pipe(pipe_class_default);
7591 %}
7592 
// Load Double Constant
// Fallback for double immediates that cannot be encoded in an fmovd
// (those match loadConD_packed above); loads from the constant table.
instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(pipe_class_default);
%}
7609 
7610 // Store Instructions
7611 
// Store CMS card-mark Immediate
// Card-mark store of zero with no leading barrier; only selected when
// unnecessary_storestore(n) proves the StoreStore barrier can be elided
// (otherwise storeimmCM0_ordered below is used).
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
7625 
7626 // Store CMS card-mark Immediate with intervening StoreStore
7627 // needed when using CMS with no conditional card marking
7628 instruct storeimmCM0_ordered(immI0 zero, memory mem)
7629 %{
7630   match(Set mem (StoreCM mem zero));
7631 
7632   ins_cost(INSN_COST * 2);
7633   format %{ "dmb ishst"
7634       "\n\tstrb zr, $mem\t# byte" %}
7635 
7636   ins_encode(aarch64_enc_strb0_ordered(mem));
7637 
7638   ins_pipe(istore_mem);
7639 %}
7640 
7641 // Store Byte
7642 instruct storeB(iRegIorL2I src, memory mem)
7643 %{
7644   match(Set mem (StoreB mem src));
7645   predicate(!needs_releasing_store(n));
7646 
7647   ins_cost(INSN_COST);
7648   format %{ "strb  $src, $mem\t# byte" %}
7649 
7650   ins_encode(aarch64_enc_strb(src, mem));
7651 
7652   ins_pipe(istore_reg_mem);
7653 %}
7654 
7655 
// Store Byte Immediate Zero
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // aarch64_enc_strb0 emits a store of the zero register; the previous
  // format string misspelled and misnamed the register as "rscractch2".
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
7668 
7669 // Store Char/Short
7670 instruct storeC(iRegIorL2I src, memory mem)
7671 %{
7672   match(Set mem (StoreC mem src));
7673   predicate(!needs_releasing_store(n));
7674 
7675   ins_cost(INSN_COST);
7676   format %{ "strh  $src, $mem\t# short" %}
7677 
7678   ins_encode(aarch64_enc_strh(src, mem));
7679 
7680   ins_pipe(istore_reg_mem);
7681 %}
7682 
7683 instruct storeimmC0(immI0 zero, memory mem)
7684 %{
7685   match(Set mem (StoreC mem zero));
7686   predicate(!needs_releasing_store(n));
7687 
7688   ins_cost(INSN_COST);
7689   format %{ "strh  zr, $mem\t# short" %}
7690 
7691   ins_encode(aarch64_enc_strh0(mem));
7692 
7693   ins_pipe(istore_mem);
7694 %}
7695 
7696 // Store Integer
7697 
7698 instruct storeI(iRegIorL2I src, memory mem)
7699 %{
7700   match(Set mem(StoreI mem src));
7701   predicate(!needs_releasing_store(n));
7702 
7703   ins_cost(INSN_COST);
7704   format %{ "strw  $src, $mem\t# int" %}
7705 
7706   ins_encode(aarch64_enc_strw(src, mem));
7707 
7708   ins_pipe(istore_reg_mem);
7709 %}
7710 
7711 instruct storeimmI0(immI0 zero, memory mem)
7712 %{
7713   match(Set mem(StoreI mem zero));
7714   predicate(!needs_releasing_store(n));
7715 
7716   ins_cost(INSN_COST);
7717   format %{ "strw  zr, $mem\t# int" %}
7718 
7719   ins_encode(aarch64_enc_strw0(mem));
7720 
7721   ins_pipe(istore_mem);
7722 %}
7723 
// Store Long (64 bit signed)
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}
7737 
// Store Long (64 bit signed) Immediate Zero
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
7751 
7752 // Store Pointer
7753 instruct storeP(iRegP src, memory mem)
7754 %{
7755   match(Set mem (StoreP mem src));
7756   predicate(!needs_releasing_store(n));
7757 
7758   ins_cost(INSN_COST);
7759   format %{ "str  $src, $mem\t# ptr" %}
7760 
7761   ins_encode(aarch64_enc_str(src, mem));
7762 
7763   ins_pipe(istore_reg_mem);
7764 %}
7765 
// Store Null Pointer
7767 instruct storeimmP0(immP0 zero, memory mem)
7768 %{
7769   match(Set mem (StoreP mem zero));
7770   predicate(!needs_releasing_store(n));
7771 
7772   ins_cost(INSN_COST);
7773   format %{ "str zr, $mem\t# ptr" %}
7774 
7775   ins_encode(aarch64_enc_str0(mem));
7776 
7777   ins_pipe(istore_mem);
7778 %}
7779 
7780 // Store Compressed Pointer
7781 instruct storeN(iRegN src, memory mem)
7782 %{
7783   match(Set mem (StoreN mem src));
7784   predicate(!needs_releasing_store(n));
7785 
7786   ins_cost(INSN_COST);
7787   format %{ "strw  $src, $mem\t# compressed ptr" %}
7788 
7789   ins_encode(aarch64_enc_strw(src, mem));
7790 
7791   ins_pipe(istore_reg_mem);
7792 %}
7793 
// Store Compressed Null Pointer
// When both the narrow oop base and narrow klass base are NULL, rheapbase
// is known to contain zero, so we can store it directly instead of
// materializing a zero — hence the iRegIHeapbase operand.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}
7808 
7809 // Store Float
7810 instruct storeF(vRegF src, memory mem)
7811 %{
7812   match(Set mem (StoreF mem src));
7813   predicate(!needs_releasing_store(n));
7814 
7815   ins_cost(INSN_COST);
7816   format %{ "strs  $src, $mem\t# float" %}
7817 
7818   ins_encode( aarch64_enc_strs(src, mem) );
7819 
7820   ins_pipe(pipe_class_memory);
7821 %}
7822 
7823 // TODO
7824 // implement storeImmF0 and storeFImmPacked
7825 
7826 // Store Double
7827 instruct storeD(vRegD src, memory mem)
7828 %{
7829   match(Set mem (StoreD mem src));
7830   predicate(!needs_releasing_store(n));
7831 
7832   ins_cost(INSN_COST);
7833   format %{ "strd  $src, $mem\t# double" %}
7834 
7835   ins_encode( aarch64_enc_strd(src, mem) );
7836 
7837   ins_pipe(pipe_class_memory);
7838 %}
7839 
// Store Compressed Klass Pointer
instruct storeNKlass(iRegN src, memory mem)
%{
  // match before predicate, consistent with every other store rule here
  match(Set mem (StoreNKlass mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
7853 
7854 // TODO
7855 // implement storeImmD0 and storeDImmPacked
7856 
7857 // prefetch instructions
7858 // Must be safe to execute with invalid address (cannot fault).
7859 
7860 instruct prefetchalloc( memory mem ) %{
7861   match(PrefetchAllocation mem);
7862 
7863   ins_cost(INSN_COST);
7864   format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}
7865 
7866   ins_encode( aarch64_enc_prefetchw(mem) );
7867 
7868   ins_pipe(iload_prefetch);
7869 %}
7870 
7871 //  ---------------- volatile loads and stores ----------------
7872 
7873 // Load Byte (8 bit signed)
7874 instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7875 %{
7876   match(Set dst (LoadB mem));
7877 
7878   ins_cost(VOLATILE_REF_COST);
7879   format %{ "ldarsb  $dst, $mem\t# byte" %}
7880 
7881   ins_encode(aarch64_enc_ldarsb(dst, mem));
7882 
7883   ins_pipe(pipe_serial);
7884 %}
7885 
7886 // Load Byte (8 bit signed) into long
7887 instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7888 %{
7889   match(Set dst (ConvI2L (LoadB mem)));
7890 
7891   ins_cost(VOLATILE_REF_COST);
7892   format %{ "ldarsb  $dst, $mem\t# byte" %}
7893 
7894   ins_encode(aarch64_enc_ldarsb(dst, mem));
7895 
7896   ins_pipe(pipe_serial);
7897 %}
7898 
7899 // Load Byte (8 bit unsigned)
7900 instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7901 %{
7902   match(Set dst (LoadUB mem));
7903 
7904   ins_cost(VOLATILE_REF_COST);
7905   format %{ "ldarb  $dst, $mem\t# byte" %}
7906 
7907   ins_encode(aarch64_enc_ldarb(dst, mem));
7908 
7909   ins_pipe(pipe_serial);
7910 %}
7911 
7912 // Load Byte (8 bit unsigned) into long
7913 instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7914 %{
7915   match(Set dst (ConvI2L (LoadUB mem)));
7916 
7917   ins_cost(VOLATILE_REF_COST);
7918   format %{ "ldarb  $dst, $mem\t# byte" %}
7919 
7920   ins_encode(aarch64_enc_ldarb(dst, mem));
7921 
7922   ins_pipe(pipe_serial);
7923 %}
7924 
7925 // Load Short (16 bit signed)
7926 instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7927 %{
7928   match(Set dst (LoadS mem));
7929 
7930   ins_cost(VOLATILE_REF_COST);
7931   format %{ "ldarshw  $dst, $mem\t# short" %}
7932 
7933   ins_encode(aarch64_enc_ldarshw(dst, mem));
7934 
7935   ins_pipe(pipe_serial);
7936 %}
7937 
7938 instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7939 %{
7940   match(Set dst (LoadUS mem));
7941 
7942   ins_cost(VOLATILE_REF_COST);
7943   format %{ "ldarhw  $dst, $mem\t# short" %}
7944 
7945   ins_encode(aarch64_enc_ldarhw(dst, mem));
7946 
7947   ins_pipe(pipe_serial);
7948 %}
7949 
7950 // Load Short/Char (16 bit unsigned) into long
7951 instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7952 %{
7953   match(Set dst (ConvI2L (LoadUS mem)));
7954 
7955   ins_cost(VOLATILE_REF_COST);
7956   format %{ "ldarh  $dst, $mem\t# short" %}
7957 
7958   ins_encode(aarch64_enc_ldarh(dst, mem));
7959 
7960   ins_pipe(pipe_serial);
7961 %}
7962 
// Load Short/Char (16 bit signed) into long
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  // The encoding emits ldarsh (sign-extending acquire load); the format
  // previously said "ldarh", mismatching the generated code.
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
7975 
7976 // Load Integer (32 bit signed)
7977 instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7978 %{
7979   match(Set dst (LoadI mem));
7980 
7981   ins_cost(VOLATILE_REF_COST);
7982   format %{ "ldarw  $dst, $mem\t# int" %}
7983 
7984   ins_encode(aarch64_enc_ldarw(dst, mem));
7985 
7986   ins_pipe(pipe_serial);
7987 %}
7988 
7989 // Load Integer (32 bit unsigned) into long
7990 instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
7991 %{
7992   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7993 
7994   ins_cost(VOLATILE_REF_COST);
7995   format %{ "ldarw  $dst, $mem\t# int" %}
7996 
7997   ins_encode(aarch64_enc_ldarw(dst, mem));
7998 
7999   ins_pipe(pipe_serial);
8000 %}
8001 
// Load Long (64 bit signed)
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
8014 
8015 // Load Pointer
8016 instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
8017 %{
8018   match(Set dst (LoadP mem));
8019 
8020   ins_cost(VOLATILE_REF_COST);
8021   format %{ "ldar  $dst, $mem\t# ptr" %}
8022 
8023   ins_encode(aarch64_enc_ldar(dst, mem));
8024 
8025   ins_pipe(pipe_serial);
8026 %}
8027 
8028 // Load Compressed Pointer
8029 instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
8030 %{
8031   match(Set dst (LoadN mem));
8032 
8033   ins_cost(VOLATILE_REF_COST);
8034   format %{ "ldarw  $dst, $mem\t# compressed ptr" %}
8035 
8036   ins_encode(aarch64_enc_ldarw(dst, mem));
8037 
8038   ins_pipe(pipe_serial);
8039 %}
8040 
8041 // Load Float
8042 instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
8043 %{
8044   match(Set dst (LoadF mem));
8045 
8046   ins_cost(VOLATILE_REF_COST);
8047   format %{ "ldars  $dst, $mem\t# float" %}
8048 
8049   ins_encode( aarch64_enc_fldars(dst, mem) );
8050 
8051   ins_pipe(pipe_serial);
8052 %}
8053 
8054 // Load Double
8055 instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
8056 %{
8057   match(Set dst (LoadD mem));
8058 
8059   ins_cost(VOLATILE_REF_COST);
8060   format %{ "ldard  $dst, $mem\t# double" %}
8061 
8062   ins_encode( aarch64_enc_fldard(dst, mem) );
8063 
8064   ins_pipe(pipe_serial);
8065 %}
8066 
8067 // Store Byte
8068 instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
8069 %{
8070   match(Set mem (StoreB mem src));
8071 
8072   ins_cost(VOLATILE_REF_COST);
8073   format %{ "stlrb  $src, $mem\t# byte" %}
8074 
8075   ins_encode(aarch64_enc_stlrb(src, mem));
8076 
8077   ins_pipe(pipe_class_memory);
8078 %}
8079 
8080 // Store Char/Short
8081 instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
8082 %{
8083   match(Set mem (StoreC mem src));
8084 
8085   ins_cost(VOLATILE_REF_COST);
8086   format %{ "stlrh  $src, $mem\t# short" %}
8087 
8088   ins_encode(aarch64_enc_stlrh(src, mem));
8089 
8090   ins_pipe(pipe_class_memory);
8091 %}
8092 
8093 // Store Integer
8094 
8095 instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
8096 %{
8097   match(Set mem(StoreI mem src));
8098 
8099   ins_cost(VOLATILE_REF_COST);
8100   format %{ "stlrw  $src, $mem\t# int" %}
8101 
8102   ins_encode(aarch64_enc_stlrw(src, mem));
8103 
8104   ins_pipe(pipe_class_memory);
8105 %}
8106 
// Store Long (64 bit signed)
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
8119 
8120 // Store Pointer
8121 instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
8122 %{
8123   match(Set mem (StoreP mem src));
8124 
8125   ins_cost(VOLATILE_REF_COST);
8126   format %{ "stlr  $src, $mem\t# ptr" %}
8127 
8128   ins_encode(aarch64_enc_stlr(src, mem));
8129 
8130   ins_pipe(pipe_class_memory);
8131 %}
8132 
8133 // Store Compressed Pointer
8134 instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
8135 %{
8136   match(Set mem (StoreN mem src));
8137 
8138   ins_cost(VOLATILE_REF_COST);
8139   format %{ "stlrw  $src, $mem\t# compressed ptr" %}
8140 
8141   ins_encode(aarch64_enc_stlrw(src, mem));
8142 
8143   ins_pipe(pipe_class_memory);
8144 %}
8145 
8146 // Store Float
8147 instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
8148 %{
8149   match(Set mem (StoreF mem src));
8150 
8151   ins_cost(VOLATILE_REF_COST);
8152   format %{ "stlrs  $src, $mem\t# float" %}
8153 
8154   ins_encode( aarch64_enc_fstlrs(src, mem) );
8155 
8156   ins_pipe(pipe_class_memory);
8157 %}
8158 
8159 // TODO
8160 // implement storeImmF0 and storeFImmPacked
8161 
8162 // Store Double
8163 instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
8164 %{
8165   match(Set mem (StoreD mem src));
8166 
8167   ins_cost(VOLATILE_REF_COST);
8168   format %{ "stlrd  $src, $mem\t# double" %}
8169 
8170   ins_encode( aarch64_enc_fstlrd(src, mem) );
8171 
8172   ins_pipe(pipe_class_memory);
8173 %}
8174 
8175 //  ---------------- end of volatile loads and stores ----------------
8176 
8177 // ============================================================================
8178 // BSWAP Instructions
8179 
8180 instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
8181   match(Set dst (ReverseBytesI src));
8182 
8183   ins_cost(INSN_COST);
8184   format %{ "revw  $dst, $src" %}
8185 
8186   ins_encode %{
8187     __ revw(as_Register($dst$$reg), as_Register($src$$reg));
8188   %}
8189 
8190   ins_pipe(ialu_reg);
8191 %}
8192 
8193 instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
8194   match(Set dst (ReverseBytesL src));
8195 
8196   ins_cost(INSN_COST);
8197   format %{ "rev  $dst, $src" %}
8198 
8199   ins_encode %{
8200     __ rev(as_Register($dst$$reg), as_Register($src$$reg));
8201   %}
8202 
8203   ins_pipe(ialu_reg);
8204 %}
8205 
8206 instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
8207   match(Set dst (ReverseBytesUS src));
8208 
8209   ins_cost(INSN_COST);
8210   format %{ "rev16w  $dst, $src" %}
8211 
8212   ins_encode %{
8213     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
8214   %}
8215 
8216   ins_pipe(ialu_reg);
8217 %}
8218 
8219 instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
8220   match(Set dst (ReverseBytesS src));
8221 
8222   ins_cost(INSN_COST);
8223   format %{ "rev16w  $dst, $src\n\t"
8224             "sbfmw $dst, $dst, #0, #15" %}
8225 
8226   ins_encode %{
8227     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
8228     __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
8229   %}
8230 
8231   ins_pipe(ialu_reg);
8232 %}
8233 
8234 // ============================================================================
8235 // Zero Count Instructions
8236 
8237 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
8238   match(Set dst (CountLeadingZerosI src));
8239 
8240   ins_cost(INSN_COST);
8241   format %{ "clzw  $dst, $src" %}
8242   ins_encode %{
8243     __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
8244   %}
8245 
8246   ins_pipe(ialu_reg);
8247 %}
8248 
8249 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
8250   match(Set dst (CountLeadingZerosL src));
8251 
8252   ins_cost(INSN_COST);
8253   format %{ "clz   $dst, $src" %}
8254   ins_encode %{
8255     __ clz(as_Register($dst$$reg), as_Register($src$$reg));
8256   %}
8257 
8258   ins_pipe(ialu_reg);
8259 %}
8260 
8261 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
8262   match(Set dst (CountTrailingZerosI src));
8263 
8264   ins_cost(INSN_COST * 2);
8265   format %{ "rbitw  $dst, $src\n\t"
8266             "clzw   $dst, $dst" %}
8267   ins_encode %{
8268     __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
8269     __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
8270   %}
8271 
8272   ins_pipe(ialu_reg);
8273 %}
8274 
8275 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
8276   match(Set dst (CountTrailingZerosL src));
8277 
8278   ins_cost(INSN_COST * 2);
8279   format %{ "rbit   $dst, $src\n\t"
8280             "clz    $dst, $dst" %}
8281   ins_encode %{
8282     __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
8283     __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
8284   %}
8285 
8286   ins_pipe(ialu_reg);
8287 %}
8288 
8289 //---------- Population Count Instructions -------------------------------------
8290 //
8291 
// Population count of a 32-bit value via the SIMD cnt/addv sequence.
instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // NOTE(review): this movw writes $src (to zero its upper 32 bits) but
    // the effect() clause declares no TEMP/USE_KILL for src — confirm the
    // register allocator is told about this clobber.
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
8313 
8314 instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
8315   predicate(UsePopCountInstruction);
8316   match(Set dst (PopCountI (LoadI mem)));
8317   effect(TEMP tmp);
8318   ins_cost(INSN_COST * 13);
8319 
8320   format %{ "ldrs   $tmp, $mem\n\t"
8321             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
8322             "addv   $tmp, $tmp\t# vector (8B)\n\t"
8323             "mov    $dst, $tmp\t# vector (1D)" %}
8324   ins_encode %{
8325     FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
8326     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
8327                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
8328     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8329     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8330     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
8331   %}
8332 
8333   ins_pipe(pipe_class_default);
8334 %}
8335 
8336 // Note: Long.bitCount(long) returns an int.
8337 instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
8338   predicate(UsePopCountInstruction);
8339   match(Set dst (PopCountL src));
8340   effect(TEMP tmp);
8341   ins_cost(INSN_COST * 13);
8342 
8343   format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
8344             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
8345             "addv   $tmp, $tmp\t# vector (8B)\n\t"
8346             "mov    $dst, $tmp\t# vector (1D)" %}
8347   ins_encode %{
8348     __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
8349     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8350     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8351     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
8352   %}
8353 
8354   ins_pipe(pipe_class_default);
8355 %}
8356 
8357 instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
8358   predicate(UsePopCountInstruction);
8359   match(Set dst (PopCountL (LoadL mem)));
8360   effect(TEMP tmp);
8361   ins_cost(INSN_COST * 13);
8362 
8363   format %{ "ldrd   $tmp, $mem\n\t"
8364             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
8365             "addv   $tmp, $tmp\t# vector (8B)\n\t"
8366             "mov    $dst, $tmp\t# vector (1D)" %}
8367   ins_encode %{
8368     FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
8369     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
8370                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
8371     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8372     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8373     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
8374   %}
8375 
8376   ins_pipe(pipe_class_default);
8377 %}
8378 
8379 // ============================================================================
8380 // MemBar Instruction
8381 
8382 instruct load_fence() %{
8383   match(LoadFence);
8384   ins_cost(VOLATILE_REF_COST);
8385 
8386   format %{ "load_fence" %}
8387 
8388   ins_encode %{
8389     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
8390   %}
8391   ins_pipe(pipe_serial);
8392 %}
8393 
8394 instruct unnecessary_membar_acquire() %{
8395   predicate(unnecessary_acquire(n));
8396   match(MemBarAcquire);
8397   ins_cost(0);
8398 
8399   format %{ "membar_acquire (elided)" %}
8400 
8401   ins_encode %{
8402     __ block_comment("membar_acquire (elided)");
8403   %}
8404 
8405   ins_pipe(pipe_class_empty);
8406 %}
8407 
8408 instruct membar_acquire() %{
8409   match(MemBarAcquire);
8410   ins_cost(VOLATILE_REF_COST);
8411 
8412   format %{ "membar_acquire" %}
8413 
8414   ins_encode %{
8415     __ block_comment("membar_acquire");
8416     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
8417   %}
8418 
8419   ins_pipe(pipe_serial);
8420 %}
8421 
8422 
8423 instruct membar_acquire_lock() %{
8424   match(MemBarAcquireLock);
8425   ins_cost(VOLATILE_REF_COST);
8426 
8427   format %{ "membar_acquire_lock (elided)" %}
8428 
8429   ins_encode %{
8430     __ block_comment("membar_acquire_lock (elided)");
8431   %}
8432 
8433   ins_pipe(pipe_serial);
8434 %}
8435 
8436 instruct store_fence() %{
8437   match(StoreFence);
8438   ins_cost(VOLATILE_REF_COST);
8439 
8440   format %{ "store_fence" %}
8441 
8442   ins_encode %{
8443     __ membar(Assembler::LoadStore|Assembler::StoreStore);
8444   %}
8445   ins_pipe(pipe_serial);
8446 %}
8447 
8448 instruct unnecessary_membar_release() %{
8449   predicate(unnecessary_release(n));
8450   match(MemBarRelease);
8451   ins_cost(0);
8452 
8453   format %{ "membar_release (elided)" %}
8454 
8455   ins_encode %{
8456     __ block_comment("membar_release (elided)");
8457   %}
8458   ins_pipe(pipe_serial);
8459 %}
8460 
8461 instruct membar_release() %{
8462   match(MemBarRelease);
8463   ins_cost(VOLATILE_REF_COST);
8464 
8465   format %{ "membar_release" %}
8466 
8467   ins_encode %{
8468     __ block_comment("membar_release");
8469     __ membar(Assembler::LoadStore|Assembler::StoreStore);
8470   %}
8471   ins_pipe(pipe_serial);
8472 %}
8473 
8474 instruct membar_storestore() %{
8475   match(MemBarStoreStore);
8476   ins_cost(VOLATILE_REF_COST);
8477 
8478   format %{ "MEMBAR-store-store" %}
8479 
8480   ins_encode %{
8481     __ membar(Assembler::StoreStore);
8482   %}
8483   ins_pipe(pipe_serial);
8484 %}
8485 
8486 instruct membar_release_lock() %{
8487   match(MemBarReleaseLock);
8488   ins_cost(VOLATILE_REF_COST);
8489 
8490   format %{ "membar_release_lock (elided)" %}
8491 
8492   ins_encode %{
8493     __ block_comment("membar_release_lock (elided)");
8494   %}
8495 
8496   ins_pipe(pipe_serial);
8497 %}
8498 
8499 instruct unnecessary_membar_volatile() %{
8500   predicate(unnecessary_volatile(n));
8501   match(MemBarVolatile);
8502   ins_cost(0);
8503 
8504   format %{ "membar_volatile (elided)" %}
8505 
8506   ins_encode %{
8507     __ block_comment("membar_volatile (elided)");
8508   %}
8509 
8510   ins_pipe(pipe_serial);
8511 %}
8512 
8513 instruct membar_volatile() %{
8514   match(MemBarVolatile);
8515   ins_cost(VOLATILE_REF_COST*100);
8516 
8517   format %{ "membar_volatile" %}
8518 
8519   ins_encode %{
8520     __ block_comment("membar_volatile");
8521     __ membar(Assembler::StoreLoad);
8522   %}
8523 
8524   ins_pipe(pipe_serial);
8525 %}
8526 
8527 // ============================================================================
8528 // Cast/Convert Instructions
8529 
// Reinterpret a long as a pointer. A register move is emitted only when
// source and destination differ; otherwise this is a no-op.
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long -> ptr" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

// Reinterpret a pointer as a long; move elided when registers coincide.
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
8559 
// Convert oop into int for vectors alignment masking.
// Takes the low 32 bits of the pointer via a 32-bit register move.
instruct convP2I(iRegINoSp dst, iRegP src) %{
  match(Set dst (ConvL2I (CastP2X src)));

  ins_cost(INSN_COST);
  format %{ "movw $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
8572 
// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
// Only valid when the narrow-oop encoding uses no shift, so the raw
// 32-bit compressed value equals the low word of the decoded address.
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  // Fixed format string: was "mov dst, $src" -- the missing '$' meant the
  // dst operand was never substituted in listings, and "mov" did not match
  // the movw actually emitted (cf. convP2I above).
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
8588 
8589 
// Convert oop pointer into compressed form.
// Maybe-null variant: predicate excludes the NotNull case, which is
// handled by the cheaper rule below. encode_heap_oop may need a null
// check internally, hence the KILL cr effect here.
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Not-null variant of oop compression; no KILL cr effect is declared
// although cr appears in the signature -- presumably
// encode_heap_oop_not_null leaves flags intact (confirm in macroAssembler).
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Expand a compressed oop; maybe-null variant (predicate excludes the
// NotNull and Constant cases handled below).
instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Expand a compressed oop known to be non-null (or a constant).
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}
8643 
8644 // n.b. AArch64 implementations of encode_klass_not_null and
8645 // decode_klass_not_null do not modify the flags register so, unlike
8646 // Intel, we don't kill CR as a side effect here
8647 
// Compress a klass pointer (never null). Per the note above, the AArch64
// implementation does not touch flags, so no cr effect is needed.
instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}

// Expand a compressed klass pointer (never null). Uses the one-register
// in-place form of decode_klass_not_null when dst and src coincide.
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}
8681 
// CheckCastPP is a compile-time type assertion only: zero-size,
// empty encoding, dst unchanged in place.
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// CastPP: pointer type refinement, no code emitted.
instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// CastII: int range/type refinement, no code emitted.
instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
8712 
8713 // ============================================================================
8714 // Atomic operation instructions
8715 //
8716 // Intel and SPARC both implement Ideal Node LoadPLocked and
8717 // Store{PIL}Conditional instructions using a normal load for the
8718 // LoadPLocked and a CAS for the Store{PIL}Conditional.
8719 //
8720 // The ideal code appears only to use LoadPLocked/StorePLocked as a
8721 // pair to lock object allocations from Eden space when not using
8722 // TLABs.
8723 //
8724 // There does not appear to be a Load{IL}Locked Ideal Node and the
8725 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
8726 // and to use StoreIConditional only for 32-bit and StoreLConditional
8727 // only for 64-bit.
8728 //
8729 // We implement LoadPLocked and StorePLocked instructions using,
8730 // respectively the AArch64 hw load-exclusive and store-conditional
8731 // instructions. Whereas we must implement each of
8732 // Store{IL}Conditional using a CAS which employs a pair of
8733 // instructions comprising a load-exclusive followed by a
8734 // store-conditional.
8735 
8736 
8737 // Locked-load (linked load) of the current heap-top
8738 // used when updating the eden heap top
8739 // implemented using ldaxr on AArch64
8740 
// Linked (exclusive) load of a pointer with acquire semantics, via the
// aarch64_enc_ldaxr encoding. Paired with storePConditional below for
// heap-top updates.
instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}

// Store-conditional of the updated heap top (release form, stlxr).
// n.b. oldval is carried by the match rule but not passed to the
// encoding: the preceding ldaxr established the exclusive monitor.
// The format also shows a cmpw; presumably the aarch64_enc_stlxr
// encoding sets the flags from the status register -- confirm there.
instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
8778 
8779 
8780 // storeLConditional is used by PhaseMacroExpand::expand_lock_node
8781 // when attempting to rebias a lock towards the current thread.  We
8782 // must use the acquire form of cmpxchg in order to guarantee acquire
8783 // semantics in this case.
// StoreLConditional as a 64-bit CAS with acquire semantics (see the
// rebias rationale in the comment above); result is reported in flags.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}

// 32-bit counterpart of storeLConditional; acquire form kept purely for
// symmetry (see comment above).
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8818 
8819 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
8820 // can't match them
8821 
8822 // standard CompareAndSwapX when we are using barriers
8823 // these have higher priority than the rules selected by a predicate
8824 
// CompareAndSwapI (barrier-using form): 32-bit cmpxchg, then cset
// materialises the success flag (EQ) into $res. Flags clobbered.
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// CompareAndSwapL: 64-bit variant of the rule above.
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// CompareAndSwapP: pointer-sized (64-bit) variant.
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// CompareAndSwapN: narrow-oop (32-bit) variant.
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8896 
8897 // alternative CompareAndSwapX when we are eliding barriers
8898 
// Acquiring form of CompareAndSwapI, selected (lower cost plus
// predicate) when needs_acquiring_load_exclusive(n) lets us elide
// surrounding barriers and use the _acq cmpxchg encoding instead.
instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring form of CompareAndSwapL (64-bit).
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring form of CompareAndSwapP (pointer).
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring form of CompareAndSwapN (narrow oop).
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8974 
8975 
// GetAndSetI: atomic 32-bit exchange; previous value returned in $prev.
// n.b. no ins_cost is declared on these get_and_set rules.
instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
  match(Set prev (GetAndSetI mem newv));
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// GetAndSetL: atomic 64-bit exchange.
instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegL prev) %{
  match(Set prev (GetAndSetL mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// GetAndSetN: atomic exchange of a narrow oop (32-bit width).
instruct get_and_setN(indirect mem, iRegNNoSp newv, iRegI prev) %{
  match(Set prev (GetAndSetN mem newv));
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// GetAndSetP: atomic exchange of a full pointer (64-bit width).
instruct get_and_setP(indirect mem, iRegPNoSp newv, iRegP prev) %{
  match(Set prev (GetAndSetP mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9011 
9012 
// GetAndAddL, register increment, result used: previous value in $newval.
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Result-discarding variant (predicate result_not_used): passes noreg so
// no result register is written; cost 9 vs 10 makes it win when legal.
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// GetAndAddL with an immediate add/sub-encodable increment.
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Immediate increment, result discarded.
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// 32-bit GetAndAddI, register increment, result used.
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// 32-bit register increment, result discarded.
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// 32-bit immediate increment, result used.
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// 32-bit immediate increment, result discarded.
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9096 
// Manifest a CmpL result in an integer register.
// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
// Sequence: cmp sets flags; csetw dst = (ne ? 1 : 0); cnegw negates dst
// when lt, yielding -1/0/+1.
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  // format %{ "CmpL3 $dst, $src1, $src2" %}
  ins_encode %{
    __ cmp($src1$$Register, $src2$$Register);
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}

// Immediate-operand form of cmpL3. A negative constant is compared via
// adds with the negated value (subs only accepts a positive add/sub
// immediate); flags outcome is identical.
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    int32_t con = (int32_t)$src2$$constant;
     if (con < 0) {
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
9144 
9145 // ============================================================================
9146 // Conditional Move Instructions
9147 
9148 // n.b. we have identical rules for both a signed compare op (cmpOp)
9149 // and an unsigned compare op (cmpOpU). it would be nice if we could
9150 // define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
9152 // opclass does not live up to the COND_INTER interface of its
9153 // component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
9155 // which throws a ShouldNotHappen. So, we have to provide two flavours
9156 // of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
9157 
// CMoveI, signed comparison: cselw picks $src2 when $cmp holds,
// otherwise $src1 (note the swapped operand order in the csel).
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned-compare flavour of the rule above (see note on why both
// flavours are required).
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9189 
9190 // special cases where one arg is zero
9191 
9192 // n.b. this is selected in preference to the rule above because it
9193 // avoids loading constant 0 into a source register
9194 
9195 // TODO
9196 // we ought only to be able to cull one of these variants as the ideal
9197 // transforms ought always to order the zero consistently (to left/right?)
9198 
// CMoveI with zero on the left: select $src when $cmp holds, else 0 (zr)
// -- saves materialising the constant 0. Signed flavour.
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned flavour of cmovI_zero_reg.
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// CMoveI with zero on the right: select 0 when $cmp holds, else $src.
// Signed flavour.
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned flavour of cmovI_reg_zero.
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9262 
9263 // special case for creating a boolean 0 or 1
9264 
9265 // n.b. this is selected in preference to the rule above because it
9266 // avoids loading constants 0 and 1 into a source register
9267 
// Boolean materialisation: CMoveI selecting between constants 1 and 0 is
// a single csincw zr, zr (dst = cond ? 0 : 0+1). Signed flavour.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}

// Unsigned flavour of the boolean materialisation rule.
instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
9305 
// CMoveL, signed comparison: 64-bit csel, $src2 when $cmp holds else $src1.
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned flavour of cmovL_reg_reg.
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9337 
9338 // special cases where one arg is zero
9339 
// CMoveL with zero on the right: select 0 (zr) when $cmp holds, else $src.
// Signed flavour.
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned flavour of cmovL_reg_zero.
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// CMoveL with zero on the left: select $src when $cmp holds, else 0.
// Signed flavour.
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned flavour of cmovL_zero_reg.
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9403 
// CMoveP, signed comparison: pointer-width csel, $src2 when $cmp holds.
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned flavour of cmovP_reg_reg.
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9435 
9436 // special cases where one arg is zero
9437 
// CMoveP with null on the right: select null (zr) when $cmp holds,
// else $src. Signed flavour.
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned flavour of cmovP_reg_zero.
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// CMoveP with null on the left: select $src when $cmp holds, else null.
// Signed flavour.
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned flavour of cmovP_zero_reg.
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9501 
// Conditional move, compressed ptr (narrow oop), signed compare:
// dst = cmp(cr) ? src2 : src1, via 32-bit cselw.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9517 
// Conditional move, compressed ptr (narrow oop), unsigned compare:
// dst = cmp(cr) ? src2 : src1, via 32-bit cselw.
// Fix: the format string previously said "# signed, compressed ptr", which
// was wrong for this cmpOpU/rFlagsRegU (unsigned) rule and produced a
// misleading disassembly comment.  Only the format text changes; the
// matched pattern and emitted instruction are untouched.
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9533 
// special cases where one arg is zero

// Conditional move, compressed ptr, signed compare, zero second arg:
// dst = cmp(cr) ? zr : src.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above but with an unsigned comparison.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Conditional move, compressed ptr, signed compare, zero first arg:
// dst = cmp(cr) ? src : zr.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above but with an unsigned comparison.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9599 
// Conditional move, float, signed compare: dst = cmp(cr) ? src2 : src1
// via fcsel (single precision).  src2 comes first in the encoding, so it
// is selected when the condition holds.
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(pipe_class_default);
%}

// As above but with an unsigned comparison.
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(pipe_class_default);
%}
9635 
// Conditional move, double, signed compare: dst = cmp(cr) ? src2 : src1
// via fcsel (double precision).
// Fix: the format string previously said "cmove float" although this rule
// matches CMoveD and emits fcseld (double).  Only the disassembly comment
// text changes; pattern and encoding are untouched.
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(pipe_class_default);
%}
9653 
// Conditional move, double, unsigned compare: dst = cmp(cr) ? src2 : src1
// via fcsel (double precision).
// Fix: the format string previously said "cmove float" although this rule
// matches CMoveD and emits fcseld (double).  Only the disassembly comment
// text changes; pattern and encoding are untouched.
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(pipe_class_default);
%}
9671 
9672 // ============================================================================
9673 // Arithmetic Instructions
9674 //
9675 
9676 // Integer Addition
9677 
9678 // TODO
9679 // these currently employ operations which do not set CR and hence are
9680 // not flagged as killing CR but we would like to isolate the cases
9681 // where we want to set flags from those where we don't. need to work
9682 // out how to do that.
9683 
9684 instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
9685   match(Set dst (AddI src1 src2));
9686 
9687   ins_cost(INSN_COST);
9688   format %{ "addw  $dst, $src1, $src2" %}
9689 
9690   ins_encode %{
9691     __ addw(as_Register($dst$$reg),
9692             as_Register($src1$$reg),
9693             as_Register($src2$$reg));
9694   %}
9695 
9696   ins_pipe(ialu_reg_reg);
9697 %}
9698 
9699 instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
9700   match(Set dst (AddI src1 src2));
9701 
9702   ins_cost(INSN_COST);
9703   format %{ "addw $dst, $src1, $src2" %}
9704 
9705   // use opcode to indicate that this is an add not a sub
9706   opcode(0x0);
9707 
9708   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
9709 
9710   ins_pipe(ialu_reg_imm);
9711 %}
9712 
9713 instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
9714   match(Set dst (AddI (ConvL2I src1) src2));
9715 
9716   ins_cost(INSN_COST);
9717   format %{ "addw $dst, $src1, $src2" %}
9718 
9719   // use opcode to indicate that this is an add not a sub
9720   opcode(0x0);
9721 
9722   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
9723 
9724   ins_pipe(ialu_reg_imm);
9725 %}
9726 
// Pointer Addition
// 64-bit pointer + long offset: dst = src1 + src2.
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer + sign-extended int offset, folding the ConvI2L into the add's
// sxtw extend operand (single instruction instead of two).
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer + (long << scale), folding the shift into an lsl-scaled
// address computed with lea.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Pointer + (sign-extended int << scale), folding both the ConvI2L and
// the shift into an sxtw-scaled address computed with lea.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// (long)(int src) << scale as a single sbfiz (sign-extend then shift);
// the extracted field width is capped at 32 bits via MIN.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
9802 
// Pointer Immediate Addition
// n.b. this needs to be more expensive than using an indirect memory
// operand
// Pointer + add/sub-encodable long immediate.
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}

// Long Addition
// 64-bit register-register add: dst = src1 + src2.
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9836 
// Long Immediate Addition. No constant pool entries required.
// 64-bit register-immediate add; immLAddSub guarantees the constant fits
// the add/sub immediate encoding.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9851 
// Integer Subtraction
// 32-bit register-register subtract: dst = src1 - src2 (subw).
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Immediate Subtraction
// 32-bit register-immediate subtract; shares the add/sub immediate
// encoder with addI_reg_imm, distinguished by opcode 0x1.
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

// Long Subtraction
// 64-bit register-register subtract: dst = src1 - src2.
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9899 
// Long Immediate Subtraction. No constant pool entries required.
// 64-bit register-immediate subtract; shares the add/sub immediate
// encoder with addL_reg_imm, distinguished by opcode 0x1.
// Fix: the format string was "sub$dst, ..." (missing space after the
// mnemonic), producing glued-together disassembly output.  Only the
// format text changes; pattern and encoding are untouched.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9914 
// Integer Negation (special case for sub)

// 32-bit negate: dst = 0 - src, matched from SubI with a zero left operand.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Long Negation

// 64-bit negate: dst = 0 - src, matched from SubL with a zero left operand.
// NOTE(review): src is declared iRegIorL2I rather than iRegL even though
// this is a 64-bit SubL — confirm this operand class is intended.
instruct negL_reg(iRegLNoSp dst, iRegIorL2I src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9946 
// Integer Multiply

// 32-bit multiply: dst = src1 * src2 (mulw).
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// Widening 32x32->64 multiply: folds MulL of two ConvI2L inputs into a
// single smull, avoiding the explicit sign extensions.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// Long Multiply

// 64-bit multiply (low 64 bits of the product): dst = src1 * src2.
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
9995 
// High 64 bits of a signed 64x64 multiply: dst = (src1 * src2) >> 64,
// via smulh.
// Fix: the format string had a stray ", " before the tab
// ("$src2, \t# mulhi"), leaving a dangling comma in disassembly output.
// Only the format text changes; pattern and encoding are untouched.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  format %{ "smulh   $dst, $src1, $src2\t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
10011 
10012 // Combined Integer Multiply & Add/Sub
10013 
// Fused 32-bit multiply-add: dst = src3 + src1 * src2 (maddw).
// Fix: the format string showed the 64-bit mnemonic "madd" although the
// encoding emits the 32-bit maddw, so disassembly misrepresented the
// operand width.  Only the format text changes; pattern and encoding are
// untouched.
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10029 
// Fused 32-bit multiply-subtract: dst = src3 - src1 * src2 (msubw).
// Fix: the format string showed the 64-bit mnemonic "msub" although the
// encoding emits the 32-bit msubw, so disassembly misrepresented the
// operand width.  Only the format text changes; pattern and encoding are
// untouched.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10045 
// Combined Long Multiply & Add/Sub

// Fused 64-bit multiply-add: dst = src3 + src1 * src2 (madd).
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

// Fused 64-bit multiply-subtract: dst = src3 - src1 * src2 (msub).
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
10079 
// Integer Divide

// 32-bit signed divide via the shared sdivw encoder.
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}

// (src1 >> 31) >>> 31 extracts just the sign bit; a single logical shift
// right by 31 yields the same value, so the two shifts collapse to one lsrw.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// src + sign-bit(src) in one addw with an LSR #31 shifted operand — the
// rounding adjustment C2 generates for signed divide-by-power-of-2.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
10115 
// Long Divide

// 64-bit signed divide via the shared sdiv encoder.
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}

// (src1 >> 63) >>> 63 extracts just the sign bit; a single logical shift
// right by 63 yields the same value (64-bit analogue of signExtract).
instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
10137 
// src + sign-bit(src) in one add with an LSR #63 shifted operand — the
// 64-bit analogue of div2Round.
// Fix: the format string was "add $dst, $src, $div1", omitting the LSR
// shift entirely and so misrepresenting the emitted instruction; it is
// now consistent with div2Round's "addw $dst, $src, LSR $div1".  Only
// the format text changes; pattern and encoding are untouched.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "add $dst, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
10151 
10152 // Integer Remainder
10153 
// 32-bit signed remainder: sdivw into rscratch1 followed by msubw
// (dst = src1 - rscratch1 * src2), via the shared modw encoder.
// Fix: the format string's second line was "msubw($dst, ..." — a stray,
// unbalanced parenthesis glued to the mnemonic.  Only the format text
// changes; pattern and encoding are untouched.
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
10164 
10165 // Long Remainder
10166 
// 64-bit signed remainder: sdiv into rscratch1 followed by msub
// (dst = src1 - rscratch1 * src2), via the shared mod encoder.
// Fix: the format string's second line was "msub($dst, ..." — a stray,
// unbalanced parenthesis glued to the mnemonic — and the first line
// lacked the "\t" continuation indent that modI uses.  Only the format
// text changes; pattern and encoding are untouched.
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
10177 
// Integer Shifts

// Shift Left Register
// 32-bit variable shift left: dst = src1 << (src2 & 31) (lslvw masks
// the shift amount in hardware).
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// 32-bit immediate shift left; the constant is masked to 0..31 to match
// Java shift semantics.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
// 32-bit variable logical shift right.
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
// 32-bit immediate logical shift right; constant masked to 0..31.
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
// 32-bit variable arithmetic shift right.
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
// 32-bit immediate arithmetic shift right; constant masked to 0..31.
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Combined Int Mask and Right Shift (using UBFM)
// TODO
10275 
10276 // Combined Int Mask and Right Shift (using UBFM)
10277 // TODO
10278 
10279 // Long Shifts
10280 
10281 // Shift Left Register
10282 instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
10283   match(Set dst (LShiftL src1 src2));
10284 
10285   ins_cost(INSN_COST * 2);
10286   format %{ "lslv  $dst, $src1, $src2" %}
10287 
10288   ins_encode %{
10289     __ lslv(as_Register($dst$$reg),
10290             as_Register($src1$$reg),
10291             as_Register($src2$$reg));
10292   %}
10293 
10294   ins_pipe(ialu_reg_reg_vshift);
10295 %}
10296 
10297 // Shift Left Immediate
10298 instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
10299   match(Set dst (LShiftL src1 src2));
10300 
10301   ins_cost(INSN_COST);
10302   format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}
10303 
10304   ins_encode %{
10305     __ lsl(as_Register($dst$$reg),
10306             as_Register($src1$$reg),
10307             $src2$$constant & 0x3f);
10308   %}
10309 
10310   ins_pipe(ialu_reg_shift);
10311 %}
10312 
10313 // Shift Right Logical Register
10314 instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
10315   match(Set dst (URShiftL src1 src2));
10316 
10317   ins_cost(INSN_COST * 2);
10318   format %{ "lsrv  $dst, $src1, $src2" %}
10319 
10320   ins_encode %{
10321     __ lsrv(as_Register($dst$$reg),
10322             as_Register($src1$$reg),
10323             as_Register($src2$$reg));
10324   %}
10325 
10326   ins_pipe(ialu_reg_reg_vshift);
10327 %}
10328 
10329 // Shift Right Logical Immediate
10330 instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
10331   match(Set dst (URShiftL src1 src2));
10332 
10333   ins_cost(INSN_COST);
10334   format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}
10335 
10336   ins_encode %{
10337     __ lsr(as_Register($dst$$reg),
10338            as_Register($src1$$reg),
10339            $src2$$constant & 0x3f);
10340   %}
10341 
10342   ins_pipe(ialu_reg_shift);
10343 %}
10344 
10345 // A special-case pattern for card table stores.
10346 instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
10347   match(Set dst (URShiftL (CastP2X src1) src2));
10348 
10349   ins_cost(INSN_COST);
10350   format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}
10351 
10352   ins_encode %{
10353     __ lsr(as_Register($dst$$reg),
10354            as_Register($src1$$reg),
10355            $src2$$constant & 0x3f);
10356   %}
10357 
10358   ins_pipe(ialu_reg_shift);
10359 %}
10360 
10361 // Shift Right Arithmetic Register
10362 instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
10363   match(Set dst (RShiftL src1 src2));
10364 
10365   ins_cost(INSN_COST * 2);
10366   format %{ "asrv  $dst, $src1, $src2" %}
10367 
10368   ins_encode %{
10369     __ asrv(as_Register($dst$$reg),
10370             as_Register($src1$$reg),
10371             as_Register($src2$$reg));
10372   %}
10373 
10374   ins_pipe(ialu_reg_reg_vshift);
10375 %}
10376 
10377 // Shift Right Arithmetic Immediate
10378 instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
10379   match(Set dst (RShiftL src1 src2));
10380 
10381   ins_cost(INSN_COST);
10382   format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}
10383 
10384   ins_encode %{
10385     __ asr(as_Register($dst$$reg),
10386            as_Register($src1$$reg),
10387            $src2$$constant & 0x3f);
10388   %}
10389 
10390   ins_pipe(ialu_reg_shift);
10391 %}
10392 
10393 // BEGIN This section of the file is automatically generated. Do not edit --------------
10394 
10395 instruct regL_not_reg(iRegLNoSp dst,
10396                          iRegL src1, immL_M1 m1,
10397                          rFlagsReg cr) %{
10398   match(Set dst (XorL src1 m1));
10399   ins_cost(INSN_COST);
10400   format %{ "eon  $dst, $src1, zr" %}
10401 
10402   ins_encode %{
10403     __ eon(as_Register($dst$$reg),
10404               as_Register($src1$$reg),
10405               zr,
10406               Assembler::LSL, 0);
10407   %}
10408 
10409   ins_pipe(ialu_reg);
10410 %}
10411 instruct regI_not_reg(iRegINoSp dst,
10412                          iRegIorL2I src1, immI_M1 m1,
10413                          rFlagsReg cr) %{
10414   match(Set dst (XorI src1 m1));
10415   ins_cost(INSN_COST);
10416   format %{ "eonw  $dst, $src1, zr" %}
10417 
10418   ins_encode %{
10419     __ eonw(as_Register($dst$$reg),
10420               as_Register($src1$$reg),
10421               zr,
10422               Assembler::LSL, 0);
10423   %}
10424 
10425   ins_pipe(ialu_reg);
10426 %}
10427 
10428 instruct AndI_reg_not_reg(iRegINoSp dst,
10429                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
10430                          rFlagsReg cr) %{
10431   match(Set dst (AndI src1 (XorI src2 m1)));
10432   ins_cost(INSN_COST);
10433   format %{ "bicw  $dst, $src1, $src2" %}
10434 
10435   ins_encode %{
10436     __ bicw(as_Register($dst$$reg),
10437               as_Register($src1$$reg),
10438               as_Register($src2$$reg),
10439               Assembler::LSL, 0);
10440   %}
10441 
10442   ins_pipe(ialu_reg_reg);
10443 %}
10444 
10445 instruct AndL_reg_not_reg(iRegLNoSp dst,
10446                          iRegL src1, iRegL src2, immL_M1 m1,
10447                          rFlagsReg cr) %{
10448   match(Set dst (AndL src1 (XorL src2 m1)));
10449   ins_cost(INSN_COST);
10450   format %{ "bic  $dst, $src1, $src2" %}
10451 
10452   ins_encode %{
10453     __ bic(as_Register($dst$$reg),
10454               as_Register($src1$$reg),
10455               as_Register($src2$$reg),
10456               Assembler::LSL, 0);
10457   %}
10458 
10459   ins_pipe(ialu_reg_reg);
10460 %}
10461 
10462 instruct OrI_reg_not_reg(iRegINoSp dst,
10463                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
10464                          rFlagsReg cr) %{
10465   match(Set dst (OrI src1 (XorI src2 m1)));
10466   ins_cost(INSN_COST);
10467   format %{ "ornw  $dst, $src1, $src2" %}
10468 
10469   ins_encode %{
10470     __ ornw(as_Register($dst$$reg),
10471               as_Register($src1$$reg),
10472               as_Register($src2$$reg),
10473               Assembler::LSL, 0);
10474   %}
10475 
10476   ins_pipe(ialu_reg_reg);
10477 %}
10478 
10479 instruct OrL_reg_not_reg(iRegLNoSp dst,
10480                          iRegL src1, iRegL src2, immL_M1 m1,
10481                          rFlagsReg cr) %{
10482   match(Set dst (OrL src1 (XorL src2 m1)));
10483   ins_cost(INSN_COST);
10484   format %{ "orn  $dst, $src1, $src2" %}
10485 
10486   ins_encode %{
10487     __ orn(as_Register($dst$$reg),
10488               as_Register($src1$$reg),
10489               as_Register($src2$$reg),
10490               Assembler::LSL, 0);
10491   %}
10492 
10493   ins_pipe(ialu_reg_reg);
10494 %}
10495 
10496 instruct XorI_reg_not_reg(iRegINoSp dst,
10497                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
10498                          rFlagsReg cr) %{
10499   match(Set dst (XorI m1 (XorI src2 src1)));
10500   ins_cost(INSN_COST);
10501   format %{ "eonw  $dst, $src1, $src2" %}
10502 
10503   ins_encode %{
10504     __ eonw(as_Register($dst$$reg),
10505               as_Register($src1$$reg),
10506               as_Register($src2$$reg),
10507               Assembler::LSL, 0);
10508   %}
10509 
10510   ins_pipe(ialu_reg_reg);
10511 %}
10512 
// dst = ~(src1 ^ src2) (64-bit). Matched as (-1 ^ (src2 ^ src1)) and
// collapsed into a single EON instruction.
instruct XorL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    // EON with an unshifted (LSL #0) register operand.
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
10529 
// dst = src1 & ~(src2 >>> src3) (32-bit). Both the NOT (XOR with -1,
// src4) and the unsigned shift are folded into one BICW with LSR operand.
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10547 
// dst = src1 & ~(src2 >>> src3) (64-bit). NOT (XOR with -1, src4) and
// the unsigned shift are folded into one BIC with LSR operand.
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10565 
// dst = src1 & ~(src2 >> src3) (32-bit, arithmetic shift). NOT and
// shift folded into one BICW with ASR operand.
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10583 
// dst = src1 & ~(src2 >> src3) (64-bit, arithmetic shift). NOT and
// shift folded into one BIC with ASR operand.
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10601 
// dst = src1 & ~(src2 << src3) (32-bit). NOT and shift folded into one
// BICW with LSL operand.
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10619 
// dst = src1 & ~(src2 << src3) (64-bit). NOT and shift folded into one
// BIC with LSL operand.
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10637 
// dst = ~((src2 >>> src3) ^ src1) (32-bit). Matched as
// (-1 ^ ((src2 >>> src3) ^ src1)); NOT and shift fold into one EONW/LSR.
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10655 
// dst = ~((src2 >>> src3) ^ src1) (64-bit). NOT and shift fold into one
// EON with LSR operand.
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10673 
// dst = ~((src2 >> src3) ^ src1) (32-bit, arithmetic shift). NOT and
// shift fold into one EONW with ASR operand.
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10691 
// dst = ~((src2 >> src3) ^ src1) (64-bit, arithmetic shift). NOT and
// shift fold into one EON with ASR operand.
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10709 
// dst = ~((src2 << src3) ^ src1) (32-bit). NOT and shift fold into one
// EONW with LSL operand.
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10727 
// dst = ~((src2 << src3) ^ src1) (64-bit). NOT and shift fold into one
// EON with LSL operand.
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10745 
// dst = src1 | ~(src2 >>> src3) (32-bit). NOT (XOR with -1, src4) and
// shift fold into one ORNW with LSR operand.
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10763 
// dst = src1 | ~(src2 >>> src3) (64-bit). NOT and shift fold into one
// ORN with LSR operand.
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10781 
// dst = src1 | ~(src2 >> src3) (32-bit, arithmetic shift). NOT and
// shift fold into one ORNW with ASR operand.
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10799 
// dst = src1 | ~(src2 >> src3) (64-bit, arithmetic shift). NOT and
// shift fold into one ORN with ASR operand.
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10817 
// dst = src1 | ~(src2 << src3) (32-bit). NOT and shift fold into one
// ORNW with LSL operand.
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10835 
// dst = src1 | ~(src2 << src3) (64-bit). NOT and shift fold into one
// ORN with LSL operand.
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10853 
// dst = src1 & (src2 >>> src3) (32-bit): the unsigned shift is folded
// into the ANDW's shifted-register operand.
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10872 
// dst = src1 & (src2 >>> src3) (64-bit): unsigned shift folded into the
// AND's shifted-register operand.
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10891 
// dst = src1 & (src2 >> src3) (32-bit): arithmetic shift folded into
// the ANDW's shifted-register operand.
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10910 
// dst = src1 & (src2 >> src3) (64-bit): arithmetic shift folded into
// the AND's shifted-register operand.
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10929 
// dst = src1 & (src2 << src3) (32-bit): left shift folded into the
// ANDW's shifted-register operand.
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10948 
// dst = src1 & (src2 << src3) (64-bit): left shift folded into the
// AND's shifted-register operand.
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10967 
// dst = src1 ^ (src2 >>> src3) (32-bit): unsigned shift folded into the
// EORW's shifted-register operand.
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10986 
// dst = src1 ^ (src2 >>> src3) (64-bit): unsigned shift folded into the
// EOR's shifted-register operand.
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11005 
// dst = src1 ^ (src2 >> src3) (32-bit): arithmetic shift folded into
// the EORW's shifted-register operand.
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11024 
// dst = src1 ^ (src2 >> src3) (64-bit): arithmetic shift folded into
// the EOR's shifted-register operand.
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11043 
// dst = src1 ^ (src2 << src3) (32-bit): left shift folded into the
// EORW's shifted-register operand.
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11062 
// dst = src1 ^ (src2 << src3) (64-bit): left shift folded into the
// EOR's shifted-register operand.
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11081 
// dst = src1 | (src2 >>> src3) (32-bit): unsigned shift folded into the
// ORRW's shifted-register operand.
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11100 
// dst = src1 | (src2 >>> src3) (64-bit): unsigned shift folded into the
// ORR's shifted-register operand.
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11119 
// dst = src1 | (src2 >> src3) (32-bit): arithmetic shift folded into
// the ORRW's shifted-register operand.
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11138 
// dst = src1 | (src2 >> src3) (64-bit): arithmetic shift folded into
// the ORR's shifted-register operand.
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11157 
// dst = src1 | (src2 << src3) (32-bit): left shift folded into the
// ORRW's shifted-register operand.
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11176 
// dst = src1 | (src2 << src3) (64-bit): left shift folded into the
// ORR's shifted-register operand.
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11195 
// dst = src1 + (src2 >>> src3) (32-bit): unsigned shift folded into the
// ADDW's shifted-register operand.
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11214 
// dst = src1 + (src2 >>> src3) (64-bit): unsigned shift folded into the
// ADD's shifted-register operand.
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11233 
// dst = src1 + (src2 >> src3) (32-bit): arithmetic shift folded into
// the ADDW's shifted-register operand.
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11252 
// dst = src1 + (src2 >> src3) (64-bit): arithmetic shift folded into
// the ADD's shifted-register operand.
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11271 
// dst = src1 + (src2 << src3) (32-bit): left shift folded into the
// ADDW's shifted-register operand.
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11290 
// dst = src1 + (src2 << src3) (64-bit): left shift folded into the
// ADD's shifted-register operand.
instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11309 
// dst = src1 - (src2 >>> src3) (32-bit): unsigned shift folded into the
// SUBW's shifted-register operand.
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11328 
// dst = src1 - (src2 >>> src3) (64-bit): unsigned shift folded into the
// SUB's shifted-register operand.
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11347 
// dst = src1 - (src2 >> src3) (32-bit): arithmetic shift folded into
// the SUBW's shifted-register operand.
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11366 
// dst = src1 - (src2 >> src3) (64-bit): arithmetic shift folded into
// the SUB's shifted-register operand.
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11385 
// dst = src1 - (src2 << src3) (32-bit): left shift folded into the
// SUBW's shifted-register operand.
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // shift count masked to the 32-bit range, as Java does
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11404 
// dst = src1 - (src2 << src3) (64-bit): left shift folded into the
// SUB's shifted-register operand.
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // shift count masked to the 64-bit range, as Java does
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11423 
11424 
11425 
11426 // Shift Left followed by Shift Right.
11427 // This idiom is used by the compiler for the i2b bytecode etc.
// (src << lshift) >> rshift (arithmetic, 64-bit) collapsed into a single
// signed bitfield move. The predicate guards both immediate shift counts
// via the raw node inputs: in(2) is rshift_count, in(1)->in(2) is
// lshift_count.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;             // immS: last bit of the source field
    int r = (rshift - lshift) & 63;  // immR: right-rotate amount, mod 64
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
11448 
11449 // Shift Left followed by Shift Right.
11450 // This idiom is used by the compiler for the i2b bytecode etc.
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant: collapses (src << lshift) >> rshift (arithmetic)
// into a single signed bitfield move: sbfmw dst, src, r, s.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;           // most significant bit of the source field
    int r = (rshift - lshift) & 31; // right-rotate amount, mod register width
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
11471 
11472 // Shift Left followed by Shift Right.
11473 // This idiom is used by the compiler for the i2b bytecode etc.
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Unsigned variant: collapses (src << lshift) >>> rshift into a
// single unsigned bitfield move: ubfm dst, src, r, s.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;           // most significant bit of the source field
    int r = (rshift - lshift) & 63; // right-rotate amount, mod register width
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
11494 
11495 // Shift Left followed by Shift Right.
11496 // This idiom is used by the compiler for the i2b bytecode etc.
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit unsigned variant: collapses (src << lshift) >>> rshift into
// a single unsigned bitfield move: ubfmw dst, src, r, s.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;           // most significant bit of the source field
    int r = (rshift - lshift) & 31; // right-rotate amount, mod register width
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
11517 // Bitfield extract with shift & mask
11518 
// Unsigned bitfield extract (32-bit): dst = (src >>> rshift) & mask.
// immI_bitmask guarantees mask+1 is a power of two, so the field
// width is exact_log2(mask+1).
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1); // number of bits to extract
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// Unsigned bitfield extract (64-bit): dst = (src >>> rshift) & mask.
// immL_bitmask guarantees mask+1 is a power of two, so the field
// width is exact_log2(mask+1).
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1); // number of bits to extract
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11549 
11550 // We can use ubfx when extending an And with a mask when we know mask
11551 // is positive.  We know that because immI_bitmask guarantees it.
// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// Folds the int extract plus ConvI2L into one 64-bit ubfx: the
// extracted field is zero-extended, which matches widening a
// non-negative int.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1); // number of bits to extract
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11567 
11568 // Rotations
11569 
// Long rotate composed from OR of opposing shifts:
// (src1 << lshift) | (src2 >>> rshift).  The predicate requires
// lshift + rshift == 0 (mod 64), which is exactly the extr idiom.
instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11584 
// Int rotate composed from OR of opposing shifts:
// (src1 << lshift) | (src2 >>> rshift), with lshift + rshift == 0
// (mod 32).  NOTE(review): the format prints "extr" but the encoding
// emits the 32-bit form extrw.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11599 
// Long rotate composed from ADD of opposing shifts:
// (src1 << lshift) + (src2 >>> rshift).  With lshift + rshift == 0
// (mod 64) the shifted fields cannot overlap, so ADD equals OR and
// the extr instruction applies.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11614 
// Int rotate composed from ADD of opposing shifts:
// (src1 << lshift) + (src2 >>> rshift), with lshift + rshift == 0
// (mod 32) so ADD equals OR.  NOTE(review): the format prints "extr"
// but the encoding emits the 32-bit form extrw.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11629 
11630 
11631 // rol expander
11632 
// rol expander: 64-bit rotate left by a variable amount.  AArch64 has
// no rol instruction, so this negates the shift into rscratch1 and
// uses rorv (rol x by n == ror x by -n mod 64).  Match-less expander
// used by the rolL_rReg_Var_* rules below.
instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg)); // rscratch1 = -shift
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
11646 
11647 // rol expander
11648 
// rol expander: 32-bit rotate left by a variable amount, implemented
// as rorvw by the negated shift (rol x by n == ror x by -n mod 32).
// Match-less expander used by the rolI_rReg_Var_* rules below.
instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg)); // rscratch1 = -shift
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
11662 
// Long rotate-left idiom: (src << shift) | (src >>> (64 - shift)).
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}
11671 
// Long rotate-left idiom with 0 constant:
// (src << shift) | (src >>> (0 - shift)); equivalent because long
// shift amounts are taken mod 64.
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}
11680 
// Int rotate-left idiom: (src << shift) | (src >>> (32 - shift)).
// FIX: the operands were declared as long registers (iRegLNoSp/iRegL)
// and the rule expanded to the 64-bit rolL_rReg, but the matched tree
// (OrI/LShiftI/URShiftI) is int-typed, so the rule could never match
// — and a 64-bit rotate would be wrong for a 32-bit value anyway.
// Use int operands and expand to the 32-bit rolI_rReg.
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegIorL2I src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
11689 
// Int rotate-left idiom with 0 constant:
// (src << shift) | (src >>> (0 - shift)); equivalent because int
// shift amounts are taken mod 32.
// FIX: operands were long registers expanding to the 64-bit
// rolL_rReg, so this int-typed pattern could never match.  Use int
// operands and expand to the 32-bit rolI_rReg.
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegIorL2I src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
11698 
11699 // ror expander
11700 
// ror expander: 64-bit rotate right by a variable amount, mapped
// directly to rorv.  Match-less expander used by the
// rorL_rReg_Var_* rules below.
instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
11713 
11714 // ror expander
11715 
// ror expander: 32-bit rotate right by a variable amount, mapped
// directly to rorvw.  Match-less expander used by the
// rorI_rReg_Var_* rules below.
instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
11728 
// Long rotate-right idiom: (src >>> shift) | (src << (64 - shift)).
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}
11737 
// Long rotate-right idiom with 0 constant:
// (src >>> shift) | (src << (0 - shift)); equivalent because long
// shift amounts are taken mod 64.
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}
11746 
// Int rotate-right idiom: (src >>> shift) | (src << (32 - shift)).
// FIX: the operands were declared as long registers (iRegLNoSp/iRegL)
// and the rule expanded to the 64-bit rorL_rReg, but the matched tree
// (OrI/URShiftI/LShiftI) is int-typed, so the rule could never match
// — and a 64-bit rotate would be wrong for a 32-bit value anyway.
// Use int operands and expand to the 32-bit rorI_rReg.
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegIorL2I src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
11755 
// Int rotate-right idiom with 0 constant:
// (src >>> shift) | (src << (0 - shift)); equivalent because int
// shift amounts are taken mod 32.
// FIX: operands were long registers expanding to the 64-bit
// rorL_rReg, so this int-typed pattern could never match.  Use int
// operands and expand to the 32-bit rorI_rReg.
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegIorL2I src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
11764 
11765 // Add/subtract (extended)
11766 
// Long add of a sign-extended int: dst = src1 + (long)src2, using the
// extended-register form of add (sxtw) so no separate sxtw insn is
// needed.
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
11779 
// Long subtract of a sign-extended int: dst = src1 - (long)src2,
// using the extended-register form of sub (sxtw).
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
11792 
11793 
// Int add of a short-extended value: (src2 << 16) >> 16 is the i2s
// idiom, folded into the extended-register add with sxth.
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11806 
// Int add of a byte-extended value: (src2 << 24) >> 24 is the i2b
// idiom, folded into the extended-register add with sxtb.
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11819 
// Int add of a zero-extended byte: (src2 << 24) >>> 24 keeps the low
// 8 bits, folded into the extended-register add with uxtb.
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11832 
// Long add of a short-extended value: (src2 << 48) >> 48 sign-extends
// the low 16 bits, folded into the extended-register add with sxth.
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11845 
// Long add of a word-extended value: (src2 << 32) >> 32 sign-extends
// the low 32 bits, folded into the extended-register add with sxtw.
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11858 
// Long add of a byte-extended value: (src2 << 56) >> 56 sign-extends
// the low 8 bits, folded into the extended-register add with sxtb.
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11871 
// Long add of a zero-extended byte: (src2 << 56) >>> 56 keeps the low
// 8 bits, folded into the extended-register add with uxtb.
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11884 
11885 
// Int add with a 0xff mask: src2 & 255 is a zero-extended byte, so
// use the extended-register addw with uxtb.
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11898 
// Int add with a 0xffff mask: src2 & 65535 is a zero-extended
// halfword, so use the extended-register addw with uxth.
instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11911 
// Long add with a 0xff mask: src2 & 255 is a zero-extended byte, so
// use the extended-register add with uxtb.
instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11924 
// Long add with a 0xffff mask: src2 & 65535 is a zero-extended
// halfword, so use the extended-register add with uxth.
instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11937 
// Long add with a 0xffffffff mask: src2 & 0xffffffffL is a
// zero-extended word, so use the extended-register add with uxtw.
instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11950 
// Int subtract with a 0xff mask: src2 & 255 is a zero-extended byte,
// so use the extended-register subw with uxtb.
instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11963 
// Int subtract with a 0xffff mask: src2 & 65535 is a zero-extended
// halfword, so use the extended-register subw with uxth.
instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11976 
// Long subtract with a 0xff mask: src2 & 255 is a zero-extended byte,
// so use the extended-register sub with uxtb.
instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11989 
// Long subtract with a 0xffff mask: src2 & 65535 is a zero-extended
// halfword, so use the extended-register sub with uxth.
instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
12002 
// Long subtract with a 0xffffffff mask: src2 & 0xffffffffL is a
// zero-extended word, so use the extended-register sub with uxtw.
instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
12015 
12016 // END This section of the file is automatically generated. Do not edit --------------
12017 
12018 // ============================================================================
12019 // Floating Point Arithmetic Instructions
12020 
// Float add: dst = src1 + src2 (single-precision fadds).
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12035 
// Double add: dst = src1 + src2 (double-precision faddd).
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12050 
// Float subtract: dst = src1 - src2 (single-precision fsubs).
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12065 
// Double subtract: dst = src1 - src2 (double-precision fsubd).
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12080 
// Float multiply: dst = src1 * src2 (single-precision fmuls).
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12095 
// Double multiply: dst = src1 * src2 (double-precision fmuld).
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12110 
// We cannot use these fused mul w add/sub ops because they don't
// produce the same result as the equivalent separated ops
// (essentially they don't round the intermediate result). That's a
// shame. Leaving them here in case we can identify cases where it is
// legitimate to use them.
12116 
12117 
12118 // instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
12119 //   match(Set dst (AddF (MulF src1 src2) src3));
12120 
12121 //   format %{ "fmadds   $dst, $src1, $src2, $src3" %}
12122 
12123 //   ins_encode %{
12124 //     __ fmadds(as_FloatRegister($dst$$reg),
12125 //              as_FloatRegister($src1$$reg),
12126 //              as_FloatRegister($src2$$reg),
12127 //              as_FloatRegister($src3$$reg));
12128 //   %}
12129 
12130 //   ins_pipe(pipe_class_default);
12131 // %}
12132 
12133 // instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
12134 //   match(Set dst (AddD (MulD src1 src2) src3));
12135 
12136 //   format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
12137 
12138 //   ins_encode %{
12139 //     __ fmaddd(as_FloatRegister($dst$$reg),
12140 //              as_FloatRegister($src1$$reg),
12141 //              as_FloatRegister($src2$$reg),
12142 //              as_FloatRegister($src3$$reg));
12143 //   %}
12144 
12145 //   ins_pipe(pipe_class_default);
12146 // %}
12147 
12148 // instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
12149 //   match(Set dst (AddF (MulF (NegF src1) src2) src3));
12150 //   match(Set dst (AddF (NegF (MulF src1 src2)) src3));
12151 
12152 //   format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
12153 
12154 //   ins_encode %{
12155 //     __ fmsubs(as_FloatRegister($dst$$reg),
12156 //               as_FloatRegister($src1$$reg),
12157 //               as_FloatRegister($src2$$reg),
12158 //              as_FloatRegister($src3$$reg));
12159 //   %}
12160 
12161 //   ins_pipe(pipe_class_default);
12162 // %}
12163 
12164 // instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
12165 //   match(Set dst (AddD (MulD (NegD src1) src2) src3));
12166 //   match(Set dst (AddD (NegD (MulD src1 src2)) src3));
12167 
12168 //   format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
12169 
12170 //   ins_encode %{
12171 //     __ fmsubd(as_FloatRegister($dst$$reg),
12172 //               as_FloatRegister($src1$$reg),
12173 //               as_FloatRegister($src2$$reg),
12174 //               as_FloatRegister($src3$$reg));
12175 //   %}
12176 
12177 //   ins_pipe(pipe_class_default);
12178 // %}
12179 
12180 // instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
12181 //   match(Set dst (SubF (MulF (NegF src1) src2) src3));
12182 //   match(Set dst (SubF (NegF (MulF src1 src2)) src3));
12183 
12184 //   format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
12185 
12186 //   ins_encode %{
12187 //     __ fnmadds(as_FloatRegister($dst$$reg),
12188 //                as_FloatRegister($src1$$reg),
12189 //                as_FloatRegister($src2$$reg),
12190 //                as_FloatRegister($src3$$reg));
12191 //   %}
12192 
12193 //   ins_pipe(pipe_class_default);
12194 // %}
12195 
12196 // instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
12197 //   match(Set dst (SubD (MulD (NegD src1) src2) src3));
12198 //   match(Set dst (SubD (NegD (MulD src1 src2)) src3));
12199 
12200 //   format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
12201 
12202 //   ins_encode %{
12203 //     __ fnmaddd(as_FloatRegister($dst$$reg),
12204 //                as_FloatRegister($src1$$reg),
12205 //                as_FloatRegister($src2$$reg),
12206 //                as_FloatRegister($src3$$reg));
12207 //   %}
12208 
12209 //   ins_pipe(pipe_class_default);
12210 // %}
12211 
12212 // instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
12213 //   match(Set dst (SubF (MulF src1 src2) src3));
12214 
12215 //   format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
12216 
12217 //   ins_encode %{
12218 //     __ fnmsubs(as_FloatRegister($dst$$reg),
12219 //                as_FloatRegister($src1$$reg),
12220 //                as_FloatRegister($src2$$reg),
12221 //                as_FloatRegister($src3$$reg));
12222 //   %}
12223 
12224 //   ins_pipe(pipe_class_default);
12225 // %}
12226 
12227 // instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
12228 //   match(Set dst (SubD (MulD src1 src2) src3));
12229 
12230 //   format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
12231 
12232 //   ins_encode %{
12233 //   // n.b. insn name should be fnmsubd
12234 //     __ fnmsub(as_FloatRegister($dst$$reg),
12235 //                as_FloatRegister($src1$$reg),
12236 //                as_FloatRegister($src2$$reg),
12237 //                as_FloatRegister($src3$$reg));
12238 //   %}
12239 
12240 //   ins_pipe(pipe_class_default);
12241 // %}
12242 
12243 
// Float divide: dst = src1 / src2 (single-precision fdivs).
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12258 
// Double divide: dst = src1 / src2 (double-precision fdivd).
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12273 
// Float negate: dst = -src.  NOTE(review): the format prints "fneg"
// but the encoding emits the single-precision form fnegs.
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  format %{ "fneg   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12287 
// Double negate: dst = -src (double-precision fnegd).
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12301 
// Float absolute value: dst = |src| (single-precision fabss).
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12314 
// Double absolute value: dst = |src| (double-precision fabsd).
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12327 
// Double square root: dst = sqrt(src) (double-precision fsqrtd).
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12340 
// Float square root: matches the (float)sqrt((double)src) pattern the
// ideal graph uses for Math.sqrt on floats (SqrtD is the only ideal
// sqrt node) and collapses it to a single single-precision fsqrts.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12353 
12354 // ============================================================================
12355 // Logical Instructions
12356 
12357 // Integer Logical Instructions
12358 
12359 // And Instructions
12360 
12361 
// 32-bit bitwise AND, register-register: andw dst, src1, src2.
// NOTE(review): the cr operand is declared but there is no effect(KILL cr)
// and andw does not set flags -- looks vestigial; confirm before removing.
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12376 
// 32-bit bitwise AND with a logical immediate: andw dst, src1, #imm.
// NOTE(review): as in andI_reg_reg, the cr operand has no effect(KILL cr)
// and andw does not set flags -- looks vestigial; confirm before removing.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  // Fixed: the format said "andsw" (the flag-setting form) but the
  // encoding emits the non-flag-setting andw below.
  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12391 
12392 // Or Instructions
12393 
// 32-bit bitwise OR, register-register: orrw dst, src1, src2.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 32-bit bitwise OR with a logical immediate: orrw dst, src1, #imm.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12423 
12424 // Xor Instructions
12425 
// 32-bit bitwise XOR, register-register: eorw dst, src1, src2.
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 32-bit bitwise XOR with a logical immediate: eorw dst, src1, #imm.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12455 
12456 // Long Logical Instructions
12457 // TODO
12458 
// 64-bit bitwise AND, register-register: and dst, src1, src2.
// NOTE(review): the cr operand has no effect(KILL cr) and andr does not
// set flags -- looks vestigial; confirm before removing.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  // Fixed: format comment said "# int" for a 64-bit (long) operation.
  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 64-bit bitwise AND with a logical immediate: and dst, src1, #imm.
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  // Fixed: format comment said "# int" for a 64-bit (long) operation.
  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Or Instructions

// 64-bit bitwise OR, register-register: orr dst, src1, src2.
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  // Fixed: format comment said "# int" for a 64-bit (long) operation.
  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 64-bit bitwise OR with a logical immediate: orr dst, src1, #imm.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  // Fixed: format comment said "# int" for a 64-bit (long) operation.
  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// 64-bit bitwise XOR, register-register: eor dst, src1, src2.
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  // Fixed: format comment said "# int" for a 64-bit (long) operation.
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 64-bit bitwise XOR with a logical immediate: eor dst, src1, #imm.
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  // Fixed: format comment said "# int" for a 64-bit (long) operation.
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12552 
// Sign-extend int to long: sbfm dst, src, #0, #31 (i.e. sxtw).
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// this pattern occurs in bigmath arithmetic
// Zero-extend int to long: matches (ConvI2L src) masked with 0xFFFFFFFF
// and emits a single ubfm (i.e. uxtw) instead of extend-then-and.
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Truncate long to int: a 32-bit register move discards the upper bits.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Convert int to boolean: dst = (src != 0) ? 1 : 0, via cmpw/cset.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Convert pointer to boolean: dst = (src != NULL) ? 1 : 0, via cmp/cset.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
12627 
// Narrow double to float.
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Widen float to double.
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Float to int, signed, rounding toward zero (fcvtzs, 32-bit form).
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Float to long, signed, rounding toward zero.
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Signed int to float (scvtf, 32-bit source).
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Signed long to float.
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Double to int, signed, rounding toward zero.
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Double to long, signed, rounding toward zero.
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Signed int to double.
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Signed long to double.
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12757 
12758 // stack <-> reg and reg <-> reg shuffles with no conversion
12759 
// Raw-bit move of a float stack slot into a GP register (32-bit load).
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Raw-bit move of an int stack slot into an FP register (32-bit load).
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Raw-bit move of a double stack slot into a GP register (64-bit load).
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Raw-bit move of a long stack slot into an FP register (64-bit load).
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
12831 
// Raw-bit store of an FP register into an int stack slot (32-bit store).
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Raw-bit store of a GP register into a float stack slot (32-bit store).
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
12867 
// Raw-bit store of an FP register into a long stack slot (64-bit store).
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  // Fixed: format operands were swapped ("strd $dst, $src"); the encoding
  // stores $src to the $dst stack slot, matching the sibling
  // MoveF2I_reg_stack / MoveL2D_reg_stack formats.
  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
12885 
// Raw-bit store of a GP register into a double stack slot (64-bit store).
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
12903 
// Raw-bit move FP -> GP register via fmov (no memory round trip).
// NOTE(review): ins_pipe(pipe_class_memory) on these register-register
// fmov forms looks copied from the stack variants -- confirm intent.
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_memory);

%}

// Raw-bit move GP -> FP register via fmov.
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(pipe_class_memory);

%}

// Raw-bit move FP -> GP register (64-bit) via fmov.
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_memory);

%}

// Raw-bit move GP -> FP register (64-bit) via fmov.
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(pipe_class_memory);

%}
12975 
12976 // ============================================================================
12977 // clearing of an array
12978 
// Zero an array: count in r11, base in r10 (both clobbered by the
// encoding, hence USE_KILL); actual loop lives in the
// aarch64_enc_clear_array_reg_reg encoding class (defined elsewhere).
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode(aarch64_enc_clear_array_reg_reg(cnt, base));

  ins_pipe(pipe_class_memory);
%}
12991 
12992 // ============================================================================
12993 // Overflow Math Instructions
12994 
// Int add overflow check: cmnw computes op1 + op2 and sets flags;
// the consumer tests the V (overflow) flag.
instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Int add overflow check against an add/sub immediate.
instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long add overflow check.
instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Long add overflow check against an add/sub immediate.
instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Int subtract overflow check: cmpw sets flags for op1 - op2.
instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Int subtract overflow check against an add/sub immediate.
instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long subtract overflow check.
instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Long subtract overflow check against an add/sub immediate.
instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Int negation overflow check: (OverflowSubI 0 op1), compare zr with op1.
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  format %{ "cmpw  zr, $op1\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long negation overflow check: (OverflowSubL 0 op1), compare zr with op1.
// NOTE(review): the zero operand is declared immI0 against a long
// subtraction -- confirm whether an immL0 operand should match here.
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
13124 
// Int multiply overflow check producing the V flag for a generic consumer.
// smull gives the exact 64-bit product; if its top 33 bits are not a pure
// sign extension of bit 31 the multiply overflowed.  The subs/movw/cselw/
// cmpw sequence then translates that NE condition into the V flag
// (0x80000000 - 1 sets VS) so downstream code can test overflow/no_overflow.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Int multiply overflow feeding a branch directly.  Restricted by the
// predicate to overflow/no_overflow tests, so the V-flag synthesis above
// can be skipped: branch on NE (overflow) or EQ (no overflow) instead.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}

// Long multiply overflow check producing the V flag.  mul/smulh give the
// full 128-bit product; overflow iff the high 64 bits are not the sign
// extension of the low 64 (compared via ASR #31 on the low half -- see
// format).  Same V-flag synthesis trick as overflowMulI_reg.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #31\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 31);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Long multiply overflow feeding a branch directly; see
// overflowMulI_reg_branch for the predicate/branch-condition scheme.
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #31\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 31);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
13214 
13215 // ============================================================================
13216 // Compare Instructions
13217 
// Signed int compare, register-register.
instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed int compare against zero.
instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, 0" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed int compare against an add/sub-encodable immediate.
instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set dst (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed int compare against an arbitrary immediate (costs extra: the
// constant may need to be materialized first, hence INSN_COST * 2).
instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13273 
13274 // Unsigned compare Instructions; really, same as signed compare
13275 // except it should only be used to feed an If or a CMovI which takes a
13276 // cmpOpU.
13277 
// Unsigned int compare, register-register (same cmpw; the rFlagsRegU
// result tells consumers to use unsigned condition codes).
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned int compare against zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an arbitrary immediate (may need a
// constant materialization, hence INSN_COST * 2).
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13333 
// Signed long compare, register-register.
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed long compare against zero.
// NOTE(review): the zero operand is immI0 against a CmpL, and the format
// says "tst" while the encoding class is a cmp-with-immediate -- both look
// inherited quirks; confirm against the encoding class definition.
instruct compL_reg_immI0(rFlagsReg cr, iRegL op1, immI0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an arbitrary immediate (may need a
// constant materialization, hence INSN_COST * 2).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13389 
// Pointer compare, register-register (unsigned flags: addresses compare
// unsigned).
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compressed-oop compare, register-register.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Pointer null test.
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Compressed-oop null test.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
13445 
13446 // FP comparisons
13447 //
13448 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
13449 // using normal cmpOp. See declaration of rFlagsReg for details.
13450 
// Float compare, register-register: fcmps sets NZCV for consumers.
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
13464 
// Float compare against +0.0 using the fcmp-with-zero form.
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    // Fixed: was 0.0D -- 'D' is not a valid C++ floating-literal suffix
    // (a plain 0.0 is already double); some compilers reject it outright.
    __ fcmps(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
13478 // FROM HERE
13479 
// Double compare, register-register: fcmpd sets NZCV for consumers.
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
13493 
// Double-precision compare of an FP register against literal 0.0,
// using the immediate-zero form of FCMPD.
// Fix: the literal was written `0.0D` — a Java-style suffix that is not
// valid ISO C++ and is rejected by modern compilers; use plain `0.0`.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
13507 
// Three-way float compare (CmpF3): dst = -1, 0 or +1.
// FCMPS sets the flags; CSINV installs 0 on EQ else -1; CSNEG keeps -1
// on LT (less or unordered) else negates it to +1.  Unordered thus maps
// to -1, matching Java fcmpl semantics.
// Fixes: balanced the missing ')' in the format text, and removed the
// `Label done` that was declared and bound but never branched to.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
13535 
// Three-way double compare (CmpD3): dst = -1, 0 or +1.
// FCMPD sets the flags; CSINV installs 0 on EQ else -1; CSNEG keeps -1
// on LT (less or unordered) else negates it to +1.  Unordered thus maps
// to -1, matching Java dcmpl semantics.
// Fixes: balanced the missing ')' in the format text, and removed the
// `Label done` that was declared and bound but never branched to.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
13562 
// Three-way float compare against literal 0.0 (CmpF3 src1 0.0):
// dst = -1, 0 or +1, unordered mapping to -1 (Java fcmpl semantics).
// Fixes: balanced the missing ')' in the format text; removed the
// `Label done` that was declared and bound but never branched to; and
// replaced the non-ISO-C++ literal `0.0D` with `0.0`.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
13589 
// Three-way double compare against literal 0.0 (CmpD3 src1 0.0):
// dst = -1, 0 or +1, unordered mapping to -1 (Java dcmpl semantics).
// Fixes: balanced the missing ')' in the format text; removed the
// `Label done` that was declared and bound but never branched to; and
// replaced the non-ISO-C++ literal `0.0D` with `0.0`.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
13615 
// CmpLTMask: dst = (p < q) ? -1 : 0 (all-ones mask when less).
// Emitted as a signed compare, CSETW (dst = 1 on LT else 0), then a
// negate so the 1 becomes -1.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// CmpLTMask against zero: dst = (src < 0) ? -1 : 0.  A single
// arithmetic shift right by 31 smears the sign bit into the mask,
// so no flags-based sequence is needed (cr is still killed per ADL).
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
13652 
13653 // ============================================================================
13654 // Max and Min
13655 
// Signed int minimum: dst = min(src1, src2).
// Compare then conditional select: CSELW picks src1 on LT, else src2.
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 lt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
// FROM HERE

// Signed int maximum: dst = max(src1, src2).
// Same compare/select shape as minI_rReg but selecting on GT.
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 gt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
13706 
13707 // ============================================================================
13708 // Branch Instructions
13709 
13710 // Direct Branch.
// Unconditional direct branch (Goto) to a local label.
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}

// Conditional Near Branch
// Signed conditional branch: tests the flags produced by a preceding
// signed compare and branches on the cmpOp condition.
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}

// Conditional Near Branch Unsigned
// Unsigned variant of branchCon: uses the unsigned cmpOpU/rFlagsRegU
// pairing so unsigned condition codes are emitted.
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
13766 
13767 // Make use of CBZ and CBNZ.  These instructions, as well as being
13768 // shorter than (cmp; branch), have the additional benefit of not
13769 // killing the flags.
13770 
// Fused compare-against-zero-and-branch for 32-bit ints.  Only matches
// eq/ne tests (see predicate) so it can lower to CBZW/CBNZW, which do
// not touch the flags.
instruct cmpI_imm0_branch(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Same fusion for 64-bit longs: eq/ne against zero becomes CBZ/CBNZ.
instruct cmpL_imm0_branch(cmpOp cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Same fusion for pointer null tests: eq/ne against null becomes
// CBZ/CBNZ on the full 64-bit register.
instruct cmpP_imm0_branch(cmpOp cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Null test of a narrow oop without decoding it first: testing the
// decoded pointer against null is equivalent to testing the 32-bit
// compressed form against zero, so CBZW/CBNZW on the narrow register
// suffices and the DecodeN is elided.
instruct cmpP_narrowOop_imm0_branch(cmpOp cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13846 
13847 // Conditional Far Branch
13848 // Conditional Far Branch Unsigned
13849 // TODO: fixme
13850 
13851 // counted loop end branch near
// Signed conditional branch closing a counted loop; same encoding as
// branchCon, matched on CountedLoopEnd instead of If.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}

// counted loop end branch near Unsigned
// Unsigned variant of branchLoopEnd (cmpOpU/rFlagsRegU pairing).
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
13884 
13885 // counted loop end branch far
13886 // counted loop end branch far unsigned
13887 // TODO: fixme
13888 
13889 // ============================================================================
13890 // inlined locking and unlocking
13891 
// Inlined monitor enter (FastLock): sets the flags to indicate
// success/failure of the fast path.  tmp and tmp2 are scratch
// registers clobbered by the encoding.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}

// Inlined monitor exit (FastUnlock); mirror of cmpFastLock.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
13919 
13920 
13921 // ============================================================================
13922 // Safepoint Instructions
13923 
13924 // TODO
13925 // provide a near and far version of this code
13926 
// Safepoint poll: a load from the polling page (result discarded into
// zr).  The VM arms the poll by protecting the page, so a pending
// safepoint turns this load into a trap.
instruct safePoint(iRegP poll)
%{
  match(SafePoint poll);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
13939 
13940 
13941 // ============================================================================
13942 // Procedure Call/Return Instructions
13943 
13944 // Call Java Static Instruction
13945 
// Direct call to a statically-bound Java method; the epilog encoding
// handles the post-call bookkeeping.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}

// TO HERE

// Call Java Dynamic Instruction
// Call through the inline cache for a dynamically-dispatched Java
// method; same epilog handling as the static call.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction

// Call into the VM runtime (full transition, no call epilog needed).
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction

// Leaf runtime call: no safepoint/stack-walk state is required by the
// callee; uses the same runtime-call encoding.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction

// Leaf runtime call that additionally does not use/preserve FP state.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
14031 
14032 // Tail Call; Jump from runtime stub to Java code.
14033 // Also known as an 'interprocedural jump'.
14034 // Target of jump will eventually return to caller.
14035 // TailJump below removes the return address.
// Interprocedural jump from a runtime stub into Java code: an indirect
// branch to jump_target with the method oop carried in the dedicated
// inline-cache register.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}

// Tail jump used for exception forwarding: indirect branch with the
// exception oop pinned in r0; the encoding also removes the return
// address (see TailJump comment above).
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
14061 
14062 // Create exception oop: created by stack-crawling runtime code.
14063 // Created exception is now available to this handler, and is setup
14064 // just prior to jumping to this handler. No code emitted.
14065 // TODO check
14066 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
// CreateEx: purely a register-allocation marker pinning the incoming
// exception oop to r0; emits no instructions (size 0).
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}

// Rethrow exception: The exception oop will come in the first
// argument position. Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
14092 
14093 
14094 // Return Instruction
14095 // epilog node loads ret address into lr as part of frame pop
// Method return: just `ret`; the epilog node has already restored the
// return address into lr as part of the frame pop.
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}

// Halt: deliberately unreachable code path; emits a BRK trap with an
// arbitrary immediate (999) so execution stops immediately if reached.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // TODO
    // implement proper trap call here
    __ brk(999);
  %}

  ins_pipe(pipe_class_default);
%}
14122 
14123 // ============================================================================
14124 // Partial Subtype Check
14125 //
14126 // superklass array for an instance of the superklass.  Set a hidden
14127 // internal cache on a hit (cache is checked with exposed code in
14128 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
14129 // encoding ALSO sets flags.
14130 
// Partial subtype check with a materialized result register; the
// opcode(0x1) flag tells the encoding to zero the result on a hit.
// Operands are pinned to specific registers required by the stub.
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}

// Variant matched when only the flags of (PartialSubtypeCheck == 0)
// are consumed; the result register is a scratch kill and is not
// zeroed on a hit (opcode 0x0).
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
14160 
// String.compareTo intrinsic (non-compact strings only).  Delegates to
// the MacroAssembler string_compare stub; all operands are pinned to
// the registers that stub expects.
instruct string_compare(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(!CompactStrings);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// String.indexOf intrinsic with a variable-length needle; -1 passed as
// the constant-count argument tells the stub cnt2 is dynamic.
instruct string_indexof(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(!CompactStrings);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// String.indexOf intrinsic specialized for a small constant needle
// length (immI_le_4); the constant is passed to the stub and the cnt2
// register argument is replaced by zr.
instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  predicate(!CompactStrings);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// String.equals intrinsic (non-compact strings only); delegates to the
// MacroAssembler string_equals stub.
instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(!CompactStrings);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(KILL tmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp" %}
  ins_encode %{
    __ string_equals($str1$$Register, $str2$$Register,
                      $cnt$$Register, $result$$Register,
                      $tmp$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Arrays.equals intrinsic for char arrays (UU encoding only, per the
// predicate); delegates to the char_arrays_equals stub.
instruct array_equals(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ char_arrays_equals($ary1$$Register, $ary2$$Register,
                          $result$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// encode char[] to byte[] in ISO_8859_1
// Uses four vector scratch registers (V0-V3, all killed) for the
// SIMD narrowing loop inside the encode_iso_array stub.
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
14266 
14267 // ============================================================================
14268 // This name is KNOWN by the ADLC and cannot be changed.
14269 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
14270 // for this guy.
// ThreadLocal: the current thread already lives in the dedicated
// thread register, so this is a zero-size, zero-cost marker that just
// names that register as the result.
instruct tlsLoadP(thread_RegP dst)
%{
  match(Set dst (ThreadLocal));

  ins_cost(0);

  format %{ " -- \t// $dst=Thread::current(), empty" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
14285 
14286 // ====================VECTOR INSTRUCTIONS=====================================
14287 
14288 // Load vector (32 bits)
// 32-bit vector load into the low word of a D register (ldrs).
instruct loadV4(vecD dst, vmem mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load vector (64 bits)
// 64-bit vector load into a D register (ldrd).
instruct loadV8(vecD dst, vmem mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Vector (128 bits)
// 128-bit vector load into a Q register (ldrq).
instruct loadV16(vecX dst, vmem mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Store Vector (32 bits)
// 32-bit vector store from the low word of a D register (strs).
instruct storeV4(vecD src, vmem mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(pipe_class_memory);
%}

// Store Vector (64 bits)
// 64-bit vector store from a D register (strd).
instruct storeV8(vecD src, vmem mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(pipe_class_memory);
%}

// Store Vector (128 bits)
// 128-bit vector store from a Q register (strq).
instruct storeV16(vecX src, vmem mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(pipe_class_memory);
%}
14353 
14354 instruct replicate8B(vecD dst, iRegIorL2I src)
14355 %{
14356   predicate(n->as_Vector()->length() == 4 ||
14357             n->as_Vector()->length() == 8);
14358   match(Set dst (ReplicateB src));
14359   ins_cost(INSN_COST);
14360   format %{ "dup  $dst, $src\t# vector (8B)" %}
14361   ins_encode %{
14362     __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
14363   %}
14364   ins_pipe(pipe_class_default);
14365 %}
14366 
14367 instruct replicate16B(vecX dst, iRegIorL2I src)
14368 %{
14369   predicate(n->as_Vector()->length() == 16);
14370   match(Set dst (ReplicateB src));
14371   ins_cost(INSN_COST);
14372   format %{ "dup  $dst, $src\t# vector (16B)" %}
14373   ins_encode %{
14374     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
14375   %}
14376   ins_pipe(pipe_class_default);
14377 %}
14378 
14379 instruct replicate8B_imm(vecD dst, immI con)
14380 %{
14381   predicate(n->as_Vector()->length() == 4 ||
14382             n->as_Vector()->length() == 8);
14383   match(Set dst (ReplicateB con));
14384   ins_cost(INSN_COST);
14385   format %{ "movi  $dst, $con\t# vector(8B)" %}
14386   ins_encode %{
14387     __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
14388   %}
14389   ins_pipe(pipe_class_default);
14390 %}
14391 
14392 instruct replicate16B_imm(vecX dst, immI con)
14393 %{
14394   predicate(n->as_Vector()->length() == 16);
14395   match(Set dst (ReplicateB con));
14396   ins_cost(INSN_COST);
14397   format %{ "movi  $dst, $con\t# vector(16B)" %}
14398   ins_encode %{
14399     __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
14400   %}
14401   ins_pipe(pipe_class_default);
14402 %}
14403 
14404 instruct replicate4S(vecD dst, iRegIorL2I src)
14405 %{
14406   predicate(n->as_Vector()->length() == 2 ||
14407             n->as_Vector()->length() == 4);
14408   match(Set dst (ReplicateS src));
14409   ins_cost(INSN_COST);
14410   format %{ "dup  $dst, $src\t# vector (4S)" %}
14411   ins_encode %{
14412     __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
14413   %}
14414   ins_pipe(pipe_class_default);
14415 %}
14416 
14417 instruct replicate8S(vecX dst, iRegIorL2I src)
14418 %{
14419   predicate(n->as_Vector()->length() == 8);
14420   match(Set dst (ReplicateS src));
14421   ins_cost(INSN_COST);
14422   format %{ "dup  $dst, $src\t# vector (8S)" %}
14423   ins_encode %{
14424     __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
14425   %}
14426   ins_pipe(pipe_class_default);
14427 %}
14428 
14429 instruct replicate4S_imm(vecD dst, immI con)
14430 %{
14431   predicate(n->as_Vector()->length() == 2 ||
14432             n->as_Vector()->length() == 4);
14433   match(Set dst (ReplicateS con));
14434   ins_cost(INSN_COST);
14435   format %{ "movi  $dst, $con\t# vector(4H)" %}
14436   ins_encode %{
14437     __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
14438   %}
14439   ins_pipe(pipe_class_default);
14440 %}
14441 
14442 instruct replicate8S_imm(vecX dst, immI con)
14443 %{
14444   predicate(n->as_Vector()->length() == 8);
14445   match(Set dst (ReplicateS con));
14446   ins_cost(INSN_COST);
14447   format %{ "movi  $dst, $con\t# vector(8H)" %}
14448   ins_encode %{
14449     __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
14450   %}
14451   ins_pipe(pipe_class_default);
14452 %}
14453 
// Replicate a 32-bit scalar (GPR) into both word lanes of a 64-bit vector
// (DUP Vd.2S, Wn).
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Replicate a 32-bit scalar (GPR) into all 4 word lanes of a 128-bit vector
// (DUP Vd.4S, Wn).
instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14477 
// Replicate a 32-bit immediate into both word lanes of a 64-bit vector.
instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Replicate a 32-bit immediate into all 4 word lanes of a 128-bit vector.
instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
14501 
// Replicate a 64-bit scalar (GPR) into both doubleword lanes of a 128-bit
// vector (DUP Vd.2D, Xn).
instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14513 
// Materialize an all-zero 128-bit vector by XOR-ing the destination with
// itself (EOR v,v,v yields zero regardless of the register's prior contents,
// so no input dependency matters).
// NOTE(review): the name says 2L but the rule matches ReplicateI with an
// immI0 operand and the format comment says "vector(4I)" -- presumably this
// relies on the zero bit pattern being identical for 2L/4I shapes; confirm
// against how C2 presents a long-zero replicate before relying on it.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14527 
// Replicate a single-precision float (lane 0 of an FP register) into both
// word lanes of a 64-bit vector.
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Replicate a single-precision float into all 4 word lanes of a 128-bit
// vector.
instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Replicate a double-precision float into both doubleword lanes of a 128-bit
// vector.
instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14566 
14567 // ====================REDUCTION ARITHMETIC====================================
14568 
// Integer add-reduction over a 2-element int vector: extract both word lanes
// into GPRs (umov) and accumulate them onto the scalar src1 with two addw's.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp, iRegI tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Integer add-reduction over a 4-element int vector: use the SIMD across-lanes
// ADDV to sum the 4 lanes, move the result lane to a GPR, then add src1.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
14605 
// Integer multiply-reduction over a 2-element int vector: extract each word
// lane to a GPR and fold it into dst with scalar multiplies.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i\n\t"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Integer multiply-reduction over a 4-element int vector: copy the high
// doubleword of src2 over the low doubleword of tmp (ins), do one vector
// multiply to pair up lanes {0*2, 1*3}, then extract and fold the two
// partial products with scalar multiplies.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "ins   $tmp, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i\n\t"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
14649 
// Float add-reduction over a 2-element vector: adds are done strictly
// lane-by-lane with scalar fadds (not pairwise SIMD) so the result matches
// Java's required left-to-right floating-point summation order.
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    // Bring lane 1 down to lane 0 of tmp so scalar fadds can reach it.
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Float add-reduction over a 4-element vector: same scalar lane-by-lane
// scheme as reduce_add2F, extended to lanes 1..3.
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14701 
// Float multiply-reduction over a 2-element vector: multiply src1 by lane 0,
// then fold in lane 1, using scalar fmuls so lane order is preserved.
// Fix: the format trailer previously read "add reduction4f" although this is
// a 2-element multiply reduction; the disassembly annotation now matches the
// emitted code (the encoding itself is unchanged).
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    // Bring lane 1 down to lane 0 of tmp so scalar fmuls can reach it.
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14721 
// Float multiply-reduction over a 4-element vector: scalar lane-by-lane
// multiplies (lanes 1..3 brought down via ins) to preserve lane order.
// Fix: the format trailer previously read "add reduction4f" although this is
// a multiply reduction; the disassembly annotation now matches the emitted
// code (the encoding itself is unchanged).
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14753 
// Double add-reduction over a 2-element vector: scalar faddd per lane, with
// lane 1 brought down to lane 0 of tmp via ins, preserving lane order.
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14773 
// Double multiply-reduction over a 2-element vector: scalar fmuld per lane,
// with lane 1 brought down to lane 0 of tmp via ins, preserving lane order.
// Fix: the format trailer previously read "add reduction2d" although this is
// a multiply reduction; the disassembly annotation now matches the emitted
// code (the encoding itself is unchanged).
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
  %}
  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14793 
14794 // ====================VECTOR ARITHMETIC=======================================
14795 
14796 // --------------------------------- ADD --------------------------------------
14797 
// Vector byte add, 64-bit form: covers both 4B and 8B shapes (both live in
// a D register; the unused upper lanes are harmless).
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector byte add, 128-bit (16B) form.
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector short add, 64-bit form: covers 2H and 4H shapes.
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector short add, 128-bit (8H) form.
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14855 
// Vector int add, 64-bit (2S) form.
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector int add, 128-bit (4S) form.
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector long add, 128-bit (2D) form.
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector float add, 64-bit (2S) form.
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector float add, 128-bit (4S) form.
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector double add, 128-bit (2D) form.
// NOTE(review): unlike the sibling vadd rules this one has no length
// predicate -- presumably AddVD only ever occurs with length 2 on this
// target; confirm before adding further double-vector shapes.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14938 
14939 // --------------------------------- SUB --------------------------------------
14940 
// Vector byte subtract, 64-bit form: covers 4B and 8B shapes.
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector byte subtract, 128-bit (16B) form.
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector short subtract, 64-bit form: covers 2H and 4H shapes.
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector short subtract, 128-bit (8H) form.
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector int subtract, 64-bit (2S) form.
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector int subtract, 128-bit (4S) form.
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector long subtract, 128-bit (2D) form.
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector float subtract, 64-bit (2S) form.
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector float subtract, 128-bit (4S) form.
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector double subtract, 128-bit (2D) form.
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15082 
15083 // --------------------------------- MUL --------------------------------------
15084 
// Vector short multiply, 64-bit form: covers 2H and 4H shapes.
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector short multiply, 128-bit (8H) form.
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector int multiply, 64-bit (2S) form.
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector int multiply, 128-bit (4S) form.
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector float multiply, 64-bit (2S) form.
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector float multiply, 128-bit (4S) form.
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector double multiply, 128-bit (2D) form.
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15183 
15184 // --------------------------------- DIV --------------------------------------
15185 
// Vector float divide, 64-bit (2S) form. Only FP divides exist here: NEON
// has no integer vector divide instruction.
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector float divide, 128-bit (4S) form.
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector double divide, 128-bit (2D) form.
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15227 
15228 // --------------------------------- SQRT -------------------------------------
15229 
// Vector double square root, 128-bit (2D) form.
// NOTE(review): this rule has no ins_cost, unlike its neighbours which all
// specify INSN_COST -- presumably the ADLC default cost applies; confirm
// whether that was intentional.
instruct vsqrt2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
             as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15241 
15242 // --------------------------------- ABS --------------------------------------
15243 
// Vector float absolute value, 64-bit (2S) form.
instruct vabs2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector float absolute value, 128-bit (4S) form.
instruct vabs4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector double absolute value, 128-bit (2D) form.
instruct vabs2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15282 
15283 // --------------------------------- NEG --------------------------------------
15284 
// Vector float negate, 64-bit (2S) form.
instruct vneg2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector float negate, 128-bit (4S) form.
instruct vneg4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector double negate, 128-bit (2D) form.
instruct vneg2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15323 
15324 // --------------------------------- AND --------------------------------------
15325 
// Vector bitwise AND, 64-bit form. Element type is irrelevant for bitwise
// ops, so the predicate is on byte length (4 or 8 bytes both fit in a D reg).
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector bitwise AND, 128-bit (16-byte) form.
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15354 
15355 // --------------------------------- OR ---------------------------------------
15356 
// Bitwise OR of two vectors, D-register form (covers 4- and 8-byte
// vectors).  The encoder emits ORR (vector); the format string
// previously said "and" — a copy-paste from vand8B — which made the
// debug disassembly listing print the wrong mnemonic.  Fixed to "orr"
// to match the emitted instruction and the sibling rule vor16B.
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15371 
// Bitwise OR of two vectors, Q-register form (16 bytes).
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15385 
15386 // --------------------------------- XOR --------------------------------------
15387 
// Bitwise XOR of two vectors.  The encoder emits EOR (the AArch64
// mnemonic); the format prints "xor" after the ideal XorV node.
// NOTE(review): printing "eor" would match the real mnemonic — cosmetic
// only, the emitted instruction is correct.
// D-register form: covers 4- and 8-byte vectors.
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Q-register form: full 16-byte vectors.
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15416 
15417 // ------------------------------ Shift ---------------------------------------
15418 
// Materialize a variable shift count for vector shifts: replicate the
// general-purpose count register into every byte lane of a Q register.
instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (LShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
// (SSHL/USHL shift right when given a negative per-lane count), so the
// right-shift count is duplicated and then negated lane-wise.
instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
    __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15438 
// Variable-count byte shifts.  Each rule matching both LShiftVB and
// RShiftVB relies on vshiftcntR above having negated the count: SSHL
// with a negative per-lane count performs the arithmetic right shift.
instruct vsll8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Unsigned (logical) right shift: USHL with the negated count.
instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15494 
// Immediate byte shifts.  The constant is masked to the int-shift range
// (& 31); a count of 8 or more zeroes every byte lane for left/logical-
// right shifts (done with EOR dst,src,src), or saturates to 7 for
// arithmetic right shifts (sign fills the lane).
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      // Shift count >= lane width: result is all zeroes.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(pipe_class_default);
%}

instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(pipe_class_default);
%}

// Arithmetic right shift by immediate.
// NOTE(review): the count is clamped to 7 and then passed to the
// assembler as (-sh & 7); this appears to match this assembler's
// sshr/ushr immediate-encoding convention (immh:immb holds
// 2*lane_bits - shift) — confirm against assembler_aarch64.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(pipe_class_default);
%}

instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(pipe_class_default);
%}

// Logical right shift by immediate; counts >= 8 zero the lanes.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(pipe_class_default);
%}

instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(pipe_class_default);
%}
15603 
// Variable-count short (16-bit lane) shifts; same SSHL/USHL
// negative-count scheme as the byte forms above.
instruct vsll4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15659 
// Immediate short (16-bit lane) shifts.  Constant masked with & 31;
// counts >= 16 zero the lanes (EOR) for left/logical-right shifts, or
// saturate to 15 for arithmetic right shifts.
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      // Count >= lane width: zero the destination.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(pipe_class_default);
%}

instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(pipe_class_default);
%}

// NOTE(review): as in the byte forms, the clamped count is handed to
// the assembler as (-sh & 15), presumably matching its sshr/ushr
// immediate-encoding convention — confirm against assembler_aarch64.
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(pipe_class_default);
%}

instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(pipe_class_default);
%}

instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(pipe_class_default);
%}

instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(pipe_class_default);
%}
15768 
// Variable-count int (32-bit lane) shifts; SSHL/USHL negative-count
// scheme, see the shift-count rules above.
instruct vsll2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15822 
// Immediate int (32-bit lane) shifts.  The & 31 mask alone gives Java
// int-shift semantics here, so no lane-width clamp/zero path is needed
// (unlike the byte and short forms).
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}

instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}

// NOTE(review): right shifts pass the negated-and-masked count to
// sshr/ushr, as in the narrower lane forms — presumably the assembler's
// immediate-encoding convention; confirm against assembler_aarch64.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}

instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}

instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}

instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}
15900 
// Variable-count long (64-bit lane) shifts; Q register only.
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15927 
// Immediate long (64-bit lane) shifts.  The & 63 mask gives Java
// long-shift semantics directly; right shifts use the negated-and-
// masked count as in the other immediate right-shift forms.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 63);
  %}
  ins_pipe(pipe_class_default);
%}

instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(pipe_class_default);
%}

instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(pipe_class_default);
%}
15966 
15967 //----------PEEPHOLE RULES-----------------------------------------------------
15968 // These must follow all instruction definitions as they use the names
15969 // defined in the instructions definitions.
15970 //
15971 // peepmatch ( root_instr_name [preceding_instruction]* );
15972 //
15973 // peepconstraint %{
15974 // (instruction_number.operand_name relational_op instruction_number.operand_name
15975 //  [, ...] );
15976 // // instruction numbers are zero-based using left to right order in peepmatch
15977 //
15978 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
15979 // // provide an instruction_number.operand_name for each operand that appears
15980 // // in the replacement instruction's match rule
15981 //
15982 // ---------VM FLAGS---------------------------------------------------------
15983 //
15984 // All peephole optimizations can be turned off using -XX:-OptoPeephole
15985 //
15986 // Each peephole rule is given an identifying number starting with zero and
15987 // increasing by one in the order seen by the parser.  An individual peephole
15988 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
15989 // on the command-line.
15990 //
15991 // ---------CURRENT LIMITATIONS----------------------------------------------
15992 //
15993 // Only match adjacent instructions in same basic block
15994 // Only equality constraints
15995 // Only constraints between operands, not (0.dest_reg == RAX_enc)
15996 // Only one replacement instruction
15997 //
15998 // ---------EXAMPLE----------------------------------------------------------
15999 //
16000 // // pertinent parts of existing instructions in architecture description
16001 // instruct movI(iRegINoSp dst, iRegI src)
16002 // %{
16003 //   match(Set dst (CopyI src));
16004 // %}
16005 //
16006 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
16007 // %{
16008 //   match(Set dst (AddI dst src));
16009 //   effect(KILL cr);
16010 // %}
16011 //
16012 // // Change (inc mov) to lea
16013 // peephole %{
//   // increment preceded by register-register move
16015 //   peepmatch ( incI_iReg movI );
16016 //   // require that the destination register of the increment
16017 //   // match the destination register of the move
16018 //   peepconstraint ( 0.dst == 1.dst );
16019 //   // construct a replacement instruction that sets
16020 //   // the destination to ( move's source register + one )
16021 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
16022 // %}
16023 //
16024 
16025 // Implementation no longer uses movX instructions since
16026 // machine-independent system no longer uses CopyX nodes.
16027 //
16028 // peephole
16029 // %{
16030 //   peepmatch (incI_iReg movI);
16031 //   peepconstraint (0.dst == 1.dst);
16032 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
16033 // %}
16034 
16035 // peephole
16036 // %{
16037 //   peepmatch (decI_iReg movI);
16038 //   peepconstraint (0.dst == 1.dst);
16039 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
16040 // %}
16041 
16042 // peephole
16043 // %{
16044 //   peepmatch (addI_iReg_imm movI);
16045 //   peepconstraint (0.dst == 1.dst);
16046 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
16047 // %}
16048 
16049 // peephole
16050 // %{
16051 //   peepmatch (incL_iReg movL);
16052 //   peepconstraint (0.dst == 1.dst);
16053 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
16054 // %}
16055 
16056 // peephole
16057 // %{
16058 //   peepmatch (decL_iReg movL);
16059 //   peepconstraint (0.dst == 1.dst);
16060 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
16061 // %}
16062 
16063 // peephole
16064 // %{
16065 //   peepmatch (addL_iReg_imm movL);
16066 //   peepconstraint (0.dst == 1.dst);
16067 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
16068 // %}
16069 
16070 // peephole
16071 // %{
16072 //   peepmatch (addP_iReg_imm movP);
16073 //   peepconstraint (0.dst == 1.dst);
16074 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
16075 // %}
16076 
16077 // // Change load of spilled value to only a spill
16078 // instruct storeI(memory mem, iRegI src)
16079 // %{
16080 //   match(Set mem (StoreI mem src));
16081 // %}
16082 //
16083 // instruct loadI(iRegINoSp dst, memory mem)
16084 // %{
16085 //   match(Set dst (LoadI mem));
16086 // %}
16087 //
16088 
16089 //----------SMARTSPILL RULES---------------------------------------------------
16090 // These must follow all instruction definitions as they use the names
16091 // defined in the instructions definitions.
16092 
16093 // Local Variables:
16094 // mode: c++
16095 // End: