1 //
   2 // Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, 2019, Red Hat Inc.
   4 // All rights reserved.
   5 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6 //
   7 // This code is free software; you can redistribute it and/or modify it
   8 // under the terms of the GNU General Public License version 2 only, as
   9 // published by the Free Software Foundation.
  10 //
  11 // This code is distributed in the hope that it will be useful, but WITHOUT
  12 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 // version 2 for more details (a copy is included in the LICENSE file that
  15 // accompanied this code).
  16 //
  17 // You should have received a copy of the GNU General Public License version
  18 // 2 along with this work; if not, write to the Free Software Foundation,
  19 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20 //
  21 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22 // or visit www.oracle.com if you need additional information or have any
  23 // questions.
  24 //
  25 //
  26 
  27 // AArch64 Architecture Description File
  28 
  29 //----------REGISTER DEFINITION BLOCK------------------------------------------
  30 // This information is used by the matcher and the register allocator to
  31 // describe individual registers and classes of registers within the target
// architecture.
  33 
  34 register %{
  35 //----------Architecture Description Register Definitions----------------------
  36 // General Registers
  37 // "reg_def"  name ( register save type, C convention save type,
  38 //                   ideal register type, encoding );
  39 // Register Save Types:
  40 //
  41 // NS  = No-Save:       The register allocator assumes that these registers
  42 //                      can be used without saving upon entry to the method, &
  43 //                      that they do not need to be saved at call sites.
  44 //
  45 // SOC = Save-On-Call:  The register allocator assumes that these registers
  46 //                      can be used without saving upon entry to the method,
  47 //                      but that they must be saved at call sites.
  48 //
  49 // SOE = Save-On-Entry: The register allocator assumes that these registers
  50 //                      must be saved before using them upon entry to the
  51 //                      method, but they do not need to be saved at call
  52 //                      sites.
  53 //
  54 // AS  = Always-Save:   The register allocator assumes that these registers
  55 //                      must be saved before using them upon entry to the
  56 //                      method, & that they must be saved at call sites.
  57 //
  58 // Ideal Register Type is used to determine how to save & restore a
  59 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  60 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  61 //
  62 // The encoding number is the actual bit-pattern placed into the opcodes.
  63 
  64 // We must define the 64 bit int registers in two 32 bit halves, the
  65 // real lower register and a virtual upper half register. upper halves
  66 // are used by the register allocator but are not actually supplied as
  67 // operands to memory ops.
  68 //
  69 // follow the C1 compiler in making registers
  70 //
  71 //   r0-r7,r10-r26 volatile (caller save)
  72 //   r27-r32 system (no save, no allocate)
  73 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  74 //
// as regards Java usage. We don't use any callee-save registers
// because this makes it difficult to de-optimise a frame (see the
// comment in the x86 implementation of Deoptimization::unwind_callee_save_values)
  78 //
  79 
  80 // General Registers
  81 
  82 reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
  83 reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
  84 reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
  85 reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
  86 reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
  87 reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
  88 reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
  89 reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
  90 reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
  91 reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
  92 reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
  93 reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
  94 reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
  95 reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
  96 reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
  97 reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
  98 reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
  99 reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 100 reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
 101 reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 102 reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
 103 reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
 104 reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
 105 reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
 106 reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
 107 reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
 108 reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
 109 reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
 110 reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
 111 reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
 112 reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
 113 reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
 114 reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
 115 reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
 116 reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
 117 reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
 118 reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
 119 reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
 120 reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
 121 reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
 122 reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
 123 reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
 124 reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
 125 reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
 126 reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
 127 reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
 128 reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
 129 reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
 130 reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
 131 reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
 132 reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
 133 reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());  
 134 reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
 135 reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
 136 reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
 137 reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
 138 reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
 139 reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
 140 reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
 141 reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 142 
 143 // ----------------------------
 144 // Float/Double Registers
 145 // ----------------------------
 146 
 147 // Double Registers
 148 
 149 // The rules of ADL require that double registers be defined in pairs.
 150 // Each pair must be two 32-bit values, but not necessarily a pair of
 151 // single float registers. In each pair, ADLC-assigned register numbers
 152 // must be adjacent, with the lower number even. Finally, when the
 153 // CPU stores such a register pair to memory, the word associated with
 154 // the lower ADLC-assigned number must be stored to the lower address.
 155 
 156 // AArch64 has 32 floating-point registers. Each can store a vector of
 157 // single or double precision floating-point values up to 8 * 32
 158 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 159 // use the first float or double element of the vector.
 160 
// For Java use, float registers v0-v15 are always save-on-call (whereas
// the platform ABI treats v8-v15 as callee-save). Float registers
// v16-v31 are SOC as per the platform spec.
 164 
 165   reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
 166   reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
 167   reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
 168   reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );
 169 
 170   reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
 171   reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
 172   reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
 173   reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );
 174 
 175   reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
 176   reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
 177   reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
 178   reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );
 179 
 180   reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
 181   reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
 182   reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
 183   reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );
 184 
 185   reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
 186   reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
 187   reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
 188   reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );
 189 
 190   reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
 191   reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
 192   reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
 193   reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );
 194 
 195   reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
 196   reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
 197   reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
 198   reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );
 199 
 200   reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
 201   reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
 202   reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
 203   reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );
 204 
 205   reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
 206   reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
 207   reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
 208   reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );
 209 
 210   reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
 211   reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
 212   reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
 213   reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );
 214 
 215   reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
 216   reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
 217   reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
 218   reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
 219 
 220   reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
 221   reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
 222   reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
 223   reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
 224 
 225   reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
 226   reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
 227   reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
 228   reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
 229 
 230   reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
 231   reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
 232   reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
 233   reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
 234 
 235   reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
 236   reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
 237   reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
 238   reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
 239 
 240   reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
 241   reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
 242   reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
 243   reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
 244 
 245   reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
 246   reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
 247   reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
 248   reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
 249 
 250   reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
 251   reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
 252   reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
 253   reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
 254 
 255   reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
 256   reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
 257   reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
 258   reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
 259 
 260   reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
 261   reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
 262   reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
 263   reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
 264 
 265   reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
 266   reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
 267   reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
 268   reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
 269 
 270   reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
 271   reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
 272   reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
 273   reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
 274 
 275   reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
 276   reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
 277   reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
 278   reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
 279 
 280   reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
 281   reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
 282   reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
 283   reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
 284 
 285   reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
 286   reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
 287   reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
 288   reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
 289 
 290   reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
 291   reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
 292   reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
 293   reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
 294 
 295   reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
 296   reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
 297   reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
 298   reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
 299 
 300   reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
 301   reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
 302   reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
 303   reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
 304 
 305   reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
 306   reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
 307   reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
 308   reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
 309 
 310   reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
 311   reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
 312   reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
 313   reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
 314 
 315   reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
 316   reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
 317   reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
 318   reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
 319 
 320   reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
 321   reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
 322   reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
 323   reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 324 
 325 // ----------------------------
 326 // Special Registers
 327 // ----------------------------
 328 
// The AArch64 CPSR status flag register is not directly accessible as
// an instruction operand. The FPSR status flag register is a system
// register which can be written/read using MSR/MRS but again does not
// appear as an operand (a code identifying the FPSR occurs as an
// immediate value in the instruction).
 334 
 335 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 336 
 337 
 338 // Specify priority of register selection within phases of register
 339 // allocation.  Highest priority is first.  A useful heuristic is to
 340 // give registers a low priority when they are required by machine
 341 // instructions, like EAX and EDX on I486, and choose no-save registers
 342 // before save-on-call, & save-on-call before save-on-entry.  Registers
 343 // which participate in fixed calling sequences should come last.
 344 // Registers which are used as pairs must fall on an even boundary.
 345 
 346 alloc_class chunk0(
 347     // volatiles
 348     R10, R10_H,
 349     R11, R11_H,
 350     R12, R12_H,
 351     R13, R13_H,
 352     R14, R14_H,
 353     R15, R15_H,
 354     R16, R16_H,
 355     R17, R17_H,
 356     R18, R18_H,
 357 
 358     // arg registers
 359     R0, R0_H,
 360     R1, R1_H,
 361     R2, R2_H,
 362     R3, R3_H,
 363     R4, R4_H,
 364     R5, R5_H,
 365     R6, R6_H,
 366     R7, R7_H,
 367 
 368     // non-volatiles
 369     R19, R19_H,
 370     R20, R20_H,
 371     R21, R21_H,
 372     R22, R22_H,
 373     R23, R23_H,
 374     R24, R24_H,
 375     R25, R25_H,
 376     R26, R26_H,
 377     
 378     // non-allocatable registers
 379 
 380     R27, R27_H, // heapbase
 381     R28, R28_H, // thread
 382     R29, R29_H, // fp
 383     R30, R30_H, // lr
 384     R31, R31_H, // sp
 385 );
 386 
 387 alloc_class chunk1(
 388 
 389     // no save
 390     V16, V16_H, V16_J, V16_K,
 391     V17, V17_H, V17_J, V17_K,
 392     V18, V18_H, V18_J, V18_K,
 393     V19, V19_H, V19_J, V19_K,
 394     V20, V20_H, V20_J, V20_K,
 395     V21, V21_H, V21_J, V21_K,
 396     V22, V22_H, V22_J, V22_K,
 397     V23, V23_H, V23_J, V23_K,
 398     V24, V24_H, V24_J, V24_K,
 399     V25, V25_H, V25_J, V25_K,
 400     V26, V26_H, V26_J, V26_K,
 401     V27, V27_H, V27_J, V27_K,
 402     V28, V28_H, V28_J, V28_K,
 403     V29, V29_H, V29_J, V29_K,
 404     V30, V30_H, V30_J, V30_K,
 405     V31, V31_H, V31_J, V31_K,
 406 
 407     // arg registers
 408     V0, V0_H, V0_J, V0_K,
 409     V1, V1_H, V1_J, V1_K,
 410     V2, V2_H, V2_J, V2_K,
 411     V3, V3_H, V3_J, V3_K,
 412     V4, V4_H, V4_J, V4_K,
 413     V5, V5_H, V5_J, V5_K,
 414     V6, V6_H, V6_J, V6_K,
 415     V7, V7_H, V7_J, V7_K,
 416 
 417     // non-volatiles
 418     V8, V8_H, V8_J, V8_K,
 419     V9, V9_H, V9_J, V9_K,
 420     V10, V10_H, V10_J, V10_K,
 421     V11, V11_H, V11_J, V11_K,
 422     V12, V12_H, V12_J, V12_K,
 423     V13, V13_H, V13_J, V13_K,
 424     V14, V14_H, V14_J, V14_K,
 425     V15, V15_H, V15_J, V15_K,
 426 );
 427 
 428 alloc_class chunk2(RFLAGS);
 429 
 430 //----------Architecture Description Register Classes--------------------------
 431 // Several register classes are automatically defined based upon information in
 432 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 437 //
 438 
 439 // Class for all 32 bit integer registers -- excludes SP which will
 440 // never be used as an integer register
 441 reg_class any_reg32(
 442     R0,
 443     R1,
 444     R2,
 445     R3,
 446     R4,
 447     R5,
 448     R6,
 449     R7,
 450     R10,
 451     R11,
 452     R12,
 453     R13,
 454     R14,
 455     R15,
 456     R16,
 457     R17,
 458     R18,
 459     R19,
 460     R20,
 461     R21,
 462     R22,
 463     R23,
 464     R24,
 465     R25,
 466     R26,
 467     R27,
 468     R28,
 469     R29,
 470     R30
 471 );
 472 
 473 // Singleton class for R0 int register
 474 reg_class int_r0_reg(R0);
 475 
 476 // Singleton class for R2 int register
 477 reg_class int_r2_reg(R2);
 478 
 479 // Singleton class for R3 int register
 480 reg_class int_r3_reg(R3);
 481 
 482 // Singleton class for R4 int register
 483 reg_class int_r4_reg(R4);
 484 
 485 // Class for all long integer registers (including RSP)
 486 reg_class any_reg(
 487     R0, R0_H,
 488     R1, R1_H,
 489     R2, R2_H,
 490     R3, R3_H,
 491     R4, R4_H,
 492     R5, R5_H,
 493     R6, R6_H,
 494     R7, R7_H,
 495     R10, R10_H,
 496     R11, R11_H,
 497     R12, R12_H,
 498     R13, R13_H,
 499     R14, R14_H,
 500     R15, R15_H,
 501     R16, R16_H,
 502     R17, R17_H,
 503     R18, R18_H,
 504     R19, R19_H,
 505     R20, R20_H,
 506     R21, R21_H,
 507     R22, R22_H,
 508     R23, R23_H,
 509     R24, R24_H,
 510     R25, R25_H,
 511     R26, R26_H,
 512     R27, R27_H,
 513     R28, R28_H,
 514     R29, R29_H,
 515     R30, R30_H,
 516     R31, R31_H
 517 );
 518 
 519 // Class for all non-special integer registers
 520 reg_class no_special_reg32(
 521     R0,
 522     R1,
 523     R2,
 524     R3,
 525     R4,
 526     R5,
 527     R6,
 528     R7,
 529     R10,
 530     R11,
 531     R12,                        // rmethod
 532     R13,
 533     R14,
 534     R15,
 535     R16,
 536     R17,
 537     R18,
 538     R19,
 539     R20,
 540     R21,
 541     R22,
 542     R23,
 543     R24,
 544     R25,
 545     R26
 546  /* R27, */                     // heapbase
 547  /* R28, */                     // thread
 548  /* R29, */                     // fp
 549  /* R30, */                     // lr
 550  /* R31 */                      // sp
 551 );
 552 
 553 // Class for all non-special long integer registers
 554 reg_class no_special_reg(
 555     R0, R0_H,
 556     R1, R1_H,
 557     R2, R2_H,
 558     R3, R3_H,
 559     R4, R4_H,
 560     R5, R5_H,
 561     R6, R6_H,
 562     R7, R7_H,
 563     R10, R10_H,
 564     R11, R11_H,
 565     R12, R12_H,                 // rmethod
 566     R13, R13_H,
 567     R14, R14_H,
 568     R15, R15_H,
 569     R16, R16_H,
 570     R17, R17_H,
 571     R18, R18_H,
 572     R19, R19_H,
 573     R20, R20_H,
 574     R21, R21_H,
 575     R22, R22_H,
 576     R23, R23_H,
 577     R24, R24_H,
 578     R25, R25_H,
 579     R26, R26_H,
 580  /* R27, R27_H, */              // heapbase
 581  /* R28, R28_H, */              // thread
 582  /* R29, R29_H, */              // fp
 583  /* R30, R30_H, */              // lr
 584  /* R31, R31_H */               // sp
 585 );
 586 
 587 // Class for 64 bit register r0
 588 reg_class r0_reg(
 589     R0, R0_H
 590 );
 591 
 592 // Class for 64 bit register r1
 593 reg_class r1_reg(
 594     R1, R1_H
 595 );
 596 
 597 // Class for 64 bit register r2
 598 reg_class r2_reg(
 599     R2, R2_H
 600 );
 601 
 602 // Class for 64 bit register r3
 603 reg_class r3_reg(
 604     R3, R3_H
 605 );
 606 
 607 // Class for 64 bit register r4
 608 reg_class r4_reg(
 609     R4, R4_H
 610 );
 611 
 612 // Class for 64 bit register r5
 613 reg_class r5_reg(
 614     R5, R5_H
 615 );
 616 
 617 // Class for 64 bit register r10
 618 reg_class r10_reg(
 619     R10, R10_H
 620 );
 621 
 622 // Class for 64 bit register r11
 623 reg_class r11_reg(
 624     R11, R11_H
 625 );
 626 
 627 // Class for method register
 628 reg_class method_reg(
 629     R12, R12_H
 630 );
 631 
 632 // Class for heapbase register
 633 reg_class heapbase_reg(
 634     R27, R27_H
 635 );
 636 
 637 // Class for thread register
 638 reg_class thread_reg(
 639     R28, R28_H
 640 );
 641 
 642 // Class for frame pointer register
 643 reg_class fp_reg(
 644     R29, R29_H
 645 );
 646 
 647 // Class for link register
 648 reg_class lr_reg(
 649     R30, R30_H
 650 );
 651 
 652 // Class for long sp register
 653 reg_class sp_reg(
 654   R31, R31_H
 655 );
 656 
 657 // Class for all pointer registers
 658 reg_class ptr_reg(
 659     R0, R0_H,
 660     R1, R1_H,
 661     R2, R2_H,
 662     R3, R3_H,
 663     R4, R4_H,
 664     R5, R5_H,
 665     R6, R6_H,
 666     R7, R7_H,
 667     R10, R10_H,
 668     R11, R11_H,
 669     R12, R12_H,
 670     R13, R13_H,
 671     R14, R14_H,
 672     R15, R15_H,
 673     R16, R16_H,
 674     R17, R17_H,
 675     R18, R18_H,
 676     R19, R19_H,
 677     R20, R20_H,
 678     R21, R21_H,
 679     R22, R22_H,
 680     R23, R23_H,
 681     R24, R24_H,
 682     R25, R25_H,
 683     R26, R26_H,
 684     R27, R27_H,
 685     R28, R28_H,
 686     R29, R29_H,
 687     R30, R30_H,
 688     R31, R31_H
 689 );
 690 
 691 // Class for all non_special pointer registers
 692 reg_class no_special_ptr_reg(
 693     R0, R0_H,
 694     R1, R1_H,
 695     R2, R2_H,
 696     R3, R3_H,
 697     R4, R4_H,
 698     R5, R5_H,
 699     R6, R6_H,
 700     R7, R7_H,
 701     R10, R10_H,
 702     R11, R11_H,
 703     R12, R12_H,
 704     R13, R13_H,
 705     R14, R14_H,
 706     R15, R15_H,
 707     R16, R16_H,
 708     R17, R17_H,
 709     R18, R18_H,
 710     R19, R19_H,
 711     R20, R20_H,
 712     R21, R21_H,
 713     R22, R22_H,
 714     R23, R23_H,
 715     R24, R24_H,
 716     R25, R25_H,
 717     R26, R26_H,
 718  /* R27, R27_H, */              // heapbase
 719  /* R28, R28_H, */              // thread
 720  /* R29, R29_H, */              // fp
 721  /* R30, R30_H, */              // lr
 722  /* R31, R31_H */               // sp
 723 );
 724 
 725 // Class for all float registers
 726 reg_class float_reg(
 727     V0,
 728     V1,
 729     V2,
 730     V3,
 731     V4,
 732     V5,
 733     V6,
 734     V7,
 735     V8,
 736     V9,
 737     V10,
 738     V11,
 739     V12,
 740     V13,
 741     V14,
 742     V15,
 743     V16,
 744     V17,
 745     V18,
 746     V19,
 747     V20,
 748     V21,
 749     V22,
 750     V23,
 751     V24,
 752     V25,
 753     V26,
 754     V27,
 755     V28,
 756     V29,
 757     V30,
 758     V31
 759 );
 760 
 761 // Double precision float registers have virtual `high halves' that
 762 // are needed by the allocator.
 763 // Class for all double registers
 764 reg_class double_reg(
 765     V0, V0_H, 
 766     V1, V1_H, 
 767     V2, V2_H, 
 768     V3, V3_H, 
 769     V4, V4_H, 
 770     V5, V5_H, 
 771     V6, V6_H, 
 772     V7, V7_H, 
 773     V8, V8_H, 
 774     V9, V9_H, 
 775     V10, V10_H, 
 776     V11, V11_H, 
 777     V12, V12_H, 
 778     V13, V13_H, 
 779     V14, V14_H, 
 780     V15, V15_H, 
 781     V16, V16_H, 
 782     V17, V17_H, 
 783     V18, V18_H, 
 784     V19, V19_H, 
 785     V20, V20_H, 
 786     V21, V21_H, 
 787     V22, V22_H, 
 788     V23, V23_H, 
 789     V24, V24_H, 
 790     V25, V25_H, 
 791     V26, V26_H, 
 792     V27, V27_H, 
 793     V28, V28_H, 
 794     V29, V29_H, 
 795     V30, V30_H, 
 796     V31, V31_H
 797 );
 798 
 799 // Class for all 64bit vector registers
 800 reg_class vectord_reg(
 801     V0, V0_H,
 802     V1, V1_H,
 803     V2, V2_H,
 804     V3, V3_H,
 805     V4, V4_H,
 806     V5, V5_H,
 807     V6, V6_H,
 808     V7, V7_H,
 809     V8, V8_H,
 810     V9, V9_H,
 811     V10, V10_H,
 812     V11, V11_H,
 813     V12, V12_H,
 814     V13, V13_H,
 815     V14, V14_H,
 816     V15, V15_H,
 817     V16, V16_H,
 818     V17, V17_H,
 819     V18, V18_H,
 820     V19, V19_H,
 821     V20, V20_H,
 822     V21, V21_H,
 823     V22, V22_H,
 824     V23, V23_H,
 825     V24, V24_H,
 826     V25, V25_H,
 827     V26, V26_H,
 828     V27, V27_H,
 829     V28, V28_H,
 830     V29, V29_H,
 831     V30, V30_H,
 832     V31, V31_H
 833 );
 834 
 835 // Class for all 128bit vector registers
 836 reg_class vectorx_reg(
 837     V0, V0_H, V0_J, V0_K,
 838     V1, V1_H, V1_J, V1_K,
 839     V2, V2_H, V2_J, V2_K,
 840     V3, V3_H, V3_J, V3_K,
 841     V4, V4_H, V4_J, V4_K,
 842     V5, V5_H, V5_J, V5_K,
 843     V6, V6_H, V6_J, V6_K,
 844     V7, V7_H, V7_J, V7_K,
 845     V8, V8_H, V8_J, V8_K,
 846     V9, V9_H, V9_J, V9_K,
 847     V10, V10_H, V10_J, V10_K,
 848     V11, V11_H, V11_J, V11_K,
 849     V12, V12_H, V12_J, V12_K,
 850     V13, V13_H, V13_J, V13_K,
 851     V14, V14_H, V14_J, V14_K,
 852     V15, V15_H, V15_J, V15_K,
 853     V16, V16_H, V16_J, V16_K,
 854     V17, V17_H, V17_J, V17_K,
 855     V18, V18_H, V18_J, V18_K,
 856     V19, V19_H, V19_J, V19_K,
 857     V20, V20_H, V20_J, V20_K,
 858     V21, V21_H, V21_J, V21_K,
 859     V22, V22_H, V22_J, V22_K,
 860     V23, V23_H, V23_J, V23_K,
 861     V24, V24_H, V24_J, V24_K,
 862     V25, V25_H, V25_J, V25_K,
 863     V26, V26_H, V26_J, V26_K,
 864     V27, V27_H, V27_J, V27_K,
 865     V28, V28_H, V28_J, V28_K,
 866     V29, V29_H, V29_J, V29_K,
 867     V30, V30_H, V30_J, V30_K,
 868     V31, V31_H, V31_J, V31_K
 869 );
 870 
// Single-register classes for v0..v3, for rules that need their
// operand pinned to one specific vector register.

// Class for 128 bit register v0
reg_class v0_reg(
    V0, V0_H
);

// Class for 128 bit register v1
reg_class v1_reg(
    V1, V1_H
);

// Class for 128 bit register v2
reg_class v2_reg(
    V2, V2_H
);

// Class for 128 bit register v3
reg_class v3_reg(
    V3, V3_H
);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);
 893 
 894 %}
 895 
 896 //----------DEFINITION BLOCK---------------------------------------------------
 897 // Define name --> value mappings to inform the ADLC of an integer valued name
 898 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 899 // Format:
 900 //        int_def  <name>         ( <int_value>, <expression>);
 901 // Generated Code in ad_<arch>.hpp
 902 //        #define  <name>   (<expression>)
 903 //        // value == <int_value>
 904 // Generated code in ad_<arch>.cpp adlc_verification()
 905 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 906 //
 907 
 908 // we follow the ppc-aix port in using a simple cost model which ranks
 909 // register operations as cheap, memory ops as more expensive and
 910 // branches as most expensive. the first two have a low as well as a
 911 // normal cost. huge cost appears to be a way of saying don't do
 912 // something
 913 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls are ranked at twice the cost of a register op
  // (see the cost-model note above).
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile references are markedly more expensive because of the
  // barriers / ldar<x>-stlr<x> forms they require.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 921 
 922 
 923 //----------SOURCE BLOCK-------------------------------------------------------
 924 // This is a block of C++ code which provides values, functions, and
 925 // definitions necessary in the rest of the architecture description
 926 
 927 source_hpp %{
 928 
 929 #if INCLUDE_ALL_GCS
 930 #include "shenandoahBarrierSetAssembler_aarch64.hpp"
 931 #endif
 932 
 933 class CallStubImpl {
 934  
 935   //--------------------------------------------------------------
 936   //---<  Used for optimization in Compile::shorten_branches  >---
 937   //--------------------------------------------------------------
 938 
 939  public:
 940   // Size of call trampoline stub.
 941   static uint size_call_trampoline() {
 942     return 0; // no call trampolines on this platform
 943   }
 944   
 945   // number of relocations needed by a call trampoline stub
 946   static uint reloc_call_trampoline() { 
 947     return 0; // no call trampolines on this platform
 948   }
 949 };
 950 
class HandlerImpl {

 public:

  // emit the exception and deopt handler code stubs into cbuf
  // (definitions appear later in the generated ad file)
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // worst-case size of the exception handler: a single far branch
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // return 4 * NativeInstruction::instruction_size;
    return NativeInstruction::instruction_size + MacroAssembler::far_branch_size();
  }
};
 968 
  // returns true if opcode is one of the CompareAndSwapX /
  // GetAndSetX / GetAndAddX opcodes (see the definition in the
  // source block below)
  bool is_CAS(int opcode);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM; returns true when
  // the StoreStore barrier before the card write can be elided
  bool unnecessary_storestore(const Node *storecm);
 987 %}
 988 
 989 source %{
 990 
  // Optimization of volatile gets and puts
 992   // -------------------------------------
 993   //
 994   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
 995   // use to implement volatile reads and writes. For a volatile read
 996   // we simply need
 997   //
 998   //   ldar<x>
 999   //
1000   // and for a volatile write we need
1001   //
1002   //   stlr<x>
1003   // 
1004   // Alternatively, we can implement them by pairing a normal
1005   // load/store with a memory barrier. For a volatile read we need
1006   // 
1007   //   ldr<x>
1008   //   dmb ishld
1009   //
1010   // for a volatile write
1011   //
1012   //   dmb ish
1013   //   str<x>
1014   //   dmb ish
1015   //
1016   // We can also use ldaxr and stlxr to implement compare and swap CAS
1017   // sequences. These are normally translated to an instruction
1018   // sequence like the following
1019   //
1020   //   dmb      ish
1021   // retry:
1022   //   ldxr<x>   rval raddr
1023   //   cmp       rval rold
1024   //   b.ne done
1025   //   stlxr<x>  rval, rnew, rold
1026   //   cbnz      rval retry
1027   // done:
1028   //   cset      r0, eq
1029   //   dmb ishld
1030   //
1031   // Note that the exclusive store is already using an stlxr
1032   // instruction. That is required to ensure visibility to other
1033   // threads of the exclusive write (assuming it succeeds) before that
1034   // of any subsequent writes.
1035   //
1036   // The following instruction sequence is an improvement on the above
1037   //
1038   // retry:
1039   //   ldaxr<x>  rval raddr
1040   //   cmp       rval rold
1041   //   b.ne done
1042   //   stlxr<x>  rval, rnew, rold
1043   //   cbnz      rval retry
1044   // done:
1045   //   cset      r0, eq
1046   //
1047   // We don't need the leading dmb ish since the stlxr guarantees
1048   // visibility of prior writes in the case that the swap is
1049   // successful. Crucially we don't have to worry about the case where
1050   // the swap is not successful since no valid program should be
1051   // relying on visibility of prior changes by the attempting thread
1052   // in the case where the CAS fails.
1053   //
1054   // Similarly, we don't need the trailing dmb ishld if we substitute
1055   // an ldaxr instruction since that will provide all the guarantees we
1056   // require regarding observation of changes made by other threads
1057   // before any change to the CAS address observed by the load.
1058   //
1059   // In order to generate the desired instruction sequence we need to
1060   // be able to identify specific 'signature' ideal graph node
1061   // sequences which i) occur as a translation of a volatile reads or
1062   // writes or CAS operations and ii) do not occur through any other
1063   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1065   // sequences to the desired machine code sequences. Selection of the
1066   // alternative rules can be implemented by predicates which identify
1067   // the relevant node sequences.
1068   //
1069   // The ideal graph generator translates a volatile read to the node
1070   // sequence
1071   //
1072   //   LoadX[mo_acquire]
1073   //   MemBarAcquire
1074   //
1075   // As a special case when using the compressed oops optimization we
1076   // may also see this variant
1077   //
1078   //   LoadN[mo_acquire]
1079   //   DecodeN
1080   //   MemBarAcquire
1081   //
1082   // A volatile write is translated to the node sequence
1083   //
1084   //   MemBarRelease
1085   //   StoreX[mo_release] {CardMark}-optional
1086   //   MemBarVolatile
1087   //
1088   // n.b. the above node patterns are generated with a strict
1089   // 'signature' configuration of input and output dependencies (see
1090   // the predicates below for exact details). The card mark may be as
1091   // simple as a few extra nodes or, in a few GC configurations, may
1092   // include more complex control flow between the leading and
1093   // trailing memory barriers. However, whatever the card mark
1094   // configuration these signatures are unique to translated volatile
1095   // reads/stores -- they will not appear as a result of any other
1096   // bytecode translation or inlining nor as a consequence of
1097   // optimizing transforms.
1098   //
1099   // We also want to catch inlined unsafe volatile gets and puts and
1100   // be able to implement them using either ldar<x>/stlr<x> or some
1101   // combination of ldr<x>/stlr<x> and dmb instructions.
1102   //
1103   // Inlined unsafe volatiles puts manifest as a minor variant of the
1104   // normal volatile put node sequence containing an extra cpuorder
1105   // membar
1106   //
1107   //   MemBarRelease
1108   //   MemBarCPUOrder
1109   //   StoreX[mo_release] {CardMark}-optional
1110   //   MemBarVolatile
1111   //
1112   // n.b. as an aside, the cpuorder membar is not itself subject to
1113   // matching and translation by adlc rules.  However, the rule
1114   // predicates need to detect its presence in order to correctly
1115   // select the desired adlc rules.
1116   //
1117   // Inlined unsafe volatile gets manifest as a somewhat different
1118   // node sequence to a normal volatile get
1119   //
1120   //   MemBarCPUOrder
1121   //        ||       \\
1122   //   MemBarAcquire LoadX[mo_acquire]
1123   //        ||
1124   //   MemBarCPUOrder
1125   //
1126   // In this case the acquire membar does not directly depend on the
1127   // load. However, we can be sure that the load is generated from an
1128   // inlined unsafe volatile get if we see it dependent on this unique
1129   // sequence of membar nodes. Similarly, given an acquire membar we
1130   // can know that it was added because of an inlined unsafe volatile
1131   // get if it is fed and feeds a cpuorder membar and if its feed
1132   // membar also feeds an acquiring load.
1133   //
1134   // Finally an inlined (Unsafe) CAS operation is translated to the
1135   // following ideal graph
1136   //
1137   //   MemBarRelease
1138   //   MemBarCPUOrder
1139   //   CompareAndSwapX {CardMark}-optional
1140   //   MemBarCPUOrder
1141   //   MemBarAcquire
1142   //
1143   // So, where we can identify these volatile read and write
1144   // signatures we can choose to plant either of the above two code
1145   // sequences. For a volatile read we can simply plant a normal
1146   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1147   // also choose to inhibit translation of the MemBarAcquire and
1148   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1149   //
1150   // When we recognise a volatile store signature we can choose to
1151   // plant at a dmb ish as a translation for the MemBarRelease, a
1152   // normal str<x> and then a dmb ish for the MemBarVolatile.
1153   // Alternatively, we can inhibit translation of the MemBarRelease
1154   // and MemBarVolatile and instead plant a simple stlr<x>
1155   // instruction.
1156   //
1157   // when we recognise a CAS signature we can choose to plant a dmb
1158   // ish as a translation for the MemBarRelease, the conventional
1159   // macro-instruction sequence for the CompareAndSwap node (which
1160   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1161   // Alternatively, we can elide generation of the dmb instructions
1162   // and plant the alternative CompareAndSwap macro-instruction
1163   // sequence (which uses ldaxr<x>).
1164   // 
1165   // Of course, the above only applies when we see these signature
1166   // configurations. We still want to plant dmb instructions in any
1167   // other cases where we may see a MemBarAcquire, MemBarRelease or
1168   // MemBarVolatile. For example, at the end of a constructor which
1169   // writes final/volatile fields we will see a MemBarRelease
1170   // instruction and this needs a 'dmb ish' lest we risk the
1171   // constructed object being visible without making the
1172   // final/volatile field writes visible.
1173   //
1174   // n.b. the translation rules below which rely on detection of the
1175   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1176   // If we see anything other than the signature configurations we
1177   // always just translate the loads and stores to ldr<x> and str<x>
1178   // and translate acquire, release and volatile membars to the
1179   // relevant dmb instructions.
1180   //
1181 
1182   // is_CAS(int opcode)
1183   //
1184   // return true if opcode is one of the possible CompareAndSwapX
1185   // values otherwise false.
1186 
1187   bool is_CAS(int opcode)
1188   {
1189     switch(opcode) {
1190     // We handle these
1191     case Op_CompareAndSwapI:
1192     case Op_CompareAndSwapL:
1193     case Op_CompareAndSwapP:
1194     case Op_CompareAndSwapN:
1195     case Op_GetAndSetI:
1196     case Op_GetAndSetL:
1197     case Op_GetAndSetP:
1198     case Op_GetAndSetN:
1199     case Op_GetAndAddI:
1200     case Op_GetAndAddL:
1201       return true;
1202     default:
1203       return false;
1204     }
1205   }
1206 
1207 // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1208 
// Return true when this acquire membar can be elided because the
// preceding load or CAS will itself be emitted with acquire
// semantics (ldar<x>/ldaxr<x>), making a trailing dmb redundant.
bool unnecessary_acquire(const Node *barrier)
{
  assert(barrier->is_MemBar(), "expecting a membar");

  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode* mb = barrier->as_MemBar();

  // trailing membar of a volatile load: the ldar<x> covers it
  if (mb->trailing_load()) {
    return true;
  }

  // trailing membar of a load-store: only elide for the opcodes that
  // are translated with the ldaxr/stlxr sequence (see is_CAS)
  if (mb->trailing_load_store()) {
    Node* load_store = mb->in(MemBarNode::Precedent);
    assert(load_store->is_LoadStore(), "unexpected graph shape");
    return is_CAS(load_store->Opcode());
  }

  return false;
}
1232 
1233 bool needs_acquiring_load(const Node *n)
1234 {
1235   assert(n->is_Load(), "expecting a load");
1236   if (UseBarriersForVolatile) {
1237     // we use a normal load and a dmb
1238     return false;
1239   }
1240 
1241   LoadNode *ld = n->as_Load();
1242 
1243   return ld->is_acquire();
1244 }
1245 
1246 bool unnecessary_release(const Node *n)
1247 {
1248   assert((n->is_MemBar() &&
1249           n->Opcode() == Op_MemBarRelease),
1250          "expecting a release membar");
1251 
1252   if (UseBarriersForVolatile) {
1253     // we need to plant a dmb
1254     return false;
1255   }
1256 
1257   MemBarNode *barrier = n->as_MemBar();
1258 
1259   if (!barrier->leading()) {
1260     return false;
1261   } else {
1262     Node* trailing = barrier->trailing_membar();
1263     MemBarNode* trailing_mb = trailing->as_MemBar();
1264     assert(trailing_mb->trailing(), "Not a trailing membar?");
1265     assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars");
1266 
1267     Node* mem = trailing_mb->in(MemBarNode::Precedent);
1268     if (mem->is_Store()) {
1269       assert(mem->as_Store()->is_release(), "");
1270       assert(trailing_mb->Opcode() == Op_MemBarVolatile, "");
1271       return true;
1272     } else {
1273       assert(mem->is_LoadStore(), "");
1274       assert(trailing_mb->Opcode() == Op_MemBarAcquire, "");
1275       return is_CAS(mem->Opcode());
1276     }
1277   }
1278 
1279   return false;
1280 }
1281 
// Return true when this MemBarVolatile trails a volatile store whose
// translation (stlr<x>) already provides the required ordering, so
// the trailing dmb can be elided.
bool unnecessary_volatile(const Node *n)
{
  // assert n->is_MemBar();
  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode *mbvol = n->as_MemBar();

  // true only when this membar is the trailing membar of a volatile
  // store signature (see the discussion at the top of this block)
  bool release = mbvol->trailing_store();
  assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), "");
#ifdef ASSERT
  // cross-check the leading/trailing membar linkage
  if (release) {
    Node* leading = mbvol->leading_membar();
    assert(leading->Opcode() == Op_MemBarRelease, "");
    assert(leading->as_MemBar()->leading_store(), "");
    assert(leading->as_MemBar()->trailing_membar() == mbvol, "");
   }
#endif

  return release;
}
1305 
1306 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1307 
1308 bool needs_releasing_store(const Node *n)
1309 {
1310   // assert n->is_Store();
1311   if (UseBarriersForVolatile) {
1312     // we use a normal store and dmb combination
1313     return false;
1314   }
1315 
1316   StoreNode *st = n->as_Store();
1317 
1318   return st->trailing_membar() != NULL;
1319 }
1320 
// predicate controlling translation of CAS
//
// returns true if CAS needs to use an acquiring load otherwise false

bool needs_acquiring_load_exclusive(const Node *n)
{
  assert(is_CAS(n->Opcode()), "expecting a compare and swap");
  if (UseBarriersForVolatile) {
    // with barriers in use the CAS uses a plain ldxr<x> and the
    // membars provide the ordering
    return false;
  }

  // the CAS is expected to arrive with its trailing membar signature
  // in place, in which case the ldaxr<x> form is always wanted
  LoadStoreNode* ldst = n->as_LoadStore();
  assert(ldst->trailing_membar() != NULL, "expected trailing membar");

  // so we can just return true here
  return true;
}
1338 
// predicate controlling translation of StoreCM
//
// returns true if the StoreStore barrier preceding the card write is
// unnecessary and can be elided, otherwise false
1343 
// Return true when the StoreStore barrier normally planted before a
// card mark (StoreCM) can be elided; only CMS without conditional
// card marking requires the dmb ishst.
bool unnecessary_storestore(const Node *storecm)
{
  assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");

  // we need to generate a dmb ishst between an object put and the
  // associated card mark when we are using CMS without conditional
  // card marking

  if (UseConcMarkSweepGC && !UseCondCardMark) {
    return false;
  }

  // a storestore is unnecessary in all other cases

  return true;
}
1360 
1361 
1362 #define __ _masm.
1363 
// advance declarations for helper functions to convert register
// indices to register objects
1366 
1367 // the ad file has to provide implementations of certain methods
1368 // expected by the generic code
1369 //
1370 // REQUIRED FUNCTIONALITY
1371 
1372 //=============================================================================
1373 
1374 // !!!!! Special hack to get all types of calls to specify the byte offset
1375 //       from the start of the call to the point where the return address
1376 //       will point.
1377 
1378 int MachCallStaticJavaNode::ret_addr_offset()
1379 {
1380   // call should be a simple bl
1381   // unless this is a method handle invoke in which case it is
1382   // mov(rfp, sp), bl, mov(sp, rfp)
1383   int off = 4;
1384   if (_method_handle_invoke) {
1385     off += 4;
1386   }
1387   return off;
1388 }
1389 
int MachCallDynamicJavaNode::ret_addr_offset()
{
  // the call site is movz, movk, movk then bl: four 4-byte
  // instructions, with the return address immediately after the bl
  return 16; // movz, movk, movk, bl
}
1394 
1395 int MachCallRuntimeNode::ret_addr_offset() {
1396   // for generated stubs the call will be
1397   //   bl(addr)
1398   // for real runtime callouts it will be six instructions
1399   // see aarch64_enc_java_to_runtime
1400   //   adr(rscratch2, retaddr)
1401   //   lea(rscratch1, RuntimeAddress(addr)
1402   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1403   //   blr(rscratch1)
1404   CodeBlob *cb = CodeCache::find_blob(_entry_point);
1405   if (cb) {
1406     return MacroAssembler::far_branch_size();
1407   } else {
1408     return 6 * NativeInstruction::instruction_size;
1409   }
1410 }
1411 
// Indicate if the safepoint node needs the polling page as an input

// the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
// instruction itself. so we cannot plant a mov of the safepoint poll
// address followed by a load. setting this to true means the mov is
// scheduled as a prior instruction. that's better for scheduling
// anyway.

bool SafePointNode::needs_polling_address_input()
{
  return true;
}
1425 
1426 //=============================================================================
1427 
#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
#endif

// emit a brk instruction to trap into the debugger
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  __ brk(0);
}

uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  // let the generic code compute the size from the emitted length
  return MachNode::size(ra_);
}
1442 
1443 //=============================================================================
1444 
#ifndef PRODUCT
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif

  // emit _count nop instructions, used as padding for loops and calls
  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
    MacroAssembler _masm(&cbuf);
    for (int i = 0; i < _count; i++) {
      __ nop();
    }
  }

  // size is exactly _count 4-byte instructions
  uint MachNopNode::size(PhaseRegAlloc*) const {
    return _count * NativeInstruction::instruction_size;
  }
1461 
1462 //=============================================================================
// the constant base node needs no output register
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

// this node is never expanded after register allocation
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

// empty encoding, so zero bytes
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
#endif
1487 
#ifndef PRODUCT
// Pretty-print the prolog; intended to mirror the frame-build code
// emitted by MachPrologNode::emit (via MacroAssembler::build_frame)
// -- verify against build_frame if either changes.
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  if (framesize == 0) {
    // Is this even possible?
    st->print("stp  lr, rfp, [sp, #%d]!", -(2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    // small frame: allocate then store the pair at the top
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
  } else {
    // large frame: the immediate does not fit, go via rscratch1
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
#endif
1510 
// Emit the method prolog: patchable nop, optional stack bang, frame
// build, and constant table base offset setup.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  long framesize = ((long)C->frame_slots()) << LogBytesPerInt;
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  if (C->need_stack_bang(framesize))
    __ generate_stack_overflow_check(framesize);

  __ build_frame(framesize);

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  // record the instruction offset at which the frame is complete
  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1541 
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachPrologNode::reloc() const
{
  // the prolog emits no relocatable values
  return 0;
}
1552 
1553 //=============================================================================
1554 
#ifndef PRODUCT
// Pretty-print the epilog; intended to mirror the frame-teardown and
// polling code emitted by MachEpilogNode::emit -- verify against
// MacroAssembler::remove_frame if either changes.
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    // small frame: load the pair from the top then deallocate
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    // large frame: the immediate does not fit, go via rscratch1
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #" INTPTR_FORMAT "\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
#endif
1580 
// Emit the method epilog: tear down the frame and, on method return
// paths, touch the safepoint polling page.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (do_polling() && C->is_method_compilation()) {
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
1592 
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}

int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}

const Pipeline * MachEpilogNode::pipeline() const {
  // no special pipeline class for the epilog
  return MachNode::pipeline_class();
}

// This method seems to be obsolete. It is declared in machnode.hpp
// and defined in all *.ad files, but it is never called. Should we
// get rid of it?
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  return 4;
}
1614 
1615 //=============================================================================
1616 
1617 // Figure out which register class each belongs in: rc_int, rc_float or
1618 // rc_stack.
1619 enum RC { rc_bad, rc_int, rc_float, rc_stack };
1620 
1621 static enum RC rc_class(OptoReg::Name reg) {
1622 
1623   if (reg == OptoReg::Bad) {
1624     return rc_bad;
1625   }
1626 
1627   // we have 30 int registers * 2 halves
1628   // (rscratch1 and rscratch2 are omitted)
1629 
1630   if (reg < 60) {
1631     return rc_int;
1632   }
1633 
1634   // we have 32 float register * 2 halves
1635   if (reg < 60 + 128) {
1636     return rc_float;
1637   }
1638 
1639   // Between float regs & stack is the flags regs.
1640   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
1641 
1642   return rc_stack;
1643 }
1644 
1645 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
1646   Compile* C = ra_->C;
1647 
1648   // Get registers to move.
1649   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
1650   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
1651   OptoReg::Name dst_hi = ra_->get_reg_second(this);
1652   OptoReg::Name dst_lo = ra_->get_reg_first(this);
1653 
1654   enum RC src_hi_rc = rc_class(src_hi);
1655   enum RC src_lo_rc = rc_class(src_lo);
1656   enum RC dst_hi_rc = rc_class(dst_hi);
1657   enum RC dst_lo_rc = rc_class(dst_lo);
1658 
1659   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
1660 
1661   if (src_hi != OptoReg::Bad) {
1662     assert((src_lo&1)==0 && src_lo+1==src_hi &&
1663            (dst_lo&1)==0 && dst_lo+1==dst_hi,
1664            "expected aligned-adjacent pairs");
1665   }
1666 
1667   if (src_lo == dst_lo && src_hi == dst_hi) {
1668     return 0;            // Self copy, no move.
1669   }
1670 
1671   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
1672               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
1673   int src_offset = ra_->reg2offset(src_lo);
1674   int dst_offset = ra_->reg2offset(dst_lo);
1675 
1676   if (bottom_type()->isa_vect() != NULL) {
1677     uint ireg = ideal_reg();
1678     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
1679     if (cbuf) {
1680       MacroAssembler _masm(cbuf);
1681       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
1682       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
1683         // stack->stack
1684         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
1685         if (ireg == Op_VecD) {
1686           __ unspill(rscratch1, true, src_offset);
1687           __ spill(rscratch1, true, dst_offset);
1688         } else {
1689           __ spill_copy128(src_offset, dst_offset);
1690         }
1691       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
1692         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1693                ireg == Op_VecD ? __ T8B : __ T16B,
1694                as_FloatRegister(Matcher::_regEncode[src_lo]));
1695       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
1696         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1697                        ireg == Op_VecD ? __ D : __ Q,
1698                        ra_->reg2offset(dst_lo));
1699       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
1700         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1701                        ireg == Op_VecD ? __ D : __ Q,
1702                        ra_->reg2offset(src_lo));
1703       } else {
1704         ShouldNotReachHere();
1705       }
1706     }
1707   } else if (cbuf) {
1708     MacroAssembler _masm(cbuf);
1709     switch (src_lo_rc) {
1710     case rc_int:
1711       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
1712         if (is64) {
1713             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
1714                    as_Register(Matcher::_regEncode[src_lo]));
1715         } else {
1716             MacroAssembler _masm(cbuf);
1717             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
1718                     as_Register(Matcher::_regEncode[src_lo]));
1719         }
1720       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
1721         if (is64) {
1722             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1723                      as_Register(Matcher::_regEncode[src_lo]));
1724         } else {
1725             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1726                      as_Register(Matcher::_regEncode[src_lo]));
1727         }
1728       } else {                    // gpr --> stack spill
1729         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1730         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
1731       }
1732       break;
1733     case rc_float:
1734       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
1735         if (is64) {
1736             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
1737                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1738         } else {
1739             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
1740                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1741         }
1742       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
1743           if (cbuf) {
1744             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1745                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1746         } else {
1747             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1748                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1749         }
1750       } else {                    // fpr --> stack spill
1751         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1752         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1753                  is64 ? __ D : __ S, dst_offset);
1754       }
1755       break;
1756     case rc_stack:
1757       if (dst_lo_rc == rc_int) {  // stack --> gpr load
1758         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
1759       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
1760         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1761                    is64 ? __ D : __ S, src_offset);
1762       } else {                    // stack --> stack copy
1763         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1764         __ unspill(rscratch1, is64, src_offset);
1765         __ spill(rscratch1, is64, dst_offset);
1766       }
1767       break;
1768     default:
1769       assert(false, "bad rc_class for spill");
1770       ShouldNotReachHere();
1771     }
1772   }
1773 
1774   if (st) {
1775     st->print("spill ");
1776     if (src_lo_rc == rc_stack) {
1777       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
1778     } else {
1779       st->print("%s -> ", Matcher::regName[src_lo]);
1780     }
1781     if (dst_lo_rc == rc_stack) {
1782       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
1783     } else {
1784       st->print("%s", Matcher::regName[dst_lo]);
1785     }
1786     if (bottom_type()->isa_vect() != NULL) {
1787       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
1788     } else {
1789       st->print("\t# spill size = %d", is64 ? 64:32);
1790     }
1791   }
1792 
1793   return 0;
1794 
1795 }
1796 
1797 #ifndef PRODUCT
1798 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1799   if (!ra_)
1800     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
1801   else
1802     implementation(NULL, ra_, false, st);
1803 }
1804 #endif
1805 
// Emit the spill/copy code for this node into the code buffer.  All the
// real work (register-class dispatch) happens in implementation().
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}
1809 
// Size in bytes of the emitted spill/copy code, computed generically
// from the emitted instructions by MachNode::size().
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1813 
1814 //=============================================================================
1815 
1816 #ifndef PRODUCT
1817 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1818   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1819   int reg = ra_->get_reg_first(this);
1820   st->print("add %s, rsp, #%d]\t# box lock",
1821             Matcher::regName[reg], offset);
1822 }
1823 #endif
1824 
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);

  // Compute the in-frame address of the lock box and place it in the
  // register allocated to this node.
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg    = ra_->get_encode(this);

  if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
    __ add(as_Register(reg), sp, offset);
  } else {
    // A frame offset too large for an add/sub immediate is not expected.
    ShouldNotReachHere();
  }
}
1837 
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  // emit() above always produces exactly one 4-byte add instruction.
  return 4;
}
1842 
1843 //=============================================================================
1844 
1845 #ifndef PRODUCT
1846 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1847 {
1848   st->print_cr("# MachUEPNode");
1849   if (UseCompressedClassPointers) {
1850     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1851     if (Universe::narrow_klass_shift() != 0) {
1852       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1853     }
1854   } else {
1855    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1856   }
1857   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
1858   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
1859 }
1860 #endif
1861 
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  // Compare the receiver's klass (via j_rarg0) against the inline-cache
  // klass -- see MacroAssembler::cmp_klass for the exact protocol.  On a
  // mismatch, tail-call the IC miss stub.
  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}
1875 
// Size in bytes of the unverified entry point code, computed generically
// from the emitted instructions.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_);
}
1880 
1881 // REQUIRED EMIT CODE
1882 
1883 //=============================================================================
1884 
1885 // Emit exception handler code.
1886 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
1887 {
1888   // mov rscratch1 #exception_blob_entry_point
1889   // br rscratch1
1890   // Note that the code buffer's insts_mark is always relative to insts.
1891   // That's why we must use the macroassembler to generate a handler.
1892   MacroAssembler _masm(&cbuf);
1893   address base = __ start_a_stub(size_exception_handler());
1894   if (base == NULL) {
1895     ciEnv::current()->record_failure("CodeCache is full");
1896     return 0;  // CodeBuffer::expand failed
1897   }
1898   int offset = __ offset();
1899   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1900   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1901   __ end_a_stub();
1902   return offset;
1903 }
1904 
// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // Load lr with the address of the adr instruction itself (__ pc() at
  // emission time), identifying this deopt site, then jump to the
  // unpack entry of the deopt blob.
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
1925 
1926 // REQUIRED MATCHER CODE
1927 
1928 //=============================================================================
1929 
1930 const bool Matcher::match_rule_supported(int opcode) {
1931 
1932   // TODO 
1933   // identify extra cases that we might want to provide match rules for
1934   // e.g. Op_StrEquals and other intrinsics
1935   if (!has_match_rule(opcode)) {
1936     return false;
1937   }
1938 
1939   return true;  // Per default match rules are supported.
1940 }
1941 
// Should never be called on AArch64 (guarded by Unimplemented()): there
// is no x87-style FPU register stack to compute offsets into.
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
1947 
1948 // Is this branch offset short enough that a short branch can be used?
1949 //
1950 // NOTE: If the platform does not provide any short branch variants, then
1951 //       this method should return false for offset 0.
1952 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1953   // The passed offset is relative to address of the branch.
1954 
1955   return (-32768 <= offset && offset < 32768);
1956 }
1957 
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
  // Probably always true, even if a temp register is required.
  return true;
}
1963 
// true just means we have fast l2f conversion
// (no intermediate store/reload sequence is needed).
const bool Matcher::convL2FSupported(void) {
  return true;
}
1968 
1969 // Vector width in bytes.
1970 const int Matcher::vector_width_in_bytes(BasicType bt) {
1971   int size = MIN2(16,(int)MaxVectorSize);
1972   // Minimum 2 values in vector
1973   if (size < 2*type2aelembytes(bt)) size = 0;
1974   // But never < 4
1975   if (size < 4) size = 0;
1976   return size;
1977 }
1978 
// Limits on vector size (number of elements) loaded into vector.
// Derived directly from the vector width for the element type.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
1983 const int Matcher::min_vector_size(const BasicType bt) {
1984 //  For the moment limit the vector size to 8 bytes
1985     int size = 8 / type2aelembytes(bt);
1986     if (size < 2) size = 2;
1987     return size;
1988 }
1989 
1990 // Vector ideal reg.
1991 const uint Matcher::vector_ideal_reg(int len) {
1992   switch(len) {
1993     case  8: return Op_VecD;
1994     case 16: return Op_VecX;
1995   }
1996   ShouldNotReachHere();
1997   return 0;
1998 }
1999 
2000 const uint Matcher::vector_shift_count_ideal_reg(int size) {
2001   switch(size) {
2002     case  8: return Op_VecD;
2003     case 16: return Op_VecX;
2004   }
2005   ShouldNotReachHere();
2006   return 0;
2007 }
2008 
// AES support not yet implemented, so no expanded key to pass.
const bool Matcher::pass_original_key_for_aes() {
  return false;
}
2013 
// AArch64 supports misaligned vector store/load (comment previously said
// "x86" -- copied from the x86 AD file).
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}
2018 
// false => the ClearArray count is in long-words; size gets scaled to
// BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size (in bytes) for cleararray.
const int Matcher::init_array_short_size = 4 * BytesPerLong;
2024 
// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves (cost 0 = free)
  return 0;
}

const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}
2035 
// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?  True for Intel but false for most RISCs, including
// AArch64.
const bool Matcher::clone_shift_expressions = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;
2047 
// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // With a zero shift, decoding is a plain add of the heap base, so the
  // matcher can fold it into the addressing mode.
  return Universe::narrow_oop_shift() == 0;
}
2061 
bool Matcher::narrow_klass_use_complex_address() {
  // TODO
  // decide whether we need to set this to true
  return false;
}
2067 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
2080 
// Should never be called on AArch64 (comment previously said "No-op on
// amd64" -- copied from another port; the body aborts via Unimplemented).
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}
2085 
// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.  AArch64 FP arithmetic is IEEE-754
// conformant without extra rounding, so no.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return true; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
2099 
2100 // Return whether or not this register is ever used as an argument.
2101 // This function is used on startup to build the trampoline stubs in
2102 // generateOptoStub.  Registers not mentioned will be killed by the VM
2103 // call in the trampoline, and arguments in those registers not be
2104 // available to the callee.
2105 bool Matcher::can_be_java_arg(int reg)
2106 {
2107   return
2108     reg ==  R0_num || reg == R0_H_num ||
2109     reg ==  R1_num || reg == R1_H_num ||
2110     reg ==  R2_num || reg == R2_H_num ||
2111     reg ==  R3_num || reg == R3_H_num ||
2112     reg ==  R4_num || reg == R4_H_num ||
2113     reg ==  R5_num || reg == R5_H_num ||
2114     reg ==  R6_num || reg == R6_H_num ||
2115     reg ==  R7_num || reg == R7_H_num ||
2116     reg ==  V0_num || reg == V0_H_num ||
2117     reg ==  V1_num || reg == V1_H_num ||
2118     reg ==  V2_num || reg == V2_H_num ||
2119     reg ==  V3_num || reg == V3_H_num ||
2120     reg ==  V4_num || reg == V4_H_num ||
2121     reg ==  V5_num || reg == V5_H_num ||
2122     reg ==  V6_num || reg == V6_H_num ||
2123     reg ==  V7_num || reg == V7_H_num;
2124 }
2125 
// An argument register that may be spilled/reloaded by the register
// allocator; on AArch64 exactly the Java argument registers qualify.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}
2130 
// No hand-coded assembly sequence is provided for long division by a
// constant; generic code generation handles it.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}
2134 
// Register for DIVI projection of divmodI.
// Should never be called on AArch64 (added the header comment the three
// sibling *_proj_mask functions below already have).
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}
2139 
// Register for MODI projection of divmodI.
// Should never be called on AArch64.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}
2145 
// Register for DIVL projection of divmodL.
// Should never be called on AArch64.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}
2151 
// Register for MODL projection of divmodL.
// Should never be called on AArch64.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}
2157 
// Register mask for saving SP across a method-handle invoke: on
// AArch64 the frame pointer register is used.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
2161 
2162 
// Emit a volatile load/store of REG through the INSN acquire/release
// MacroAssembler member.  Volatile accesses only support a bare base
// register: the guarantees reject any index, displacement, or scale.
// (Comments are kept outside the macro: a '//' comment ending in '\'
// would swallow the continuation.)
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                              \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }
2171 
// Pointer-to-member types for the MacroAssembler load/store operations
// dispatched through the loadStore() helpers below: integer register,
// float register, and SIMD-vector register forms respectively.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
2176 
2177   // Used for all non-volatile memory accesses.  The use of
2178   // $mem->opcode() to discover whether this pattern uses sign-extended
2179   // offsets is something of a kludge.
2180   static void loadStore(MacroAssembler masm, mem_insn insn,
2181                          Register reg, int opcode,
2182                          Register base, int index, int size, int disp)
2183   {
2184     Address::extend scale;
2185 
2186     // Hooboy, this is fugly.  We need a way to communicate to the
2187     // encoder that the index needs to be sign extended, so we have to
2188     // enumerate all the cases.
2189     switch (opcode) {
2190     case INDINDEXSCALEDOFFSETI2L:
2191     case INDINDEXSCALEDI2L:
2192     case INDINDEXSCALEDOFFSETI2LN:
2193     case INDINDEXSCALEDI2LN:
2194     case INDINDEXOFFSETI2L:
2195     case INDINDEXOFFSETI2LN:
2196       scale = Address::sxtw(size);
2197       break;
2198     default:
2199       scale = Address::lsl(size);
2200     }
2201 
2202     if (index == -1) {
2203       (masm.*insn)(reg, Address(base, disp));
2204     } else {
2205       if (disp == 0) {
2206         (masm.*insn)(reg, Address(base, as_Register(index), scale));
2207       } else {
2208         masm.lea(rscratch1, Address(base, disp));
2209         (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
2210       }
2211     }
2212   }
2213 
2214   static void loadStore(MacroAssembler masm, mem_float_insn insn,
2215                          FloatRegister reg, int opcode,
2216                          Register base, int index, int size, int disp)
2217   {
2218     Address::extend scale;
2219 
2220     switch (opcode) {
2221     case INDINDEXSCALEDOFFSETI2L:
2222     case INDINDEXSCALEDI2L:
2223     case INDINDEXSCALEDOFFSETI2LN:
2224     case INDINDEXSCALEDI2LN:
2225       scale = Address::sxtw(size);
2226       break;
2227     default:
2228       scale = Address::lsl(size);
2229     }
2230 
2231      if (index == -1) {
2232       (masm.*insn)(reg, Address(base, disp));
2233     } else {
2234       if (disp == 0) {
2235         (masm.*insn)(reg, Address(base, as_Register(index), scale));
2236       } else {
2237         masm.lea(rscratch1, Address(base, disp));
2238         (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
2239       }
2240     }
2241   }
2242 
2243   static void loadStore(MacroAssembler masm, mem_vector_insn insn,
2244                          FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
2245                          int opcode, Register base, int index, int size, int disp)
2246   {
2247     if (index == -1) {
2248       (masm.*insn)(reg, T, Address(base, disp));
2249     } else {
2250       assert(disp == 0, "unsupported address mode");
2251       (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
2252     }
2253   }
2254 
2255 %}
2256 
2257 
2258 
2259 //----------ENCODING BLOCK-----------------------------------------------------
2260 // This block specifies the encoding classes used by the compiler to
2261 // output byte streams.  Encoding classes are parameterized macros
2262 // used by Machine Instruction Nodes in order to generate the bit
2263 // encoding of the instruction.  Operands specify their base encoding
2264 // interface with the interface keyword.  There are currently
2265 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
2266 // COND_INTER.  REG_INTER causes an operand to generate a function
2267 // which returns its register number when queried.  CONST_INTER causes
2268 // an operand to generate a function which returns the value of the
2269 // constant when queried.  MEMORY_INTER causes an operand to generate
2270 // four functions which return the Base Register, the Index Register,
2271 // the Scale Value, and the Offset Value of the operand when queried.
2272 // COND_INTER causes an operand to generate six functions which return
2273 // the encoding code (ie - encoding bits for the instruction)
2274 // associated with each basic boolean condition for a conditional
2275 // instruction.
2276 //
2277 // Instructions specify two basic values for encoding.  Again, a
2278 // function is available to check if the constant displacement is an
2279 // oop. They use the ins_encode keyword to specify their encoding
2280 // classes (which must be a sequence of enc_class names, and their
2281 // parameters, specified in the encoding block), and they use the
2282 // opcode keyword to specify, in order, their primary, secondary, and
2283 // tertiary opcode.  Only the opcode sections which a particular
2284 // instruction needs for encoding need to be specified.
2285 encode %{
2286   // Build emit functions for each basic byte or larger field in the
2287   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2288   // from C++ code in the enc_class source block.  Emit functions will
2289   // live in the main source block for now.  In future, we can
2290   // generalize this by adding a syntax that specifies the sizes of
2291   // fields in an order, so that the adlc can build the emit functions
2292   // automagically
2293 
2294   // catch all for unimplemented encodings
  // catch all for unimplemented encodings: emits a trap so that any
  // instruct rule still lacking a real encoding fails loudly at runtime
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");    
  %}
2299 
  // BEGIN Non-volatile memory access

  // Each enc_class below forwards to the matching loadStore() helper,
  // passing $mem->opcode() so the helper can tell whether the index
  // register needs sign extension (I2L-converted indices).  Overloads
  // taking an iRegL destination are used when a narrower load feeds a
  // long value.

  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Float/double loads into FP registers.
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector loads: the SIMD_RegVariant (S/D/Q) selects the access width.
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2403 
  // Store encodings, mirroring the loads above.  The *0 variants store
  // constant zero via the zero register (zr).
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    // Make all earlier stores visible before this zeroing byte store.
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    // NOTE(review): a second MacroAssembler over the same &cbuf is
    // constructed here; both wrap the one code buffer, so this is safe.
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Float/double stores from FP registers.
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector stores: the SIMD_RegVariant (S/D/Q) selects the access width.
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2496 
2497   // END Non-volatile memory access
2498 
2499   // this encoding writes the address of the first instruction in the
2500   // call sequence for the runtime call into the anchor pc slot. this
2501   // address allows the runtime to i) locate the code buffer for the
2502   // caller (any address in the buffer would do) and ii) find the oop
2503   // map associated with the call (has to address the instruction
2504   // following the call). note that we have to store the address which
2505   // follows the actual call.
2506   // 
2507   // the offset from the current pc can be computed by considering
2508   // what gets generated between this point up to and including the
2509   // call. it looks like this
2510   //
2511   //   movz xscratch1 0xnnnn        <-- current pc is here
2512   //   movk xscratch1 0xnnnn
2513   //   movk xscratch1 0xnnnn
2514   //   str xscratch1, [xthread,#anchor_pc_off]
2515   //   mov xscratch2, sp
  //   str xscratch2, [xthread,#anchor_sp_off]
2517   //   mov x0, x1
2518   //   . . .
2519   //   mov xn-1, xn
2520   //   mov xn, thread            <-- always passed
2521   //   mov xn+1, rfp             <-- optional iff primary == 1
2522   //   movz xscratch1 0xnnnn
2523   //   movk xscratch1 0xnnnn
2524   //   movk xscratch1 0xnnnn
2525   //   blr  xscratch1
2526   //   . . .
2527   //
  // where the called routine has n args (including the thread and,
  // possibly the stub's caller return address currently in rfp).  we
  // can compute n by looking at the number of args passed into the
  // stub. we assert that nargs is at most 8.
2532   //
2533   // so the offset we need to add to the pc (in 32-bit words) is
2534   //   3 +        <-- load 48-bit constant return pc
2535   //   1 +        <-- write anchor pc
2536   //   1 +        <-- copy sp
2537   //   1 +        <-- write anchor sp
2538   //   nargs +    <-- java stub arg count
2539   //   1 +        <-- extra thread arg
2540   // [ 1 + ]      <-- optional ret address of stub caller
2541   //   3 +        <-- load 64 bit call target address
2542   //   1          <-- blr instruction
2543   //
2544   // i.e we need to add (nargs + 11) * 4 bytes or (nargs + 12) * 4 bytes
2545   //
2546 
  // Write the anchor pc into the thread's JavaFrameAnchor so the runtime
  // can locate the caller's code buffer and the oop map for the call (see
  // the long comment above).  The stored pc is the address *following* the
  // upcoming blr, computed as pc() + (nargs + 11) words.
  enc_class aarch64_enc_save_pc() %{
    Compile* C = ra_->C;
    // number of java-level stub arguments
    int nargs = C->tf()->domain()->cnt() - TypeFunc::Parms;
    if ($primary) { nargs++; } // primary == 1 => stub also receives rfp
    assert(nargs <= 8, "opto runtime stub has more than 8 args!");
    MacroAssembler _masm(&cbuf);
    address pc = __ pc();
    // offset from here to the instruction after the blr (see count above)
    int call_offset = (nargs + 11) * 4;
    int field_offset = in_bytes(JavaThread::frame_anchor_offset()) +
                       in_bytes(JavaFrameAnchor::last_Java_pc_offset());
    __ lea(rscratch1, InternalAddress(pc + call_offset));
    __ str(rscratch1, Address(rthread, field_offset));
  %}
2560 
2561   // volatile loads and stores
2562 
  // Volatile (store-release) stores of byte / halfword / word.
  // MOV_VOLATILE is a helper macro (defined earlier in this file, not
  // visible here) that materializes the address -- using rscratch1 when
  // needed -- and emits the named release instruction.  On CPUs with the
  // CPU_DMB_ATOMICS workaround flag an extra dmb ish barrier follows.
  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}

  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}

  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}
2583 
2584 
  // Volatile (load-acquire) loads.  The signed sub-word variants load with
  // an unsigned acquire load and then sign-extend in a separate
  // instruction, since there is no signed ldar form.

  // load-acquire byte, sign-extend to 32 bits
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}

  // load-acquire byte, sign-extend to 64 bits
  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}

  // load-acquire byte, zero-extended (int result)
  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // load-acquire byte, zero-extended (long result)
  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // load-acquire halfword, sign-extend to 32 bits
  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  // load-acquire halfword, sign-extend to 64 bits
  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  // load-acquire halfword, zero-extended (int result)
  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // load-acquire halfword, zero-extended (long result)
  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // load-acquire word (int result)
  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // load-acquire word, zero-extended to long.
  // NOTE(review): same enc_class name as the iRegI variant above;
  // presumably ADLC tolerates/distinguishes this by operand types -- confirm.
  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // load-acquire doubleword
  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}
2647 
  // Volatile FP loads: there is no FP load-acquire, so acquire-load into
  // the integer scratch register and fmov the bits across to the FP
  // destination.
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
2659 
  // Volatile (store-release) 64-bit store.
  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64 (sp is not
    // a valid source for stlr), so copy it via rscratch2 first
    if (src_reg == r31_sp) {
        MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
    // extra barrier on CPUs with the DMB-atomics workaround flag
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}
2675 
  // Volatile FP stores: no FP store-release exists, so fmov the bits into
  // the integer scratch register and store-release that.
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}

  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
    if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
      __ dmb(__ ISH);
  %}
2699 
2700   // synchronized read/update encodings
2701 
  // Load-acquire exclusive.  ldaxr only accepts a plain base register, so
  // any index/displacement is first folded into rscratch1 with lea.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) { // no index register
       if (disp != 0) {      
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // fold disp first, then add the scaled index
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}
2730 
  // Store-release exclusive.  Like ldaxr above, the effective address must
  // be a single register, built in rscratch2; rscratch1 receives the
  // store-exclusive status (0 = success), which the final cmpw turns into
  // condition flags for the matching instruct rule.
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) { // no index register
       if (disp != 0) {      
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // fold disp first, then add the scaled index
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    // status == 0 means the store succeeded; expose it via flags (EQ)
    __ cmpw(rscratch1, zr);
  %}
2760 
  // Compare-and-exchange of a 64-bit (xword) / 32-bit (word) value.
  // Addressing is restricted to a bare base register; release semantics
  // only (no acquire) -- see aarch64_enc_cmpxchg_acq below for the
  // acquiring variant.
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true);
  %}

  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true);
  %}
2774 
2775 
  // Shenandoah GC oop compare-and-exchange: delegates to the barrier-set
  // assembler, which handles the GC barriers around the CAS.  oldval is
  // copied into tmp because the barrier code mutates its compare value.
  // Result (success flag, is_cae == false) lands in $res.
  enc_class aarch64_enc_cmpxchg_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegINoSp res) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    ShenandoahBarrierSetAssembler::bsasm()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
                              /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false, $res$$Register);
  %}
2784 
2785   // The only difference between aarch64_enc_cmpxchg and
2786   // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
2787   // CompareAndSwap sequence to serve as a barrier on acquiring a
2788   // lock.
  // Acquiring compare-and-exchange (64- and 32-bit): identical to the
  // plain variants above except acquire == true, so the CAS also acts as
  // an acquire barrier (used when taking a lock).
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true);
  %}

  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true);
  %}
2802 
  // Acquiring variant of the Shenandoah oop CAS above (acquire == true).
  enc_class aarch64_enc_cmpxchg_acq_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegINoSp res) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    ShenandoahBarrierSetAssembler::bsasm()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
                              /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false,
                              $res$$Register);
  %}
2812 
2813   // auxiliary used for CompareAndSwapX to set result register
2814   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
2815     MacroAssembler _masm(&cbuf);
2816     Register res_reg = as_Register($res$$reg);
2817     __ cset(res_reg, Assembler::EQ);
2818   %}
2819 
2820   // prefetch encodings
2821 
  // Prefetch for read (PLDL1KEEP): prfm supports base+disp or
  // base+scaled-index directly; with both, fold disp via lea first.
  enc_class aarch64_enc_prefetchr(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PLDL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PLDL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PLDL1KEEP);
      }
    }
  %}

  // Prefetch for write (PSTL1KEEP); same addressing strategy as above.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
2859 
  // Non-temporal prefetch for write (PSTL1STRM, streaming hint);
  // same addressing strategy as the prefetch encodings above.
  enc_class aarch64_enc_prefetchnta(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1STRM);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1STRM);
        // NOTE(review): nop emitted only in this arm -- presumably to keep
        // the instruct rule's declared size constant across arms; confirm.
        __ nop();
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1STRM);
      }
    }
  %}
2879 
  // mov encodings
2881 
  // Load a 32-bit immediate into a register; zero uses the zero register
  // as the source instead of materializing a constant.
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}

  // 64-bit immediate move; same zero-register shortcut.
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}
2903 
  // Load a pointer constant.  NULL and (address)1 have dedicated
  // encodings (mov_p0 / mov_p1 below) and must not reach here.  Oops and
  // metadata go through relocation-aware moves; plain addresses below the
  // first page are emitted as immediates, others as adrp+add (page base
  // plus offset).
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          // fits in the always-unmapped first page: plain immediate
          __ mov(dst_reg, con);
        } else {
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}
2928 
  // Pointer constant NULL: just copy the zero register.
  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Pointer constant 1 (used as a marker value).
  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}

  // Address of the safepoint polling page, loaded with a poll-type
  // relocation.  The page is assumed 4K-aligned so adrp alone suffices.
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}
2949 
  // Load the card-table byte map base via the MacroAssembler helper.
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}
2954 
  // Load a narrow (compressed) oop constant; NULL is handled by mov_n0.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}

  // Narrow oop NULL: zero the register.
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Load a narrow (compressed) klass constant.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
2986 
2987   // arithmetic encodings
2988 
  // Add/subtract immediate (32-bit).  One enc_class serves both ops: the
  // instruct rule's primary opcode selects subtract by negating the
  // constant, and the final sign check picks addw/subw so the assembler
  // always sees a non-negative 12-bit immediate.
  // NOTE(review): con = -con would overflow for INT_MIN -- presumably
  // excluded by the immIAddSub operand's value range; confirm.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}

  // 64-bit counterpart of the above (constant still 32-bit).
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}
3016 
  // Integer divide/modulo via the "corrected" idiv helpers, which handle
  // the Java-mandated min_int / -1 case.  The final bool argument selects
  // remainder (true) vs quotient (false).
  enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 64-bit quotient.  NOTE(review): operands are declared iRegI but the
  // helper is the 64-bit idivq -- presumably matched by long-divide rules;
  // confirm against the instruct definitions.
  enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 32-bit remainder.
  enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}

  // 64-bit remainder (same iRegI-vs-idivq note as aarch64_enc_div).
  enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}
3048 
3049   // compare instruction encodings
3050 
  // 32-bit register-register compare.
  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // 32-bit compare against an add/sub-range immediate: encoded as
  // subsw/addsw into the zero register so the immediate stays positive.
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}

  // 32-bit compare against an arbitrary immediate: materialize the
  // constant in rscratch1 first.
  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
    __ cmpw(reg1, rscratch1);
  %}

  // 64-bit register-register compare.
  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // 64-bit compare against a 12-bit-range immediate.  val == -val is true
  // only for Long.MIN_VALUE (0 is caught by the first branch), which
  // cannot be negated, so it is materialized via orr instead.
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
    // aargh, Long.MIN_VALUE is a special case
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}

  // 64-bit compare against an arbitrary immediate via rscratch1.
  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
    __ cmp(reg1, rscratch1);
  %}

  // Pointer compare (full 64-bit).
  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // Narrow (compressed) oop compare -- 32-bit.
  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // Pointer null test: compare against the zero register.
  enc_class aarch64_enc_testp(iRegP src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmp(reg, zr);
  %}

  // Narrow oop null test.
  enc_class aarch64_enc_testn(iRegN src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmpw(reg, zr);
  %}
3132 
  // Unconditional branch to an ADL label.
  enc_class aarch64_enc_b(label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ b(*L);
  %}

  // Conditional branch using the signed condition code from the cmpOp.
  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  // Conditional branch for unsigned comparisons (cmpOpU condition code).
  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
3150 
  // Slow-path subtype check (secondary supers scan).  Falls through on a
  // hit; branches to `miss` otherwise.  When $primary is set the result
  // register is zeroed on the hit path (0 == success convention); the miss
  // label is bound immediately after, so a miss leaves result untouched.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
3168 
  // Java static call.  Emits a trampoline call with the relocation type
  // implied by the call site (_method == NULL means a runtime wrapper;
  // _optimized_virtual selects the opt-virtual reloc), then, for real
  // Java targets, the to-interpreter stub.  Either emission can fail when
  // the code cache is full, in which case compilation is bailed out.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address mark = __ pc();
    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else if (_optimized_virtual) {
      call = __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
    } else {
      call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
    }
    if (call == NULL) {
      // Trampoline could not be allocated: code cache is full.
      ciEnv::current()->record_failure("CodeCache is full"); 
      return;
    }

    if (_method) {
      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full"); 
        return;
      }
    }
  %}
3197 
  // Java call through a method handle.  Same call/stub emission as
  // aarch64_enc_java_static_call, but the callee may modify sp, so sp is
  // parked in rfp across the call and restored afterwards.
  enc_class aarch64_enc_java_handle_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    relocInfo::relocType reloc;

    // RFP is preserved across all calls, even compiled calls.
    // Use it to preserve SP.
    __ mov(rfp, sp);

    address mark = __ pc();
    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else if (_optimized_virtual) {
      call = __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
    } else {
      call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
    }
    if (call == NULL) {
      // Code cache full: bail out of this compilation.
      ciEnv::current()->record_failure("CodeCache is full"); 
      return;
    }

    if (_method) {
      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full"); 
        return;
      }
    }

    // now restore sp
    __ mov(sp, rfp);
  %}
3234 
  // Java dynamic (inline-cache) call; NULL return again means the code
  // cache is full and compilation must be abandoned.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    address call = __ ic_call((address)$meth$$method);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full"); 
      return;
    }
  %}

  // Post-call check; only active under -XX:+VerifyStackAtCalls and
  // currently unimplemented on AArch64.
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
3251 
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blr
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // target is inside the code cache: reachable via trampoline call
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full"); 
        return;
      }
    } else {
      // arbitrary address: load it and blr, pushing a {zr, retaddr} pair
      // on the stack first as a breadcrumb for stack walking
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blr(rscratch1);
      __ bind(retaddr);
      __ add(sp, sp, 2 * wordSize); // pop the breadcrumb
    }
  %}
3278 
  // Jump to the rethrow stub (possibly out of branch range, hence far_jump).
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}

  // Method return.
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}

  // Tail call: indirect jump to the target, leaving the frame as-is.
  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    __ br(target_reg);
  %}

  // Tail jump used for exception forwarding.
  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
    // callee expects it in r3
    __ mov(r3, lr);
    __ br(target_reg);
  %}
3304 
  // Inline fast-path monitor enter.
  // Inputs: object to lock, BasicLock box on the stack, two temps.
  // Contract (see end of block): flags EQ = lock acquired, NE = caller
  // must take the slow path.  Statement order is significant: the box's
  // displaced header must be initialized before the CAS publishes it.
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Load markOop from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      // oop is never null here, so this sets NE => always take slow path.
      __ cmp(oop, zr);
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      // Branches to cont (with flags set) when biasing succeeds.
      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
    }

    // Handle existing monitor
    if ((EmitSync & 0x02) == 0) {
      // Inflated lock: markOop has the monitor bit set.
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Set tmp to be (markOop of object | UNLOCK_VALUE).
    __ orr(tmp, disp_hdr, markOopDesc::unlocked_value);

    // Load Compare Value application register.

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markOop with an unlocked value (tmp) and if
    // equal exchange the stack address of our box with object markOop.
    // On failure disp_hdr contains the possibly locked markOop.
    if (UseLSE) {
      __ mov(disp_hdr, tmp);
      __ casal(Assembler::xword, disp_hdr, box, oop);  // Updates disp_hdr
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::EQ, cont);
    } else {
      Label retry_load;
      if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldaxr(disp_hdr, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(disp_hdr, box, oop);
      // stlxr writes 0 on success; retry on spurious/contended failure.
      __ cbzw(disp_hdr, cont);
      __ b(retry_load);
    }

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object, will have now locked it will continue at label cont

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markOop of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      // The object's monitor m is unlocked iff m->owner == NULL,
      // otherwise m->owner may contain a thread or a stack address.
      //
      // Try to CAS m->owner from NULL to current thread.
      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
      __ mov(disp_hdr, zr);

      if (UseLSE) {
        __ mov(rscratch1, disp_hdr);
        __ casal(Assembler::xword, rscratch1, rthread, tmp);
        // EQ iff old owner was NULL, i.e. we acquired the monitor.
        __ cmp(rscratch1, disp_hdr);
      } else {
        Label retry_load, fail;
        if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(tmp), PSTL1STRM);
        __ bind(retry_load);
        __ ldaxr(rscratch1, tmp);
        __ cmp(disp_hdr, rscratch1);
        __ br(Assembler::NE, fail);
        // use stlxr to ensure update is immediately visible
        __ stlxr(rscratch1, rthread, tmp);
        __ cbnzw(rscratch1, retry_load);
        __ bind(fail);
      }

      // Store a non-null value into the box to avoid looking like a re-entrant
      // lock. The fast-path monitor unlock code checks for
      // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the
      // relevant bit set, and also matches ObjectSynchronizer::slow_enter.
      __ mov(tmp, (address)markOopDesc::unused_mark());
      __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
3425 
  // Inline fast-path monitor exit, mirror of aarch64_enc_fast_lock.
  // Contract (see end of block): flags EQ = unlocked on the fast path,
  // NE = caller must take the slow path.
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      // Branches to cont (with flags set) when the lock was biased.
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

    if (UseLSE) {
      __ mov(tmp, box);
      // CAS markOop back from our box address to the displaced header.
      __ casl(Assembler::xword, tmp, disp_hdr, oop);
      __ cmp(tmp, box);
      __ b(cont);
    } else {
      Label retry_load;
      if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldxr(tmp, oop);
      __ cmp(box, tmp);
      __ br(Assembler::NE, cont);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, disp_hdr, oop);
      // stlxr writes 0 on success; retry on spurious/contended failure.
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);

      // Only release the monitor if no thread is waiting to acquire it.
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(zr, tmp); // set unowned
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
3510 
3511 %}
3512 
3513 //----------FRAME--------------------------------------------------------------
3514 // Definition of frame structure and management information.
3515 //
3516 //  S T A C K   L A Y O U T    Allocators stack-slot number
3517 //                             |   (to get allocators register number
3518 //  G  Owned by    |        |  v    add OptoReg::stack0())
3519 //  r   CALLER     |        |
3520 //  o     |        +--------+      pad to even-align allocators stack-slot
3521 //  w     V        |  pad0  |        numbers; owned by CALLER
3522 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3523 //  h     ^        |   in   |  5
3524 //        |        |  args  |  4   Holes in incoming args owned by SELF
3525 //  |     |        |        |  3
3526 //  |     |        +--------+
3527 //  V     |        | old out|      Empty on Intel, window on Sparc
3528 //        |    old |preserve|      Must be even aligned.
3529 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3530 //        |        |   in   |  3   area for Intel ret address
3531 //     Owned by    |preserve|      Empty on Sparc.
3532 //       SELF      +--------+
3533 //        |        |  pad2  |  2   pad to align old SP
3534 //        |        +--------+  1
3535 //        |        | locks  |  0
3536 //        |        +--------+----> OptoReg::stack0(), even aligned
3537 //        |        |  pad1  | 11   pad to align new SP
3538 //        |        +--------+
3539 //        |        |        | 10
3540 //        |        | spills |  9   spills
3541 //        V        |        |  8   (pad0 slot for callee)
3542 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3543 //        ^        |  out   |  7
3544 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3545 //     Owned by    +--------+
3546 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3547 //        |    new |preserve|      Must be even-aligned.
3548 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3549 //        |        |        |
3550 //
3551 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3552 //         known from SELF's arguments and the Java calling convention.
3553 //         Region 6-7 is determined per call site.
3554 // Note 2: If the calling convention leaves holes in the incoming argument
3555 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
3557 //         incoming area, as the Java calling convention is completely under
3558 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
3560 //         varargs C calling conventions.
3561 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3562 //         even aligned with pad0 as needed.
3563 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3564 //           (the latter is true on Intel but is it false on AArch64?)
3565 //         region 6-11 is even aligned; it may be padded out more so that
3566 //         the region from SP to FP meets the minimum stack alignment.
3567 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3568 //         alignment.  Region 11, pad1, may be dynamically extended so that
3569 //         SP meets the minimum alignment.
3570 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  // NOTE(review): R31 encodes SP in this register file -- compiled frames
  // appear to be SP-relative; confirm against the register definitions.
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Low half of the return-value register pair, indexed by ideal reg type.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    // High half; OptoReg::Bad for 32-bit values that occupy one slot.
    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                      // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
3674 
3675 //----------ATTRIBUTES---------------------------------------------------------
3676 //----------Operand Attributes-------------------------------------------------
op_attrib op_cost(1);        // Required cost attribute (default per-operand
                             // cost; individual operands override it)

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(INSN_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
3692 
3693 //----------OPERANDS-----------------------------------------------------------
3694 // Operand definitions must precede instruction definitions for correct parsing
3695 // in the ADLC because operands constitute user defined types which are used in
3696 // instruction definitions.
3697 
3698 //----------Simple Operands----------------------------------------------------
3699 
3700 // Integer operands 32 bit
3701 // 32 bit immediate
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant no greater than 4
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 31 (e.g. a 32-bit shift-count mask -- TODO confirm users)
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 255 (byte mask)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 65535 (halfword mask)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3853 
// Constant 63.  NOTE(review): deliberately matches ConI/get_int rather
// than ConL -- 64-bit shift counts are int constants in the ideal graph.
// Confirm intent against the instructions that use this operand.
operand immL_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant 255.  NOTE(review): also matches ConI/get_int (see immL_63).
operand immL_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 65535 (halfword mask)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 0xFFFFFFFF (low 32-bit mask)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask of contiguous low-order ones: non-zero, value+1 is a power
// of two, and the top two bits are clear.
operand immL_bitmask()
%{
  predicate((n->get_long() != 0)
            && ((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of contiguous low-order ones (see immL_bitmask).
operand immI_bitmask()
%{
  predicate((n->get_int() != 0)
            && ((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3917 
// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset (long constant) -- for base plus immediate loads
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a 4-byte (shift 2) scaled access
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an 8-byte (shift 3) scaled access
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a 16-byte (shift 4) scaled access
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset for scaled or unscaled immediate loads and stores
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset valid for a 4-byte (shift 2) scaled access
operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset valid for an 8-byte (shift 3) scaled access
operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset valid for a 16-byte (shift 4) scaled access
operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4052 
// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer operands 64 bit
// 64 bit immediate
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor

operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
// (commonly used by int -> long zero-extension patterns -- TODO confirm)
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4161 
// Pointer operands
// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map
  predicate(!UseShenandoahGC && // TODO: Should really check for BS::is_a, see JDK-8193193
    (jbyte*)n->get_ptr() == ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
// NOTE(review): comment identical to immP_M1's -- confirm the distinct
// purpose of the -2 sentinel against its users.
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4243 
// Float and Double operands
// Double Immediate
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'double +0.0'.
operand immD0()
%{
  predicate((n->getd() == 0) &&
            (fpclassify(n->getd()) == FP_ZERO) && (signbit(n->getd()) == 0));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double immediate representable as an FP8 immediate (fmov encoding),
// per Assembler::operand_valid_for_float_immediate.
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'float +0.0'.
operand immF0()
%{
  predicate((n->getf() == 0) &&
            (fpclassify(n->getf()) == FP_ZERO) && (signbit(n->getf()) == 0));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float immediate representable as an FP8 immediate (fmov encoding).
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4304 
// Narrow pointer operands
// Narrow Pointer Immediate
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow Klass Pointer Immediate
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4335 
4336 // Integer 32 bit Register Operands
4337 // Integer 32 bitRegister (excludes SP)
4338 operand iRegI()
4339 %{
4340   constraint(ALLOC_IN_RC(any_reg32));
4341   match(RegI);
4342   match(iRegINoSp);
4343   op_cost(0);
4344   format %{ %}
4345   interface(REG_INTER);
4346 %}
4347 
4348 // Integer 32 bit Register not Special
4349 operand iRegINoSp()
4350 %{
4351   constraint(ALLOC_IN_RC(no_special_reg32));
4352   match(RegI);
4353   op_cost(0);
4354   format %{ %}
4355   interface(REG_INTER);
4356 %}
4357 
4358 // Integer 64 bit Register Operands
4359 // Integer 64 bit Register (includes SP)
4360 operand iRegL()
4361 %{
4362   constraint(ALLOC_IN_RC(any_reg));
4363   match(RegL);
4364   match(iRegLNoSp);
4365   op_cost(0);
4366   format %{ %}
4367   interface(REG_INTER);
4368 %}
4369 
4370 // Integer 64 bit Register not Special
4371 operand iRegLNoSp()
4372 %{
4373   constraint(ALLOC_IN_RC(no_special_reg));
4374   match(RegL);
4375   format %{ %}
4376   interface(REG_INTER);
4377 %}
4378 
// Pointer Register Operands
// Pointer Register
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4411 
// Fixed-register operands: each one allocates from a single-register
// class so rules can pin a value to a specific machine register.

// Pointer 64 bit Register R0 only
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R5 only
operand iRegP_R5()
%{
  constraint(ALLOC_IN_RC(r5_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R10 only
operand iRegP_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R11 only
operand iRegL_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register FP only
operand iRegP_FP()
%{
  constraint(ALLOC_IN_RC(fp_reg));
  match(RegP);
  // match(iRegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4517 
// Integer (32 bit) Register R0 only
operand iRegI_R0()
%{
  constraint(ALLOC_IN_RC(int_r0_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer (32 bit) Register R2 only
operand iRegI_R2()
%{
  constraint(ALLOC_IN_RC(int_r2_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer (32 bit) Register R3 only
operand iRegI_R3()
%{
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4550 
4551 
// Integer (32 bit) Register R4 only
// (original comment said "R2 only" — copy/paste slip; the register
// class below is int_r4_reg)
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4562 
4563 
// Narrow Pointer Register Operands
// Narrow Pointer Register (compressed oop held in a 32-bit view)
operand iRegN()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4575 
// Narrow Pointer 32 bit Register not Special
// (original comment said "Integer 64 bit" — this matches RegN and
// allocates from the 32-bit no-special class)
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4585 
// heap base register -- used for encoding immN0

operand iRegIHeapbase()
%{
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4596 
// Float Register
// Float register operands
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 64-bit (D-sized) vector register operand
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 128-bit (Q/X-sized) vector register operand
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4640 
// Double register pinned to V0
operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double register pinned to V1
operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double register pinned to V2
operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double register pinned to V3
operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4676 
4677 // Flags register, used as output of signed compare instructions
4678 
4679 // note that on AArch64 we also use this register as the output for
4680 // for floating point compare instructions (CmpF CmpD). this ensures
4681 // that ordered inequality tests use GT, GE, LT or LE none of which
4682 // pass through cases where the result is unordered i.e. one or both
4683 // inputs to the compare is a NaN. this means that the ideal code can
4684 // replace e.g. a GT with an LE and not end up capturing the NaN case
4685 // (where the comparison should always fail). EQ and NE tests are
4686 // always generated in ideal code so that unordered folds into the NE
4687 // case, matching the behaviour of AArch64 NE.
4688 //
4689 // This differs from x86 where the outputs of FP compares use a
4690 // special FP flags registers and where compares based on this
4691 // register are distinguished into ordered inequalities (cmpOpUCF) and
4692 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
4693 // to explicitly handle the unordered case in branches. x86 also has
4694 // to include extra CMoveX rules to accept a cmpOpUCF input.
4695 
// Flags register, output of signed (and FP) compares — see the note above.
operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of unsigned compare instructions
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}
4716 
// Special Registers

// Method Register
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Interpreter method oop register (same class as the method register)
operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Thread Register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg (was mislabelled link_reg)
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Link register
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4758 
4759 //----------Memory Operands----------------------------------------------------
4760 
// Simple register-indirect addressing: [reg] with no index or offset.
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff); // 0xffffffff encodes "no index register"
    scale(0x0);
    disp(0x0);
  %}
%}
4774 
// Base + (long index << scale) + unsigned 12-bit int offset.
operand indIndexScaledOffsetI(iRegP reg, iRegL lreg, immIScale scale, immIU12 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL lreg scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $lreg lsl($scale), $off" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// Base + (long index << scale) + unsigned 12-bit long offset.
operand indIndexScaledOffsetL(iRegP reg, iRegL lreg, immIScale scale, immLU12 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL lreg scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $lreg lsl($scale), $off" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// Base + sign-extended int index + unsigned 12-bit long offset.
operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (ConvI2L ireg)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $ireg, $off I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Base + (sign-extended int index << scale) + unsigned 12-bit long offset.
operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL (ConvI2L ireg) scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $ireg sxtw($scale), $off I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

// Base + (sign-extended int index << scale), no displacement.
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Base + (long index << scale), no displacement.
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// Base + long index, unscaled, no displacement.
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
4872 
// Base + immediate int offset. The I4/I8/I16 (and L4/L8/L16) variants
// restrict the offset immediate to values valid for 4/8/16-byte accesses.
operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff); // no index register
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI4(iRegP reg, immIOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI8(iRegP reg, immIOffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI16(iRegP reg, immIOffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Base + immediate long offset.
operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL4(iRegP reg, immLoffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL8(iRegP reg, immLoffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL16(iRegP reg, immLoffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
4984 
// Narrow-oop addressing operands: same shapes as the wide-pointer
// operands above but the base is a DecodeN of a narrow oop. All are
// guarded on narrow_oop_shift() == 0 so the decode is a no-op.
operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indIndexScaledOffsetIN(iRegN reg, iRegL lreg, immIScale scale, immIU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $ireg, $off I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $ireg sxtw($scale), $off I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5134 
5135 
5136 
// AArch64 opto stubs need to write to the pc slot in the thread anchor
// (base = thread register, disp = anchor pc offset).
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff); // no index register
    scale(0x0);
    disp($off);
  %}
%}
5151 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a 32-bit int temporary.
operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a float temporary.
operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a double temporary.
operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a long temporary.
operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
5226 
5227 // Operands for expressing Control Flow
5228 // NOTE: Label is a predefined operand which should not be redefined in
5229 //       the AD file. It is generically handled within the ADLC.
5230 
5231 //----------Conditional Branch Operands----------------------------------------
5232 // Comparison Op  - This is the operation of the comparison, and is limited to
5233 //                  the following set of codes:
5234 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5235 //
5236 // Other attributes of the comparison, such as unsignedness, are specified
5237 // by the comparison instruction that sets a condition code flags register.
5238 // That result is represented by a flags operand whose subtype is appropriate
5239 // to the unsignedness (etc.) of the comparison.
5240 //
5241 // Later, the instruction which matches both the Comparison Op (a Bool) and
5242 // the flags (produced by the Cmp) specifies the coding of the comparison op
5243 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5244 
// used for signed integral comparisons and fp comparisons

operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  // AArch64 condition-code encodings for signed/FP comparisons.
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for unsigned integral comparisons

operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  // Unsigned condition-code encodings (lo/hs/ls/hi).
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5282 
5283 // Special operand allowing long args to int ops to be truncated for free
5284 
// Operand wrapping (ConvL2I iRegL) so a 32-bit instruction can consume the
// low half of a long register directly, eliding the explicit l2i move.
operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  // Terminating semicolon added for consistency with every other
  // operand definition in this file.
  interface(REG_INTER);
%}
5295 
// Vector memory operand classes, grouped by the immediate-offset operand
// they accept (offsets valid for 4, 8 or 16 byte accesses).
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
5299 
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify
// separate instructions for every form of operand when the
// instruction accepts multiple operand types with the same basic
// encoding and format. The classic case of this is memory operands.

// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address

opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. it allows the src to be either an iRegI or a (ConvL2I
// iRegL). in the latter case the l2i normally planted for a ConvL2I
// can be elided because the 32-bit instruction will just employ the
// lower 32 bits anyway.
//
// n.b. this does not elide all L2I conversions. if the truncated
// value is consumed by more than one operation then the ConvL2I
// cannot be bundled into the consuming nodes so an l2i gets planted
// (actually a movw $dst $src) and the downstream instructions consume
// the result of the l2i as an iRegI input. That's a shame since the
// movw is actually redundant but its not too costly.

opclass iRegIorL2I(iRegI, iRegL2I);
5330 
5331 //----------PIPELINE-----------------------------------------------------------
5332 // Rules which define the behavior of the target architectures pipeline.
5333 
5334 // For specific pipelines, eg A53, define the stages of that pipeline
5335 //pipe_desc(ISS, EX1, EX2, WR);
// Map the A53-style stage aliases onto the generic 6-stage
// pipe_desc(S0..S5) declared below.
#define ISS S0
#define EX1 S1
#define EX2 S2
#define WR  S3
5340 
5341 // Integer ALU reg operation
5342 pipeline %{
5343 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // Fixed size instructions (no variable-length encodings)
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
5356 
5357 // We don't use an actual pipeline model so don't care about resources
5358 // or description. we do use pipeline classes to introduce fixed
5359 // latencies
5360 
5361 //----------RESOURCES----------------------------------------------------------
5362 // Resources are the functional units available to the machine
5363 
// Functional-unit resources; INS01 and ALU are unions of the two
// issue/ALU slots, so either slot satisfies them.
resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);
5371 
//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Define the pipeline as a generic 6 stage pipeline
// (the ISS/EX1/EX2/WR aliases above map onto S0..S3)
pipe_desc(S0, S1, S2, S3, S4, S5);
5377 
5378 //----------PIPELINE CLASSES---------------------------------------------------
5379 // Pipeline Classes describe the stages in which input and output are
5380 // referenced by the hardware pipeline.
5381 
// FP dyadic/monadic op and FP->int/long convert classes: operands read
// early (S1/S2), result written at S5, issued via either slot (INS01)
// on the NEON/FP unit.
pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_uop_s(vRegF dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_uop_d(vRegD dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_d2f(vRegF dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_f2d(vRegD dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_f2i(iRegINoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
5455 
// Integer/long <-> float/double conversion classes: same shape as the
// FP unary ops above (read S1, write S5, dual-issue via INS01).
pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_l2f(vRegF dst, iRegL src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_d2i(iRegINoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
5509 
// FP divide classes use the single-issue slot INS0; conditional select,
// immediate-move and constant-load classes complete earlier (S3/S4).
pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class fp_imm_s(vRegF dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class fp_imm_d(vRegD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class fp_load_constant_s(vRegF dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}

pipe_class fp_load_constant_d(vRegD dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}
5583 
// Vector multiply / multiply-accumulate pipe classes.  Sources read at S1,
// result at S5.  64-bit forms dual issue (INS01); 128-bit forms are
// restricted to issue slot 0 (INS0).

// 64-bit vector multiply
pipe_class vmul64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// 128-bit vector multiply
pipe_class vmul128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// 64-bit vector multiply-accumulate.  dst appears twice: written at S5 and
// also read at S1, modelling the accumulator input of MLA.
pipe_class vmla64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// 128-bit vector multiply-accumulate (accumulator read as for vmla64)
pipe_class vmla128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
5625 
// Vector integer dyadic / logical pipe classes.  Sources read at S2;
// dyadic ops produce at S4, logical ops at S3.  64-bit forms dual issue,
// 128-bit forms only in slot 0.

// 64-bit vector dyadic operation
pipe_class vdop64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S4;
%}

// 128-bit vector dyadic operation
pipe_class vdop128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S4;
%}

// 64-bit vector logical operation
pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector logical operation
pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
5665 
// Vector shift pipe classes: operands read at S1, result at S3.
// Register-shift forms take the shift amount in a vector register; the
// _imm forms take an immediate (no shift operand to model).

// 64-bit vector shift by register
pipe_class vshift64(vecD dst, vecD src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector shift by register (issue slot 0 only)
pipe_class vshift128(vecX dst, vecX src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// 64-bit vector shift by immediate
pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector shift by immediate (issue slot 0 only)
pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
5703 
// Vector FP dyadic and multiply/divide pipe classes: sources read at S1,
// result at S5.  Only vdop_fp64 can dual issue; the rest use slot 0.

// 64-bit vector FP dyadic operation
pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// 128-bit vector FP dyadic operation
pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// 64-bit vector FP multiply/divide
pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// 128-bit vector FP multiply/divide
pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
5743 
// Vector FP unary pipe classes: source read at S1, result at S5.

// 128-bit vector FP square root (issue slot 0 only)
pipe_class vsqrt_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// 64-bit vector FP unary operation (dual issue)
pipe_class vunop_fp64(vecD dst, vecD src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// 128-bit vector FP unary operation (issue slot 0 only)
pipe_class vunop_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
5770 
// Vector duplicate (DUP) pipe classes: source read at S1, result at S3,
// dual issue in either slot.  Variants cover GP-register, single- and
// double-precision FP-register sources, at 64- and 128-bit widths.

// DUP from general-purpose register, 64-bit result
pipe_class vdup_reg_reg64(vecD dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// DUP from general-purpose register, 128-bit result
pipe_class vdup_reg_reg128(vecX dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// DUP from float register, 64-bit result
pipe_class vdup_reg_freg64(vecD dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// DUP from float register, 128-bit result
pipe_class vdup_reg_freg128(vecX dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// DUP from double register, 128-bit result
pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
5815 
// Vector move-immediate (MOVI) pipe classes: result at S3.  The 64-bit
// form dual issues; the 128-bit form issues only in slot 0.

pipe_class vmovi_reg_imm64(vecD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class vmovi_reg_imm128(vecX dst)
%{
  single_instruction;
  dst    : S3(write);
  INS0   : ISS;
  NEON_FP : S3;
%}
5831 
// Vector load pipe classes: address operand consumed at issue (ISS),
// destination available at S5.

// 64-bit vector load
pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector load
pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
5849 
// 64-bit vector store: address consumed at issue, data read at S2.
pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
5858 
// 128-bit vector store: address consumed at issue, data read at S2.
// Operand type corrected from vecD to vecX for consistency with the other
// 128-bit classes (cf. vload_reg_mem128); adlc references pipe classes by
// name in ins_pipe, so no instruct definitions are affected.
pipe_class vstore_reg_mem128(vecX src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
5867 
//------- Integer ALU operations --------------------------

// Integer ALU reg-reg operation
// Operands needed in EX1, result generated in EX2
// Eg.  ADD     x0, x1, x2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : EX1(read);
  INS01  : ISS; // Dual issue as instruction 0 or 1
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with constant shift
// Shifted register must be available in LATE_ISS instead of EX1
// Eg.  ADD     x0, x1, x2, LSL #2
pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg operation with constant shift
// Eg.  LSL     x0, x1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with variable shift
// Both operands must be available in LATE_ISS instead of EX1
// Result is available in EX1 instead of EX2
// Eg.  LSLV    x0, x1, x2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX1(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX1;
%}

// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
// NOTE(review): dst is written at EX2 but the ALU resource is booked at
// EX1 here — confirm whether ALU : EX2 was intended.
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  ALU    : EX1;
%}

// Integer ALU reg operation
// Eg.  NEG     x0, x1
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX2(write);
  src    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg immediate operation
// Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU immediate operation (no source operands)
// Eg.  MOV     x0, #N
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX1(write);
  INS01  : ISS;
  ALU    : EX1;
%}
5965 
//------- Compare operation -------------------------------

// Compare reg-reg: flags written at EX2, operands read at EX1.
// Eg.  CMP     x0, x1
pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  op2    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Compare reg-immediate
// Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}
5992 
//------- Conditional instructions ------------------------
// Flags (and any sources) are read at EX1, result written at EX2.

// Conditional no operands
// Eg.  CSINC   x0, zr, zr, <cond>
pipe_class icond_none(iRegI dst, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 2 operand
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src1   : EX1(read);
  src2   : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 1 operand
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
6030 
//------- Multiply pipeline operations --------------------
// All multiplies read their sources at issue and write back on the MAC
// unit at WR.  The l* (64-bit) variants carry an extra fixed latency.

// Multiply reg-reg
// Eg.  MUL     w0, w1, w2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate
// Eg.  MADD    w0, w1, w2, w3
pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// 64 bit multiply
// Eg.  MUL     x0, x1, x2
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// 64 bit multiply accumulate
// Eg.  MADD    x0, x1, x2, x3
pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}
6083 
//------- Divide pipeline operations --------------------
// Divides are modelled with worst-case fixed latencies and restricted to
// issue slot 0.

// 32 bit divide
// Eg.  SDIV    w0, w1, w2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

// 64 bit divide
// Eg.  SDIV    x0, x1, x2
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}
6109 
//------- Load pipeline operations ------------------------
// Loads consume their address operand at issue and occupy the LDST unit
// through writeback.

// Load - prefetch
// Eg.  PFRM    <mem>
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, mem
// Eg.  LDR     x0, <mem>
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, reg
// Eg.  LDR     x0, [sp, x1]
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}
6143 
//------- Store pipeline operations -----------------------
// Stores consume the address at issue and read the data register at EX2.

// Store - zr, mem
// Eg.  STR     zr, <mem>
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, mem
// Eg.  STR     x0, <mem>
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, reg
// Eg. STR      x0, [sp, x1]
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}
6177 
//------- Branch pipeline operations ----------------------
6179 
// Unconditional branch: no operands, resolved on the BRANCH unit at EX1.
pipe_class pipe_branch()
%{
  single_instruction;
  INS01  : ISS;
  BRANCH : EX1;
%}

// Conditional branch: reads the flags at EX1.
pipe_class pipe_branch_cond(rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

// Compare & Branch: reads the tested register at EX1.
// EG.  CBZ/CBNZ
pipe_class pipe_cmp_branch(iRegI op1)
%{
  single_instruction;
  op1    : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}
6206 
//------- Synchronisation operations ----------------------

// Any operation requiring serialization.
// EG.  DMB/Atomic Ops/Load Acquire/Str Release
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Generic big/slow expanded idiom - also serialized.
// Modelled as 10 instructions spread over multiple bundles.
pipe_class pipe_slow()
%{
  instruction_count(10);
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}
6230 
// Empty pipeline class: zero-latency placeholder (used for MachNop below).
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);
%}

// Default pipeline class.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call: pessimistic fixed latency.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);
%}
6265 
// Define the class for the Nop node: nops use the empty (zero-latency)
// pipeline class defined above.
define %{
   MachNop = pipe_class_empty;
%}
6270 
6271 %}
6272 //----------INSTRUCTIONS-------------------------------------------------------
6273 //
6274 // match      -- States which machine-independent subtree may be replaced
6275 //               by this instruction.
6276 // ins_cost   -- The estimated cost of this instruction is used by instruction
6277 //               selection to identify a minimum cost tree of machine
6278 //               instructions that matches a tree of machine-independent
6279 //               instructions.
6280 // format     -- A string providing the disassembly for this instruction.
6281 //               The value of an instruction's operand may be inserted
6282 //               by referring to it with a '$' prefix.
6283 // opcode     -- Three instruction opcodes may be provided.  These are referred
6284 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
6286 //               indicate the type of machine instruction, while secondary
6287 //               and tertiary are often used for prefix options or addressing
6288 //               modes.
6289 // ins_encode -- A list of encode classes with parameters. The encode class
6290 //               name must have been defined in an 'enc_class' specification
6291 //               in the encode section of the architecture description.
6292 
6293 // ============================================================================
6294 // Memory (Load/Store) Instructions
6295 
6296 // Load Instructions
6297 
// Load Byte (8 bit signed).
// The predicate excludes acquiring loads, which require a different
// (ordered) encoding.
instruct loadB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsbw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit signed) into long.
// Matches the ConvI2L on top of the load, so the predicate inspects the
// underlying load node n->in(1).
instruct loadB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned)
instruct loadUB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6353 
// Load Short (16 bit signed).
// As with the byte loads, only non-acquiring loads are matched here.
instruct loadS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrshw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed) into long
instruct loadS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrsh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Char (16 bit unsigned)
instruct loadUS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6409 
// Load Integer (32 bit signed)
instruct loadI(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadI mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed) into long (sign-extending ldrsw)
instruct loadI2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrsw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit unsigned) into long.
// The AndL with the 32-bit mask is subsumed: ldrw zero-extends into the
// 64-bit register, so no separate mask instruction is emitted.
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6451 
// Load Long (64 bit signed)
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  // Disassembly annotation fixed: this is a 64-bit load, not "# int".
  format %{ "ldr  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6465 
// Load Range (array length).  No acquire predicate: range loads never
// carry ordering semantics.
instruct loadRange(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Pointer
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Compressed Pointer (32-bit narrow oop)
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Narrow Klass Pointer
instruct loadNKlass(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6534 
// Load Float.
// Note: FP loads use the generic pipe_class_memory rather than
// iload_reg_mem.
instruct loadF(vRegF dst, memory mem)
%{
  match(Set dst (LoadF mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrs  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_ldrs(dst, mem) );

  ins_pipe(pipe_class_memory);
%}

// Load Double
instruct loadD(vRegD dst, memory mem)
%{
  match(Set dst (LoadD mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrd  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_ldrd(dst, mem) );

  ins_pipe(pipe_class_memory);
%}
6562 
6563 
6564 // Load Int Constant
6565 instruct loadConI(iRegINoSp dst, immI src)
6566 %{
6567   match(Set dst src);
6568 
6569   ins_cost(INSN_COST);
6570   format %{ "mov $dst, $src\t# int" %}
6571 
6572   ins_encode( aarch64_enc_movw_imm(dst, src) );
6573 
6574   ins_pipe(ialu_imm);
6575 %}
6576 
6577 // Load Long Constant
6578 instruct loadConL(iRegLNoSp dst, immL src)
6579 %{
6580   match(Set dst src);
6581 
6582   ins_cost(INSN_COST);
6583   format %{ "mov $dst, $src\t# long" %}
6584 
6585   ins_encode( aarch64_enc_mov_imm(dst, src) );
6586 
6587   ins_pipe(ialu_imm);
6588 %}
6589 
// Load Pointer Constant.
// Costed at 4x: materializing an arbitrary 64-bit pointer may need a
// multi-instruction sequence.
instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr\n\t"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Null Pointer Constant (single instruction, hence base cost)

instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# NULL ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
6619 
// Load Pointer Constant One

instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  // Annotation fixed: this materializes the constant-one pointer, not
  // NULL (the "# NULL ptr" text was copied from loadConP0).
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
6633 
// Load Poll Page Constant (PC-relative adr of the safepoint polling page)

instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Poll Page Ptr" %}

  ins_encode(aarch64_enc_mov_poll_page(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Byte Map Base Constant (card table base, PC-relative adr)

instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Byte Map Base" %}

  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));

  ins_pipe(ialu_imm);
%}
6661 
// Load Narrow Pointer Constant (compressed oop)

instruct loadConN(iRegNNoSp dst, immN con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{ "mov  $dst, $con\t# compressed ptr" %}

  ins_encode(aarch64_enc_mov_n(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Null Pointer Constant

instruct loadConN0(iRegNNoSp dst, immN0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed NULL ptr" %}

  ins_encode(aarch64_enc_mov_n0(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Klass Constant (compressed klass pointer)

instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_mov_nk(dst, con));

  ins_pipe(ialu_imm);
%}
6703 
// Load Packed Float Constant.
// immFPacked constants can be encoded directly in an fmovs immediate
// (the assembler interface takes the value as a double).

instruct loadConF_packed(vRegF dst, immFPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST * 4);
  format %{ "fmovs  $dst, $con"%}
  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
  %}

  ins_pipe(fp_imm_s);
%}

// Load Float Constant.
// General floats cannot be encoded as an immediate, so load from the
// constant table instead.

instruct loadConF(vRegF dst, immF con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 4);

  format %{
    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
  %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_s);
%}
6734 
// Load Packed Double Constant.
// immDPacked constants can be encoded directly in an fmovd immediate.

instruct loadConD_packed(vRegD dst, immDPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST);
  format %{ "fmovd  $dst, $con"%}
  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
  %}

  ins_pipe(fp_imm_d);
%}
6747 
// Load Double Constant.
// General doubles cannot be encoded as an immediate, so load from the
// constant table.

instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  format %{
    // Annotation fixed: was "float=$con" (copied from loadConF).
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
6764 
// Store Instructions

// Store CMS card-mark Immediate.
// Only matched when the preceding membar makes the ordering barrier
// unnecessary (see unnecessary_storestore).
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store CMS card-mark Immediate with intervening StoreStore
// needed when using CMS with no conditional card marking.
// Emits a dmb ishst before the card-mark store.
instruct storeimmCM0_ordered(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST * 2);
  format %{ "dmb ishst"
      "\n\tstrb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0_ordered(mem));

  ins_pipe(istore_mem);
%}
6795 
6796 // Store Byte
// Plain (non-volatile) byte store; only selected when no releasing
// (stlrb) store is required at this node.
instruct storeB(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreB mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb(src, mem));

  ins_pipe(istore_reg_mem);
%}


// Byte store of immediate zero: uses the zero register (zr) directly,
// so no source register is consumed.
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
6823 
6824 // Store Char/Short
// Plain (non-volatile) 16-bit store, used for both char and short.
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}

// 16-bit store of immediate zero via the zero register.
instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}
6850 
6851 // Store Integer
6852 
// Plain (non-volatile) 32-bit integer store.
instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem(StoreI mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// 32-bit store of immediate zero via the zero register.
instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem(StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}
6878 
6879 // Store Long (64 bit signed)
// Plain (non-volatile) 64-bit long store.
// Fixed: the printed comment previously said "# int" for a 64-bit store.
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// 64-bit store of immediate zero via the zero register.
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
6906 
6907 // Store Pointer
// Plain (non-volatile) pointer store.  Uses an open-coded encoder
// rather than aarch64_enc_str because it must special-case storing the
// stack pointer (see below) and must select the correct index-extend
// mode per addressing opcode.
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  ins_encode %{
    int opcode = $mem->opcode();
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int size = $mem$$scale;
    int disp = $mem$$disp;
    Register reg = as_Register($src$$reg);

    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      // Copy sp into a scratch register first; str cannot encode sp
      // as its data operand.
      __ mov(rscratch2, sp);
      reg = rscratch2;
    }
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDOFFSETI2L:
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDOFFSETI2LN:
    case INDINDEXSCALEDI2LN:
    case INDINDEXOFFSETI2L:
    case INDINDEXOFFSETI2LN:
      // 32-bit index converted to long: sign-extend (sxtw) with scale.
      scale = Address::sxtw(size);
      break;
    default:
      // Already a 64-bit index: plain logical shift left by scale.
      scale = Address::lsl(size);
    }

    Address adr;
    if (index == -1) {
      // No index register: base + displacement form.
      adr = Address(base, disp);
    } else {
      if (disp == 0) {
        adr = Address(base, as_Register(index), scale);
      } else {
        // str cannot encode base + index + displacement at once, so
        // fold the displacement into rscratch1 first.
        __ lea(rscratch1, Address(base, disp));
        adr = Address(rscratch1, as_Register(index), scale);
      }
    }

    __ str(reg, adr);
  %}

  ins_pipe(istore_reg_mem);
%}
6967 
6968 // Store Pointer
// Pointer store of immediate NULL (zero) via the zero register.
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
6981 
6982 // Store Compressed Pointer
// Plain (non-volatile) compressed-oop store (32-bit narrow pointer).
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Compressed-oop store of narrow NULL.  Only valid when both the oop
// and klass encoding bases are NULL, in which case rheapbase holds
// zero and can be stored directly instead of materializing a zero.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL  &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}
7010 
7011 // Store Float
// Plain (non-volatile) single-precision float store from an FP register.
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7024 
7025 // TODO
7026 // implement storeImmF0 and storeFImmPacked
7027 
7028 // Store Double
// Plain (non-volatile) double-precision float store from an FP register.
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7041 
7042 // Store Compressed Klass Pointer
// Store Compressed Klass Pointer
// 32-bit store of a narrow klass pointer (object header initialization).
instruct storeNKlass(iRegN src, memory mem)
%{
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
7055 
7056 // TODO
7057 // implement storeImmD0 and storeDImmPacked
7058 
7059 // prefetch instructions
7060 // Must be safe to execute with invalid address (cannot fault).
7061 
// Read prefetch: PLDL1KEEP (load, L1, temporal).
instruct prefetchr( memory mem ) %{
  match(PrefetchRead mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PLDL1KEEP\t# Prefetch into level 1 cache read keep" %}

  ins_encode( aarch64_enc_prefetchr(mem) );

  ins_pipe(iload_prefetch);
%}

// Allocation prefetch: PSTL1KEEP (store, L1, temporal).
instruct prefetchw( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}

// Write prefetch: PSTL1STRM (store, L1, streaming/non-temporal).
// NOTE(review): despite the "nta" name, this matches PrefetchWrite.
instruct prefetchnta( memory mem ) %{
  match(PrefetchWrite mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1STRM\t# Prefetch into level 1 cache write streaming" %}

  ins_encode( aarch64_enc_prefetchnta(mem) );

  ins_pipe(iload_prefetch);
%}
7094 
7095 //  ---------------- volatile loads and stores ----------------
7096 
7097 // Load Byte (8 bit signed)
// Volatile byte load: ldarsb (load-acquire, sign-extending).
// Volatile forms accept only a plain register-indirect address;
// load-acquire instructions have no offset/index addressing modes.
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit signed) into long
// Same ldarsb encoding: sign extension already fills all 64 bits, so
// the ConvI2L is folded into the load.
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned)
// Volatile unsigned byte load: ldarb zero-extends.
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned) into long
// ldarb zero-extends to 64 bits, so ConvI2L is folded into the load.
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}
7148 
7149 // Load Short (16 bit signed)
// Volatile signed short load: ldarshw (load-acquire, sign-extend to 32).
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Volatile unsigned short/char load: ldarhw zero-extends.
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned) into long
// ldarh zero-extends to 64 bits, so ConvI2L is folded into the load.
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
7186 
7187 // Load Short/Char (16 bit signed) into long
// Load Short/Char (16 bit signed) into long
// Sign-extending load-acquire; ConvI2L is folded into the load.
// Fixed: format previously printed "ldarh" although the encoder emits
// the sign-extending ldarsh.
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
7199 
7200 // Load Integer (32 bit signed)
// Volatile 32-bit integer load: ldarw (load-acquire word).
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit unsigned) into long
// ldarw zero-extends to 64 bits, so the AndL with 0xFFFFFFFF mask and
// the ConvI2L are both folded into the load.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
7225 
7226 // Load Long (64 bit signed)
// Volatile 64-bit long load: ldar (load-acquire).
// Fixed: the printed comment previously said "# int" for a 64-bit load.
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
7238 
7239 // Load Pointer
// Volatile pointer load: 64-bit ldar (load-acquire).
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Compressed Pointer
// Volatile narrow-oop load: 32-bit ldarw.
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
7264 
7265 // Load Float
// Volatile float load.  There is no FP load-acquire instruction, so
// the fldars encoding (defined earlier in this file) goes via an
// integer load-acquire plus a register move.
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Load Double
// Volatile double load; same acquire-via-integer-register scheme.
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}
7290 
7291 // Store Byte
// Volatile byte store: stlrb (store-release).
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
// Volatile 16-bit store: stlrh (store-release).
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}
7316 
7317 // Store Integer
7318 
// Volatile 32-bit integer store: stlrw (store-release word).
instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem(StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
7330 
7331 // Store Long (64 bit signed)
// Volatile 64-bit long store: stlr (store-release).
// Fixed: the printed comment previously said "# int" for a 64-bit store.
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
7343 
7344 // Store Pointer
// Volatile pointer store: 64-bit stlr (store-release).
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Pointer
// Volatile narrow-oop store: 32-bit stlrw.
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
7369 
7370 // Store Float
// Volatile float store.  There is no FP store-release instruction, so
// the fstlrs encoding routes the value through an integer register.
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
// Volatile double store; same release-via-integer-register scheme.
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7398 
7399 //  ---------------- end of volatile loads and stores ----------------
7400 
7401 // ============================================================================
7402 // BSWAP Instructions
7403 
// 32-bit byte swap: single revw instruction.
instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesI src));

  ins_cost(INSN_COST);
  format %{ "revw  $dst, $src" %}

  ins_encode %{
    __ revw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// 64-bit byte swap: single rev instruction.
instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
  match(Set dst (ReverseBytesL src));

  ins_cost(INSN_COST);
  format %{ "rev  $dst, $src" %}

  ins_encode %{
    __ rev(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Byte swap of an unsigned 16-bit value: rev16w swaps each halfword's
// bytes; the upper bits stay zero for the char case.
instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesUS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Byte swap of a signed 16-bit value: rev16w to swap the bytes, then
// sbfmw bits 0..15 to re-establish sign extension into bits 16..31.
instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src\n\t"
            "sbfmw $dst, $dst, #0, #15" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
    __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
  %}

  ins_pipe(ialu_reg);
%}
7457 
7458 // ============================================================================
7459 // Zero Count Instructions
7460 
// Count leading zeros of a 32-bit value: single clzw.
instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountLeadingZerosI src));

  ins_cost(INSN_COST);
  format %{ "clzw  $dst, $src" %}
  ins_encode %{
    __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Count leading zeros of a 64-bit value: single clz.
instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountLeadingZerosL src));

  ins_cost(INSN_COST);
  format %{ "clz   $dst, $src" %}
  ins_encode %{
    __ clz(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Count trailing zeros (32-bit): AArch64 has no ctz instruction, so
// bit-reverse with rbitw then count leading zeros.
instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountTrailingZerosI src));

  ins_cost(INSN_COST * 2);
  format %{ "rbitw  $dst, $src\n\t"
            "clzw   $dst, $dst" %}
  ins_encode %{
    __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
    __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Count trailing zeros (64-bit): rbit + clz, as above.
instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountTrailingZerosL src));

  ins_cost(INSN_COST * 2);
  format %{ "rbit   $dst, $src\n\t"
            "clz    $dst, $dst" %}
  ins_encode %{
    __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
    __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}
7512 
7513 //---------- Population Count Instructions -------------------------------------
7514 //
7515 
// Population count (32-bit) via the SIMD cnt instruction: move the
// value to a vector register, count bits per byte (cnt 8B), sum the
// byte counts (addv), then move the result back.
instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // NOTE(review): this rewrites src in place to clear its upper 32
    // bits; for an I-typed register only the low word is significant,
    // but confirm no consumer relies on the upper bits.
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Population count of an int loaded from memory: a 32-bit FP load
// (ldrs) zero-fills the upper lane, so no explicit masking is needed.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7559 
7560 // Note: Long.bitCount(long) returns an int.
// Note: Long.bitCount(long) returns an int.
// Population count (64-bit) via SIMD cnt/addv, same scheme as
// popCountI but without the 32-bit masking step.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Population count of a long loaded from memory: load straight into
// the FP register (ldrd) and count there.
instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7602 
7603 // ============================================================================
7604 // MemBar Instruction
7605 
// LoadFence: orders prior loads before subsequent loads and stores
// (acquire-style barrier).
instruct load_fence() %{
  match(LoadFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "load_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}
  ins_pipe(pipe_serial);
%}

// Elided acquire barrier: selected when unnecessary_acquire(n) proves
// the preceding load-acquire already provides the ordering.  Emits
// only a block comment for the disassembly.
instruct unnecessary_membar_acquire() %{
  predicate(unnecessary_acquire(n));
  match(MemBarAcquire);
  ins_cost(0);

  format %{ "membar_acquire (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire (elided)");
  %}

  ins_pipe(pipe_class_empty);
%}
7631 
// Full acquire barrier: LoadLoad|LoadStore dmb when the barrier could
// not be elided.
instruct membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire" %}

  ins_encode %{
    __ block_comment("membar_acquire");
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}

  ins_pipe(pipe_serial);
%}


// Lock-acquire barrier: always elided here — the CAS used for monitor
// enter already has acquire semantics on AArch64.
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}
7659 
// StoreFence: orders prior loads and stores before subsequent stores
// (release-style barrier).
instruct store_fence() %{
  match(StoreFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "store_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

// Elided release barrier: selected when unnecessary_release(n) proves
// a following store-release already provides the ordering.
instruct unnecessary_membar_release() %{
  predicate(unnecessary_release(n));
  match(MemBarRelease);
  ins_cost(0);

  format %{ "membar_release (elided)" %}

  ins_encode %{
    __ block_comment("membar_release (elided)");
  %}
  ins_pipe(pipe_serial);
%}

// Full release barrier: LoadStore|StoreStore dmb when the barrier
// could not be elided.
instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release" %}

  ins_encode %{
    __ block_comment("membar_release");
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}
7697 
// StoreStore-only barrier (dmb ishst).
instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(VOLATILE_REF_COST);

  format %{ "MEMBAR-store-store" %}

  ins_encode %{
    __ membar(Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

// Lock-release barrier: always elided here — the store-release used
// for monitor exit already has release semantics on AArch64.
instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_release_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}
7722 
// Elided volatile (full) barrier: selected when unnecessary_volatile(n)
// proves the surrounding acquire/release instructions make it redundant.
instruct unnecessary_membar_volatile() %{
  predicate(unnecessary_volatile(n));
  match(MemBarVolatile);
  ins_cost(0);

  format %{ "membar_volatile (elided)" %}

  ins_encode %{
    __ block_comment("membar_volatile (elided)");
  %}

  ins_pipe(pipe_serial);
%}
7736 
// Full volatile barrier: StoreLoad dmb.  Costed very high
// (VOLATILE_REF_COST*100) to strongly prefer the elided form above.
// Fixed: the ins_encode closing delimiter was mis-indented.
instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(VOLATILE_REF_COST*100);

  format %{ "membar_volatile" %}

  ins_encode %{
    __ block_comment("membar_volatile");
    __ membar(Assembler::StoreLoad);
  %}

  ins_pipe(pipe_serial);
%}
7750 
7751 // ============================================================================
7752 // Cast/Convert Instructions
7753 
// CastX2P (long -> pointer): a register move, skipped entirely when
// source and destination were allocated to the same register.
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long -> ptr" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

// CastP2X (pointer -> long): mirror of castX2P.
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
7783 
7784 // Convert oop into int for vectors alignment masking
// Convert oop into int for vectors alignment masking
// movw truncates the pointer to its low 32 bits.
instruct convP2I(iRegINoSp dst, iRegP src) %{
  match(Set dst (ConvL2I (CastP2X src)));

  ins_cost(INSN_COST);
  format %{ "movw $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
7796 
7797 // Convert compressed oop into int for vectors alignment masking
7798 // in case of 32bit oops (heap < 4Gb).
// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
// With a zero narrow-oop shift the compressed value IS the low 32 bits
// of the decoded address, so a 32-bit move suffices.
// Fixed: format string previously printed "mov dst, $src" — missing
// the '$' on dst (so the operand was not substituted) and naming the
// wrong mnemonic; the encoder emits movw.
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
7812 
7813 // Convert oop pointer into compressed form
// Convert oop pointer into compressed form
// General case (oop may be NULL): encode_heap_oop may test the value,
// hence the KILL cr effect.
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Not-null case: no NULL check needed, so no KILL cr effect is
// declared even though cr appears in the signature.
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
7838 
// Decode a compressed oop, general case (value may be narrow NULL).
instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Decode a compressed oop known to be non-null (or a constant), which
// lets the macro assembler skip the NULL check.
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}
7866 
7867 // n.b. AArch64 implementations of encode_klass_not_null and
7868 // decode_klass_not_null do not modify the flags register so, unlike
7869 // Intel, we don't kill CR as a side effect here
7870 
// Compress a klass pointer (always non-null); see the note above about
// flags not being clobbered on AArch64.
instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}

// Decompress a narrow klass pointer (always non-null).  The macro
// assembler has a distinct in-place variant for dst == src.
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}
7904 
// CheckCastPP: type-system-only node; emits no code (size 0).
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// CastPP: type-system-only node; emits no code.
instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// CastII: type-system-only node; emits no code and costs nothing.
instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
7935 
7936 // ============================================================================
7937 // Atomic operation instructions
7938 //
7939 // Intel and SPARC both implement Ideal Node LoadPLocked and
7940 // Store{PIL}Conditional instructions using a normal load for the
7941 // LoadPLocked and a CAS for the Store{PIL}Conditional.
7942 //
7943 // The ideal code appears only to use LoadPLocked/StorePLocked as a
7944 // pair to lock object allocations from Eden space when not using
7945 // TLABs.
7946 //
7947 // There does not appear to be a Load{IL}Locked Ideal Node and the
7948 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
7949 // and to use StoreIConditional only for 32-bit and StoreLConditional
7950 // only for 64-bit.
7951 //
7952 // We implement LoadPLocked and StorePLocked instructions using,
7953 // respectively the AArch64 hw load-exclusive and store-conditional
7954 // instructions. Whereas we must implement each of
7955 // Store{IL}Conditional using a CAS which employs a pair of
7956 // instructions comprising a load-exclusive followed by a
7957 // store-conditional.
7958 
7959 
7960 // Locked-load (linked load) of the current heap-top
7961 // used when updating the eden heap top
7962 // implemented using ldaxr on AArch64
7963 
// Load-exclusive with acquire (ldaxr); pairs with storePConditional below
// to form an LL/SC update of the heap top (see block comment above).
instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}
7976 
7977 // Conditional-store of the updated heap-top.
7978 // Used during allocation of the shared heap.
7979 // Sets flag (EQ) on success.
7980 // implemented using stlxr on AArch64.
7981 
// Store-exclusive with release (stlxr): rscratch1 receives the store
// status and the cmpw against zr sets EQ on a successful write, which is
// the flag result StorePConditional produces.
instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
8001 
8002 
8003 // storeLConditional is used by PhaseMacroExpand::expand_lock_node
8004 // when attempting to rebias a lock towards the current thread.  We
8005 // must use the acquire form of cmpxchg in order to guarantee acquire
8006 // semantics in this case.
// 64-bit conditional store implemented as an acquiring CAS (see the
// comment above on why acquire semantics are required here); rscratch1
// holds the CAS status and the cmpw sets EQ on success.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8022 
8023 // storeIConditional also has acquire semantics, for no better reason
8024 // than matching storeLConditional.  At the time of writing this
8025 // comment storeIConditional was not used anywhere by AArch64.
// 32-bit conditional store; acquiring CAS (cmpxchgw_acq) for consistency
// with storeLConditional, as explained in the comment above.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8041 
8042 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
8043 // can't match them
8044 
8045 // standard CompareAndSwapX when we are using barriers
8046 // these have higher priority than the rules selected by a predicate
8047 
// Standard CompareAndSwapX rules: emit a CAS (load-exclusive followed by
// store-conditional, per the section comment above) and then materialize
// the success flag into $res with cset. The _shenandoah variants instead
// delegate to ShenandoahBarrierSetAssembler::cmpxchg_oop, which writes
// the result into $res itself; their predicates keep them mutually
// exclusive with the plain rules (NULL-compare CASes need no barrier).

instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Pointer CAS without the Shenandoah barrier (GC off, barrier off, or
// comparing against NULL).
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Pointer CAS through the Shenandoah barrier; needs a TEMP register.
instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{

  predicate(UseShenandoahGC && ShenandoahCASBarrier && n->in(3)->in(1)->bottom_type() != TypePtr::NULL_PTR);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

  format %{
    "cmpxchg_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
  %}

  ins_encode(aarch64_enc_cmpxchg_oop_shenandoah(mem, oldval, newval, tmp, res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypeNarrowOop::NULL_PTR);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Narrow-oop CAS through the Shenandoah barrier (relaxed: acquire=false,
// release=true).
instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{

  predicate(UseShenandoahGC && ShenandoahCASBarrier && n->in(3)->in(1)->bottom_type() != TypeNarrowOop::NULL_PTR);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

  format %{
    "cmpxchgw_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
  %}

  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    ShenandoahBarrierSetAssembler::bsasm()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false, $res$$Register);
  %}

  ins_pipe(pipe_slow);
%}
8159 
8160 // alternative CompareAndSwapX when we are eliding barriers
8161 
// Acquire variants of CompareAndSwapX: selected (at lower ins_cost, so in
// preference to the rules above) when needs_acquiring_load_exclusive(n)
// holds, and implemented with the _acq encodings.

instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring pointer CAS without the Shenandoah barrier (GC off, barrier
// off, or comparing against NULL).
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && (!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring pointer CAS through the Shenandoah barrier; needs a TEMP.
instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && UseShenandoahGC && ShenandoahCASBarrier && n->in(3)->in(1)->bottom_type() != TypePtr::NULL_PTR);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

  format %{
    "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq_oop_shenandoah(mem, oldval, newval, tmp, res));

  ins_pipe(pipe_slow);
%}
8235 
// Acquiring narrow-oop CAS without the Shenandoah barrier (GC off,
// barrier off, or comparing against NULL); mirrors compareAndSwapN with
// the _acq encoding.
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && (!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypeNarrowOop::NULL_PTR));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8254 
// Acquiring narrow-oop CAS through the Shenandoah barrier (acquire=true,
// release=true); the barrier writes the result into $res itself.
instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && UseShenandoahGC && ShenandoahCASBarrier && n->in(3)->in(1)->bottom_type() != TypeNarrowOop::NULL_PTR);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

 format %{
    "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
 %}

  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    ShenandoahBarrierSetAssembler::bsasm()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false, $res$$Register);
  %}

  ins_pipe(pipe_slow);
%}
8275 
// GetAndSetX (atomic exchange): store $newv at [$mem] and return the
// previous value in $prev. Plain variants use atomic_xchg[w]; the Acq
// variants, selected by needs_acquiring_load_exclusive(n) at lower cost,
// use the acquiring forms atomic_xchgal[w].

instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetI mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
  match(Set prev (GetAndSetL mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetN mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
  match(Set prev (GetAndSetP mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetI mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchgw_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetL mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetN mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchgw_acq $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetP mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8359 
8360 
// GetAndAddX (atomic fetch-and-add), relaxed forms. Four variants per
// width: register or immediate (immLAddSub/immIAddSub) increment, each
// with a _no_res twin that passes noreg when the node's result is unused
// (predicate result_not_used()), saving the result register.

instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8444 
// Acquiring GetAndAddX variants: same shape as the rules above but
// gated on needs_acquiring_load_exclusive(n), at lower ins_cost so they
// win when applicable, and emitted with atomic_addal[w].

instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addL_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addL_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addI_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addI_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8532 
8533 // ============================================================================
8534 // Conditional Move Instructions
8535 
// n.b. we have identical rules for both a signed compare op (cmpOp)
// and an unsigned compare op (cmpOpU). it would be nice if we could
// define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
// of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
8545 
// Int CMove between two registers, implemented with cselw; signed
// (cmpOp) and unsigned (cmpOpU) flavours are duplicated as explained in
// the n.b. comment at the top of this section.

instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8577 
8578 // special cases where one arg is zero
8579 
8580 // n.b. this is selected in preference to the rule above because it
8581 // avoids loading constant 0 into a source register
8582 
8583 // TODO
8584 // we ought only to be able to cull one of these variants as the ideal
8585 // transforms ought always to order the zero consistently (to left/right?)
8586 
// Int CMove where one operand is the constant 0: use the zero register
// (zr) in the cselw instead of materializing 0 into a source register.
// Four rules: zero on the left or right, signed or unsigned compare.

instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8650 
8651 // special case for creating a boolean 0 or 1
8652 
8653 // n.b. this is selected in preference to the rule above because it
8654 // avoids loading constants 0 and 1 into a source register
8655 
// Boolean-producing CMove (select between constants 1 and 0): emitted as
// csincw dst, zr, zr, cond, equivalent to cset with the negated
// condition, so neither constant needs a register.

instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}

instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
8693 
// Long CMove rules: same structure as the int rules above but using the
// 64-bit csel; signed/unsigned flavours plus zero-operand special cases
// that substitute zr for a constant-0 operand.

instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8791 
// Pointer CMove: dst = $cmp ? $src2 : $src1 (csel takes the
// condition-true operand first, so $src2 precedes $src1).
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned-comparison twin of cmovP_reg_reg (cmpOpU / rFlagsRegU).
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8823 
// special cases where one arg is zero

// Pointer CMove with a null second value: dst = $cmp ? null : $src,
// using zr to encode the null pointer.
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-comparison twin of cmovP_reg_zero.
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Pointer CMove with a null first value: dst = $cmp ? $src : null.
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-comparison twin of cmovP_zero_reg.
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8889 
// Compressed-pointer (narrow oop) CMove; 32-bit cselw since narrow
// oops occupy only the low word.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8905 
// Unsigned-comparison twin of cmovN_reg_reg (cmpOpU / rFlagsRegU).
// Fixed: the format comment previously said "signed" although this rule
// matches the unsigned condition-code operand, unlike all sibling
// unsigned rules which correctly say "unsigned".
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8921 
// special cases where one arg is zero

// Narrow-oop CMove with a null second value: dst = $cmp ? 0 : $src.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-comparison twin of cmovN_reg_zero.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Narrow-oop CMove with a null first value: dst = $cmp ? $src : 0.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-comparison twin of cmovN_zero_reg.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8987 
// Float CMove via FCSEL: dst = $cmp ? $src2 : $src1 (fcsels selects
// its first source operand when the condition holds).
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}

// Unsigned-comparison twin of cmovF_reg (cmpOpU / rFlagsRegU).
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
9023 
// Double CMove via FCSEL (64-bit fcseld): dst = $cmp ? $src2 : $src1.
// Fixed: the format comment previously said "cmove float" although this
// rule matches CMoveD and emits the double-precision fcseld.
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
9041 
// Unsigned-comparison twin of cmovD_reg (cmpOpU / rFlagsRegU).
// Fixed: the format comment previously said "cmove float" although this
// rule matches CMoveD and emits the double-precision fcseld.
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
9059 
9060 // ============================================================================
9061 // Arithmetic Instructions
9062 //
9063 
9064 // Integer Addition
9065 
9066 // TODO
9067 // these currently employ operations which do not set CR and hence are
9068 // not flagged as killing CR but we would like to isolate the cases
9069 // where we want to set flags from those where we don't. need to work
9070 // out how to do that.
9071 
9072 instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
9073   match(Set dst (AddI src1 src2));
9074 
9075   ins_cost(INSN_COST);
9076   format %{ "addw  $dst, $src1, $src2" %}
9077 
9078   ins_encode %{
9079     __ addw(as_Register($dst$$reg),
9080             as_Register($src1$$reg),
9081             as_Register($src2$$reg));
9082   %}
9083 
9084   ins_pipe(ialu_reg_reg);
9085 %}
9086 
9087 instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
9088   match(Set dst (AddI src1 src2));
9089 
9090   ins_cost(INSN_COST);
9091   format %{ "addw $dst, $src1, $src2" %}
9092 
9093   // use opcode to indicate that this is an add not a sub
9094   opcode(0x0);
9095 
9096   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
9097 
9098   ins_pipe(ialu_reg_imm);
9099 %}
9100 
9101 instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
9102   match(Set dst (AddI (ConvL2I src1) src2));
9103 
9104   ins_cost(INSN_COST);
9105   format %{ "addw $dst, $src1, $src2" %}
9106 
9107   // use opcode to indicate that this is an add not a sub
9108   opcode(0x0);
9109 
9110   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
9111 
9112   ins_pipe(ialu_reg_imm);
9113 %}
9114 
// Pointer Addition
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer plus sign-extended int offset, folded into one add with the
// sxtw extend operand.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer plus shifted long index, folded into a single lea with a
// scaled (LSL) register offset.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Pointer plus sign-extended-then-shifted int index, folded into a
// single lea with an sxtw(scale) register offset.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9175 
// Sign-extend an int and left-shift it in one sbfiz (signed bit-field
// insert in zeros); the width operand is capped at 32 since only the
// low 32 bits of the source are significant.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
9190 
// Pointer Immediate Addition
// n.b. this needs to be more expensive than using an indirect memory
// operand
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9207 
// Long Addition
// 64-bit register-register add.
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9224 
// Long Immediate Addition (no constant pool entries required).
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9239 
// Integer Subtraction
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Immediate Subtraction
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9270 
// Long Subtraction
// 64-bit register-register subtract.
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9287 
// Long Immediate Subtraction (no constant pool entries required).
// Fixed: the format string was "sub$dst" (missing the separator after
// the mnemonic), inconsistent with every sibling add/sub rule.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9302 
// Integer Negation (special case for sub)

// dst = 0 - src, matched from SubI with a zero first operand.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Long Negation

// dst = 0 - src, matched from SubL with a zero first operand.
instruct negL_reg(iRegLNoSp dst, iRegIorL2I src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9334 
// Integer Multiply

// 32-bit multiply.
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// 32x32 -> 64 signed multiply: a long multiply of two sign-extended
// ints collapses to a single smull.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
9366 
// Long Multiply

// 64-bit multiply (low 64 bits of the product).
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}

// High 64 bits of a signed 64x64 multiply (smulh).
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  format %{ "smulh   $dst, $src1, $src2, \t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
9399 
// Combined Integer Multiply & Add/Sub

// Fused 32-bit multiply-add: dst = src3 + src1 * src2.
// Fixed: the format string said "madd" although the encoder emits the
// 32-bit maddw form.
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
9417 
// Fused 32-bit multiply-subtract: dst = src3 - src1 * src2.
// Fixed: the format string said "msub" although the encoder emits the
// 32-bit msubw form.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
9433 
// Combined Long Multiply & Add/Sub

// Fused 64-bit multiply-add: dst = src3 + src1 * src2.
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

// Fused 64-bit multiply-subtract: dst = src3 - src1 * src2.
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
9467 
// Integer Divide

instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}

// (x >> 31) >>> 31 collapses to a single logical shift right by 31,
// extracting the sign bit (used by division strength reduction).
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// src + sign-bit-of-src in one addw with a shifted (LSR #31) operand;
// rounds a negative dividend toward zero before a power-of-two shift.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
9503 
// Long Divide

instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}

// (x >> 63) >>> 63 collapses to a single logical shift right by 63,
// extracting the sign bit (64-bit analogue of signExtract).
instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
9525 
// src + sign-bit-of-src in one add with a shifted (LSR #63) operand;
// 64-bit analogue of div2Round.
// Fixed: the format omitted the "LSR" shift designator, hiding the
// shifted-register operand (the 32-bit div2Round rule shows it).
instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "add $dst, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
9539 
// Integer Remainder

// Two-instruction remainder: divide, then multiply-subtract back.
// Fixed: the second format line had a stray "(" after the mnemonic
// ("msubw($dst, ...") with no closing parenthesis, garbling the
// disassembly listing.
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
9552 
// Long Remainder

// Two-instruction remainder: divide, then multiply-subtract back.
// Fixed: stray "(" after the mnemonic as in modI, and the missing
// "\t" after "\n" (inconsistent with modI's two-line format).
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
9565 
// Integer Shifts

// Shift Left Register
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// Shift amount is masked to 0..31 to match Java's int-shift semantics.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9663 
// Combined Int Mask and Right Shift (using UBFM)
// TODO

// Long Shifts

// Shift Left Register
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// Shift amount is masked to 0..63 to match Java's long-shift semantics.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9732 
// A special-case pattern for card table stores.
// Logical right shift of a pointer reinterpreted as a long (CastP2X).
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9748 
// Shift Right Arithmetic Register
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
// Shift amount is masked to 0..63 to match Java's long-shift semantics.
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9780 
// BEGIN This section of the file is automatically generated. Do not edit --------------

// NOTE(review): the rules below fuse a bitwise op with a NOT of one
// operand into the AArch64 combined forms (EON/BIC/ORN); the immI_M1 /
// immL_M1 operands match the all-ones constant that expresses NOT as
// an XOR. Comments only — no generated tokens have been altered.

// not(x) as eon(x, zr): XorL with -1.
instruct regL_not_reg(iRegLNoSp dst,
                         iRegL src1, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
// 32-bit not(x) as eonw(x, zr): XorI with -1.
instruct regI_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}

// x & ~y as a single bicw.
instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// x & ~y as a single bic (64-bit).
instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// x | ~y as a single ornw.
instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// x | ~y as a single orn (64-bit).
instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// ~(x ^ y) as a single eonw; matched as -1 ^ (y ^ x).
instruct XorI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9900 
9901 instruct XorL_reg_not_reg(iRegLNoSp dst,
9902                          iRegL src1, iRegL src2, immL_M1 m1,
9903                          rFlagsReg cr) %{
9904   match(Set dst (XorL m1 (XorL src2 src1)));
9905   ins_cost(INSN_COST);
9906   format %{ "eon  $dst, $src1, $src2" %}
9907 
9908   ins_encode %{
9909     __ eon(as_Register($dst$$reg),
9910               as_Register($src1$$reg),
9911               as_Register($src2$$reg),
9912               Assembler::LSL, 0);
9913   %}
9914 
9915   ins_pipe(ialu_reg_reg);
9916 %}
9917 
// ---- Logic with inverted, shifted operand ----
// dst = src1 OP ~(src2 SHIFT src3), folded into one bic/bicw, eon/eonw or
// orn/ornw with a shifted second register operand.  The inversion comes
// from the matched Xor with -1 (immI_M1/immL_M1 src4).  Shift immediates
// are masked to the operand width: & 0x1f for 32-bit, & 0x3f for 64-bit.

// dst = src1 & ~(src2 >>> src3) (32-bit): bicw ..., LSR #imm
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 >>> src3) (64-bit): bic ..., LSR #imm
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 >> src3) (32-bit): bicw ..., ASR #imm
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 >> src3) (64-bit): bic ..., ASR #imm
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 << src3) (32-bit): bicw ..., LSL #imm
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 << src3) (64-bit): bic ..., LSL #imm
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = ~(src1 ^ (src2 >>> src3)) (32-bit): eonw ..., LSR #imm
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = ~(src1 ^ (src2 >>> src3)) (64-bit): eon ..., LSR #imm
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = ~(src1 ^ (src2 >> src3)) (32-bit): eonw ..., ASR #imm
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = ~(src1 ^ (src2 >> src3)) (64-bit): eon ..., ASR #imm
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = ~(src1 ^ (src2 << src3)) (32-bit): eonw ..., LSL #imm
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = ~(src1 ^ (src2 << src3)) (64-bit): eon ..., LSL #imm
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >>> src3) (32-bit): ornw ..., LSR #imm
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >>> src3) (64-bit): orn ..., LSR #imm
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >> src3) (32-bit): ornw ..., ASR #imm
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >> src3) (64-bit): orn ..., ASR #imm
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 << src3) (32-bit): ornw ..., LSL #imm
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 << src3) (64-bit): orn ..., LSL #imm
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10241 
// ---- Logic with shifted register operand ----
// dst = src1 OP (src2 SHIFT src3), folded into one and/eor/orr with a
// shifted-register second operand (LSR/ASR/LSL forms for each op and
// width).  Shift immediates are masked to the operand width: & 0x1f for
// 32-bit, & 0x3f for 64-bit.

// dst = src1 & (src2 >>> src3) (32-bit): andw ..., LSR #imm
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >>> src3) (64-bit): andr ..., LSR #imm
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >> src3) (32-bit): andw ..., ASR #imm
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >> src3) (64-bit): andr ..., ASR #imm
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 << src3) (32-bit): andw ..., LSL #imm
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 << src3) (64-bit): andr ..., LSL #imm
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >>> src3) (32-bit): eorw ..., LSR #imm
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >>> src3) (64-bit): eor ..., LSR #imm
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >> src3) (32-bit): eorw ..., ASR #imm
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >> src3) (64-bit): eor ..., ASR #imm
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 << src3) (32-bit): eorw ..., LSL #imm
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 << src3) (64-bit): eor ..., LSL #imm
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >>> src3) (32-bit): orrw ..., LSR #imm
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >>> src3) (64-bit): orr ..., LSR #imm
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >> src3) (32-bit): orrw ..., ASR #imm
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >> src3) (64-bit): orr ..., ASR #imm
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 << src3) (32-bit): orrw ..., LSL #imm
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 << src3) (64-bit): orr ..., LSL #imm
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10583 
// ---- Add/Sub with shifted register operand ----
// dst = src1 +/- (src2 SHIFT src3), folded into one add/sub with a
// shifted-register second operand (LSR/ASR/LSL forms for each width).
// Shift immediates are masked to the operand width: & 0x1f for 32-bit,
// & 0x3f for 64-bit.

// dst = src1 + (src2 >>> src3) (32-bit): addw ..., LSR #imm
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >>> src3) (64-bit): add ..., LSR #imm
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >> src3) (32-bit): addw ..., ASR #imm
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >> src3) (64-bit): add ..., ASR #imm
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 << src3) (32-bit): addw ..., LSL #imm
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 << src3) (64-bit): add ..., LSL #imm
instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >>> src3) (32-bit): subw ..., LSR #imm
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >>> src3) (64-bit): sub ..., LSR #imm
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >> src3) (32-bit): subw ..., ASR #imm
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >> src3) (64-bit): sub ..., ASR #imm
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 << src3) (32-bit): subw ..., LSL #imm
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 << src3) (64-bit): sub ..., LSL #imm
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10811 
10812 
10813 
// ---- Bitfield move (shift left then shift right) ---------------------------
// The compiler emits (x << L) >> R for i2b/i2s/i2c and similar
// narrowing idioms; each rule below collapses the shift pair into a
// single SBFM/UBFM.  With immr = r = (R - L) mod width and
// imms = s = width-1 - L, SBFM/UBFM computes exactly (src << L) >> R
// (arithmetic right shift for sbfm/sbfmw, logical for ubfm/ubfmw).

// Shift Left followed by signed Shift Right, 64-bit.
// This idiom is used by the compiler for the i2b bytecode etc.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by signed Shift Right, 32-bit.
// This idiom is used by the compiler for the i2b bytecode etc.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by unsigned Shift Right, 64-bit.
// This idiom is used by the compiler for the i2b bytecode etc.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by unsigned Shift Right, 32-bit.
// This idiom is used by the compiler for the i2b bytecode etc.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
// Bitfield extract with shift & mask
//
// (src >>> rshift) & mask, where mask + 1 is a power of two (which
// immI_bitmask/immL_bitmask guarantee), is a UBFX of
// log2(mask + 1) bits starting at bit rshift.  The predicates reject
// patterns where rshift + width would run past the operand's top bit.

// 32-bit unsigned bitfield extract.
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));
  // Make sure we are not going to exceed what ubfxw can do.
  predicate((exact_log2(n->in(2)->get_int() + 1) + (n->in(1)->in(2)->get_int() & 31)) <= (31 + 1));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant & 31;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// 64-bit unsigned bitfield extract.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));
  // Make sure we are not going to exceed what ubfx can do.
  predicate((exact_log2_long(n->in(2)->get_long() + 1) + (n->in(1)->in(2)->get_int() & 63)) <= (63 + 1));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant & 63;
    long mask = $mask$$constant;
    int width = exact_log2_long(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}

// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// The 64-bit ubfx zeroes the upper bits, so the i2l comes for free.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));
  // Make sure we are not going to exceed what ubfxw can do.
  predicate((exact_log2(n->in(1)->in(2)->get_int() + 1) + (n->in(1)->in(1)->in(2)->get_int() & 31)) <= (31 + 1));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant & 31;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
10961 
// Rotations
//
// (src1 << lshift) | (src2 >>> rshift) with lshift + rshift == 64
// (checked mod 64 by the predicate) is a single EXTR: it concatenates
// src1:src2 and extracts 64 bits starting at bit rshift.

instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
10978 
// int: (src1 << lshift) | (src2 >>> rshift) with lshift + rshift == 32
// (checked mod 32 by the predicate) is a single EXTRW.
// NOTE: this rule is inside the machine-generated section (see the END
// marker below); apply the same format fix in the generator source.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  // was "extr": the encoding emits the 32-bit extrw, so print that
  format %{ "extrw $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
10993 
// long: (src1 << lshift) + (src2 >>> rshift) with lshift + rshift == 64.
// The two shifted fields occupy disjoint bits, so ADD equals OR and the
// result is the same bit-concatenation EXTR performs.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11008 
// int: (src1 << lshift) + (src2 >>> rshift) with lshift + rshift == 32.
// The two shifted fields occupy disjoint bits, so ADD equals OR and the
// result is the same bit-concatenation EXTRW performs.
// NOTE: this rule is inside the machine-generated section (see the END
// marker below); apply the same format fix in the generator source.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  // was "extr": the encoding emits the 32-bit extrw, so print that
  format %{ "extrw $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11023 
11024 
// rol expander
//
// rol(x, s) == ror(x, -s): negate the count into rscratch1 and use
// RORV.  RORV masks its count to the operand width, so both the
// negated form and a zero count behave correctly.  The expander rules
// have no match clause; they are instantiated by the *_Var_* rules
// below.

instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol expander

instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol written as (x << s) | (x >>> (64 - s))
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// rol written as (x << s) | (x >>> (0 - s)); shift counts mask mod 64,
// so (0 - s) and (64 - s) are equivalent
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rol written as (x << s) | (x >>> (32 - s))
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rol written as (x << s) | (x >>> (0 - s))
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// ror expander
//
// ror maps directly onto RORV; no count adjustment needed.

instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander

instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror written as (x >>> s) | (x << (64 - s))
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// ror written as (x >>> s) | (x << (0 - s))
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit ror written as (x >>> s) | (x << (32 - s))
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit ror written as (x >>> s) | (x << (0 - s))
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
11158 
// Add/subtract (extended)
//
// These rules fold a sign/zero extension of src2 into the
// extended-register form of ADD/SUB.  Narrow extensions are matched
// as the (x << n) >> n shift-pair idiom via the immI_16/24/32/48/56
// operands.
// NOTE(review): the int-typed rules below use the 64-bit add form;
// this relies on the port not observing the high 32 bits of an int
// value -- confirm against the port's register conventions.

// long add with int operand: fold the i2l into ADD ... sxtw
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};

// long sub with int operand: fold the i2l into SUB ... sxtw
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};


// int add of a short: (x << 16) >> 16 is sxth
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// int add of a signed byte: (x << 24) >> 24 is sxtb
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// int add of an unsigned byte: (x << 24) >>> 24 is uxtb
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long add of a short: (x << 48) >> 48 is sxth
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long add of an int: (x << 32) >> 32 is sxtw
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long add of a signed byte: (x << 56) >> 56 is sxtb
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// long add of an unsigned byte: (x << 56) >>> 56 is uxtb
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11278 
11279 
// Add/subtract with the second operand zero-extended via an AND mask:
// (x & 0xff) is uxtb, (x & 0xffff) is uxth, (x & 0xffffffff) is uxtw.
// Each rule folds the AND into the extended-register ADD/SUB form.

instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11409 
11410 // END This section of the file is automatically generated. Do not edit --------------
11411 
11412 // ============================================================================
11413 // Floating Point Arithmetic Instructions
11414 
11415 instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
11416   match(Set dst (AddF src1 src2));
11417 
11418   ins_cost(INSN_COST * 5);
11419   format %{ "fadds   $dst, $src1, $src2" %}
11420 
11421   ins_encode %{
11422     __ fadds(as_FloatRegister($dst$$reg),
11423              as_FloatRegister($src1$$reg),
11424              as_FloatRegister($src2$$reg));
11425   %}
11426 
11427   ins_pipe(fp_dop_reg_reg_s);
11428 %}
11429 
11430 instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
11431   match(Set dst (AddD src1 src2));
11432 
11433   ins_cost(INSN_COST * 5);
11434   format %{ "faddd   $dst, $src1, $src2" %}
11435 
11436   ins_encode %{
11437     __ faddd(as_FloatRegister($dst$$reg),
11438              as_FloatRegister($src1$$reg),
11439              as_FloatRegister($src2$$reg));
11440   %}
11441 
11442   ins_pipe(fp_dop_reg_reg_d);
11443 %}
11444 
11445 instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
11446   match(Set dst (SubF src1 src2));
11447 
11448   ins_cost(INSN_COST * 5);
11449   format %{ "fsubs   $dst, $src1, $src2" %}
11450 
11451   ins_encode %{
11452     __ fsubs(as_FloatRegister($dst$$reg),
11453              as_FloatRegister($src1$$reg),
11454              as_FloatRegister($src2$$reg));
11455   %}
11456 
11457   ins_pipe(fp_dop_reg_reg_s);
11458 %}
11459 
11460 instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
11461   match(Set dst (SubD src1 src2));
11462 
11463   ins_cost(INSN_COST * 5);
11464   format %{ "fsubd   $dst, $src1, $src2" %}
11465 
11466   ins_encode %{
11467     __ fsubd(as_FloatRegister($dst$$reg),
11468              as_FloatRegister($src1$$reg),
11469              as_FloatRegister($src2$$reg));
11470   %}
11471 
11472   ins_pipe(fp_dop_reg_reg_d);
11473 %}
11474 
11475 instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
11476   match(Set dst (MulF src1 src2));
11477 
11478   ins_cost(INSN_COST * 6);
11479   format %{ "fmuls   $dst, $src1, $src2" %}
11480 
11481   ins_encode %{
11482     __ fmuls(as_FloatRegister($dst$$reg),
11483              as_FloatRegister($src1$$reg),
11484              as_FloatRegister($src2$$reg));
11485   %}
11486 
11487   ins_pipe(fp_dop_reg_reg_s);
11488 %}
11489 
11490 instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
11491   match(Set dst (MulD src1 src2));
11492 
11493   ins_cost(INSN_COST * 6);
11494   format %{ "fmuld   $dst, $src1, $src2" %}
11495 
11496   ins_encode %{
11497     __ fmuld(as_FloatRegister($dst$$reg),
11498              as_FloatRegister($src1$$reg),
11499              as_FloatRegister($src2$$reg));
11500   %}
11501 
11502   ins_pipe(fp_dop_reg_reg_d);
11503 %}
11504 
// We cannot use these fused mul w add/sub ops because they don't
// produce the same result as the equivalent separated ops
// (essentially they don't round the intermediate result). That's a
// shame. Leaving them here in case we can identify cases where it is
// legitimate to use them.
11510 
11511 
11512 // instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11513 //   match(Set dst (AddF (MulF src1 src2) src3));
11514 
11515 //   format %{ "fmadds   $dst, $src1, $src2, $src3" %}
11516 
11517 //   ins_encode %{
11518 //     __ fmadds(as_FloatRegister($dst$$reg),
11519 //              as_FloatRegister($src1$$reg),
11520 //              as_FloatRegister($src2$$reg),
11521 //              as_FloatRegister($src3$$reg));
11522 //   %}
11523 
11524 //   ins_pipe(pipe_class_default);
11525 // %}
11526 
11527 // instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11528 //   match(Set dst (AddD (MulD src1 src2) src3));
11529 
11530 //   format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
11531 
11532 //   ins_encode %{
11533 //     __ fmaddd(as_FloatRegister($dst$$reg),
11534 //              as_FloatRegister($src1$$reg),
11535 //              as_FloatRegister($src2$$reg),
11536 //              as_FloatRegister($src3$$reg));
11537 //   %}
11538 
11539 //   ins_pipe(pipe_class_default);
11540 // %}
11541 
11542 // instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11543 //   match(Set dst (AddF (MulF (NegF src1) src2) src3));
11544 //   match(Set dst (AddF (NegF (MulF src1 src2)) src3));
11545 
11546 //   format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
11547 
11548 //   ins_encode %{
11549 //     __ fmsubs(as_FloatRegister($dst$$reg),
11550 //               as_FloatRegister($src1$$reg),
11551 //               as_FloatRegister($src2$$reg),
11552 //              as_FloatRegister($src3$$reg));
11553 //   %}
11554 
11555 //   ins_pipe(pipe_class_default);
11556 // %}
11557 
11558 // instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11559 //   match(Set dst (AddD (MulD (NegD src1) src2) src3));
11560 //   match(Set dst (AddD (NegD (MulD src1 src2)) src3));
11561 
11562 //   format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
11563 
11564 //   ins_encode %{
11565 //     __ fmsubd(as_FloatRegister($dst$$reg),
11566 //               as_FloatRegister($src1$$reg),
11567 //               as_FloatRegister($src2$$reg),
11568 //               as_FloatRegister($src3$$reg));
11569 //   %}
11570 
11571 //   ins_pipe(pipe_class_default);
11572 // %}
11573 
11574 // instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11575 //   match(Set dst (SubF (MulF (NegF src1) src2) src3));
11576 //   match(Set dst (SubF (NegF (MulF src1 src2)) src3));
11577 
11578 //   format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
11579 
11580 //   ins_encode %{
11581 //     __ fnmadds(as_FloatRegister($dst$$reg),
11582 //                as_FloatRegister($src1$$reg),
11583 //                as_FloatRegister($src2$$reg),
11584 //                as_FloatRegister($src3$$reg));
11585 //   %}
11586 
11587 //   ins_pipe(pipe_class_default);
11588 // %}
11589 
11590 // instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11591 //   match(Set dst (SubD (MulD (NegD src1) src2) src3));
11592 //   match(Set dst (SubD (NegD (MulD src1 src2)) src3));
11593 
11594 //   format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
11595 
11596 //   ins_encode %{
11597 //     __ fnmaddd(as_FloatRegister($dst$$reg),
11598 //                as_FloatRegister($src1$$reg),
11599 //                as_FloatRegister($src2$$reg),
11600 //                as_FloatRegister($src3$$reg));
11601 //   %}
11602 
11603 //   ins_pipe(pipe_class_default);
11604 // %}
11605 
11606 // instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
11607 //   match(Set dst (SubF (MulF src1 src2) src3));
11608 
11609 //   format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
11610 
11611 //   ins_encode %{
11612 //     __ fnmsubs(as_FloatRegister($dst$$reg),
11613 //                as_FloatRegister($src1$$reg),
11614 //                as_FloatRegister($src2$$reg),
11615 //                as_FloatRegister($src3$$reg));
11616 //   %}
11617 
11618 //   ins_pipe(pipe_class_default);
11619 // %}
11620 
11621 // instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
11622 //   match(Set dst (SubD (MulD src1 src2) src3));
11623 
11624 //   format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
11625 
11626 //   ins_encode %{
11627 //   // n.b. insn name should be fnmsubd
11628 //     __ fnmsub(as_FloatRegister($dst$$reg),
11629 //                as_FloatRegister($src1$$reg),
11630 //                as_FloatRegister($src2$$reg),
11631 //                as_FloatRegister($src3$$reg));
11632 //   %}
11633 
11634 //   ins_pipe(pipe_class_default);
11635 // %}
11636 
11637 
// single-precision divide
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}

// double-precision divide
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
11667 
// Float negate: fnegs Sd, Sn.
// Fix: format mnemonic corrected from "fneg" to "fnegs" so the
// disassembly annotation matches the emitted instruction (and the
// "fnegd" convention used by negD_reg_reg).
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
11681 
// Double negate: fnegd Dd, Dn.
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}

// Float absolute value: fabss Sd, Sn.
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}

// Double absolute value: fabsd Dd, Dn.
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
11721 
// Double square root: fsqrtd Dd, Dn.
// Fix: pipe class was fp_div_s (single-precision divide pipe); a
// double-precision sqrt belongs on fp_div_d.  The classes were swapped
// with sqrtF_reg below.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_d);
%}
11734 
// Float square root: fsqrts Sd, Sn.
// The ideal graph has no SqrtF node here, so float sqrt is matched as
// the ConvD2F(SqrtD(ConvF2D src)) shape produced for (float)Math.sqrt.
// Fix: pipe class was fp_div_d (double-precision divide pipe); a
// single-precision sqrt belongs on fp_div_s.  The classes were swapped
// with sqrtD_reg above.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
              as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_s);
%}
11747 
11748 // ============================================================================
11749 // Logical Instructions
11750 
11751 // Integer Logical Instructions
11752 
11753 // And Instructions
11754 
11755 
// Int and (register form): andw Wd, Wn, Wm.
// NOTE(review): rFlagsReg cr is declared as an operand but has no
// effect() clause -- presumably historical; confirm whether it can be
// dropped.
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11770 
// Int and with a logical immediate: andw Wd, Wn, #imm.
// Fix: format said "andsw" (the flag-setting form) but the encoding
// emits plain andw; the annotation now matches the emitted instruction.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11785 
11786 // Or Instructions
11787 
// Int or (register form): orrw Wd, Wn, Wm.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Int or with a logical immediate: orrw Wd, Wn, #imm.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// Int xor (register form): eorw Wd, Wn, Wm.
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Int xor with a logical immediate: eorw Wd, Wn, #imm.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11849 
11850 // Long Logical Instructions
11851 // TODO
11852 
// Long and (register form): and Xd, Xn, Xm.
// Fix: format annotation "\t# int" was copied from the 32-bit rule;
// this is the 64-bit form, so it is tagged "# long".
// NOTE(review): rFlagsReg cr has no effect() clause -- presumably
// historical; confirm whether it can be dropped.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11867 
// Long and with a logical immediate: and Xd, Xn, #imm.
// Fix: format annotation corrected from "# int" to "# long" (64-bit form).
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11882 
11883 // Or Instructions
11884 
// Long or (register form): orr Xd, Xn, Xm.
// Fix: format annotation corrected from "# int" to "# long" (64-bit form).
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11899 
// Long or with a logical immediate: orr Xd, Xn, #imm.
// Fix: format annotation corrected from "# int" to "# long" (64-bit form).
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11914 
11915 // Xor Instructions
11916 
// Long xor (register form): eor Xd, Xn, Xm.
// Fix: format annotation corrected from "# int" to "# long" (64-bit form).
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11931 
// Long xor with a logical immediate: eor Xd, Xn, #imm.
// Fix: format annotation corrected from "# int" to "# long" (64-bit form).
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11946 
// Sign-extend int to long: sbfm with immr=0, imms=31 is the sxtw alias.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// this pattern occurs in bigmath arithmetic
// Unsigned int-to-long: (ConvI2L src) & 0xFFFFFFFF collapses into a
// single ubfm (uxtw alias) zero-extension.
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Long-to-int: a 32-bit register move keeps only the low 32 bits.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Normalize an int to 0/1: compare against zero, then cset on NE.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Normalize a pointer to 0/1 (64-bit compare against zero).
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Double-to-float narrowing conversion.
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2f);
%}

// Float-to-double widening conversion.
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}

// Float-to-int conversion (fcvtzs: round toward zero, 32-bit dest).
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}

// Float-to-long conversion (round toward zero, 64-bit dest).
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}

// Int-to-float conversion (signed 32-bit source).
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}

// Long-to-float conversion (signed 64-bit source).
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}

// Double-to-int conversion (round toward zero, 32-bit dest).
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}

// Double-to-long conversion (round toward zero, 64-bit dest).
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}

// Int-to-double conversion (signed 32-bit source).
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}

// Long-to-double conversion (signed 64-bit source).
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
12151 
12152 // stack <-> reg and reg <-> reg shuffles with no conversion
12153 
// Reinterpret a float stack slot as an int (raw 32-bit load, no
// conversion).
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Reinterpret an int stack slot as a float (raw 32-bit FP load).
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Reinterpret a double stack slot as a long (raw 64-bit load).
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Reinterpret a long stack slot as a double (raw 64-bit FP load).
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Spill a float register to an int stack slot (raw 32-bit FP store).
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Spill an int register to a float stack slot (raw 32-bit store).
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
12261 
// Spill a double register to a long stack slot (raw 64-bit FP store).
// Fix: format operands were printed as "$dst, $src", the reverse of the
// emitted "strd src, [sp, #dst]" and of every sibling Move*_reg_stack
// rule; corrected to "$src, $dst".
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
12279 
// Spill a long register to a double stack slot (raw 64-bit store).
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}

// Bit-pattern move float -> int via fmov (no memory round trip).
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}

// Bit-pattern move int -> float via fmov.
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}

// Bit-pattern move double -> long via fmov.
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}

// Bit-pattern move long -> double via fmov.
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
12369 
12370 // ============================================================================
12371 // clearing of an array
12372 
// Clear an array given a variable count.  The count and base are pinned
// to r11/r10 because zero_words clobbers them (hence USE_KILL).
// NOTE(review): the count unit is whatever zero_words expects --
// presumably HeapWords; confirm against MacroAssembler::zero_words.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, $cnt$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}

// Clear an array with a compile-time-constant count; tmp (r11) is a
// scratch register for the zero_words expansion.
// NOTE(review): cr is declared but has no effect() clause -- confirm
// whether zero_words can clobber flags here.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 tmp, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL base, TEMP tmp);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
  %}

  ins_pipe(pipe_class_memory);
%}
12402 
12403 // ============================================================================
12404 // Overflow Math Instructions
12405 
// Overflow checks set the flags so that the V flag (VS/VC) reflects
// signed overflow of the add/sub; the consumer tests overflow/no_overflow.
// Add: cmn computes op1 + op2 and discards the result, keeping flags.
instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Sub: cmp computes op1 - op2, discarding the result, keeping flags.
instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Negation overflow: 0 - op1, checked with cmp against zr.
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  format %{ "cmpw  zr, $op1\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}

// NOTE(review): the zero operand is immI0 even though the subtraction
// is long -- appears intentional since only the node shape is matched;
// confirm against the ideal-graph types.
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Int multiply overflow: do the multiply in 64 bits, then compare the
// 64-bit product against its own sign-extended low half; a mismatch
// means the product does not fit in 32 bits.  The tail materializes a
// value whose subtraction by 1 sets V, so consumers can test VS/VC.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused form: when the overflow check feeds an If directly, skip the
// V-flag materialization and branch on the NE/EQ result of the compare.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}

// Long multiply overflow: compute low (mul) and high (smulh) halves of
// the 128-bit product; overflow iff the high half is not the pure sign
// extension of the low half.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #31\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 31);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused branch form of the long multiply overflow check (see above).
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #31\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 31);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
12625 
12626 // ============================================================================
12627 // Compare Instructions
12628 
12629 instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
12630 %{
12631   match(Set cr (CmpI op1 op2));
12632 
12633   effect(DEF cr, USE op1, USE op2);
12634 
12635   ins_cost(INSN_COST);
12636   format %{ "cmpw  $op1, $op2" %}
12637 
12638   ins_encode(aarch64_enc_cmpw(op1, op2));
12639 
12640   ins_pipe(icmp_reg_reg);
12641 %}
12642 
12643 instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
12644 %{
12645   match(Set cr (CmpI op1 zero));
12646 
12647   effect(DEF cr, USE op1);
12648 
12649   ins_cost(INSN_COST);
12650   format %{ "cmpw $op1, 0" %}
12651 
12652   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
12653 
12654   ins_pipe(icmp_reg_imm);
12655 %}
12656 
12657 instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
12658 %{
12659   match(Set cr (CmpI op1 op2));
12660 
12661   effect(DEF cr, USE op1);
12662 
12663   ins_cost(INSN_COST);
12664   format %{ "cmpw  $op1, $op2" %}
12665 
12666   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
12667 
12668   ins_pipe(icmp_reg_imm);
12669 %}
12670 
12671 instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
12672 %{
12673   match(Set cr (CmpI op1 op2));
12674 
12675   effect(DEF cr, USE op1);
12676 
12677   ins_cost(INSN_COST * 2);
12678   format %{ "cmpw  $op1, $op2" %}
12679 
12680   ins_encode(aarch64_enc_cmpw_imm(op1, op2));
12681 
12682   ins_pipe(icmp_reg_imm);
12683 %}
12684 
12685 // Unsigned compare Instructions; really, same as signed compare
12686 // except it should only be used to feed an If or a CMovI which takes a
12687 // cmpOpU.
12688 
12689 instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
12690 %{
12691   match(Set cr (CmpU op1 op2));
12692 
12693   effect(DEF cr, USE op1, USE op2);
12694 
12695   ins_cost(INSN_COST);
12696   format %{ "cmpw  $op1, $op2\t# unsigned" %}
12697 
12698   ins_encode(aarch64_enc_cmpw(op1, op2));
12699 
12700   ins_pipe(icmp_reg_reg);
12701 %}
12702 
12703 instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
12704 %{
12705   match(Set cr (CmpU op1 zero));
12706 
12707   effect(DEF cr, USE op1);
12708 
12709   ins_cost(INSN_COST);
12710   format %{ "cmpw $op1, #0\t# unsigned" %}
12711 
12712   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
12713 
12714   ins_pipe(icmp_reg_imm);
12715 %}
12716 
12717 instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
12718 %{
12719   match(Set cr (CmpU op1 op2));
12720 
12721   effect(DEF cr, USE op1);
12722 
12723   ins_cost(INSN_COST);
12724   format %{ "cmpw  $op1, $op2\t# unsigned" %}
12725 
12726   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
12727 
12728   ins_pipe(icmp_reg_imm);
12729 %}
12730 
// Unsigned int compare against a general immediate. Costed at two
// instructions since the immediate presumably has to be materialized
// first (enc_cmpw_imm vs the addsub-immediate variant above).
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare, register-register.
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed long compare against zero.
instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an immediate encodable in the cmp
// (add/sub immediate) form — single instruction.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against a general immediate (two-instruction cost).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare, register-register; result interpreted through
// the unsigned flags register.
instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned long compare against zero.
instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpUL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare against an add/sub-encodable immediate.
instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare against a general immediate (two-instruction cost).
instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
12856 
// Pointer compare, register-register (unsigned semantics).
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compressed-pointer (narrow oop) compare, register-register.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Pointer null test (compare against the immP0 constant).
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Compressed-pointer null test (compare against the immN0 constant).
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
12912 
12913 // FP comparisons
12914 //
12915 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
12916 // using normal cmpOp. See declaration of rFlagsReg for details.
12917 
// Single-precision FP compare, register-register (fcmps sets NZCV).
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Single-precision FP compare against the +0.0 constant form of fcmps.
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
12945 // FROM HERE
12946 
// Double-precision FP compare, register-register (fcmpd sets NZCV).
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Double-precision FP compare against the +0.0 constant form of fcmpd.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
12974 
// Manifest a three-way float compare (CmpF3) in an integer register:
// -1 if src1 < src2 or unordered, 0 if equal, 1 if greater.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Note: the unused `Label done` and its no-op bind were removed;
    // nothing ever branched to it.
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
13002 
// Manifest a three-way double compare (CmpD3) in an integer register:
// -1 if src1 < src2 or unordered, 0 if equal, 1 if greater.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Note: the unused `Label done` and its no-op bind were removed;
    // nothing ever branched to it.
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
13029 
// Three-way float compare against 0.0 (CmpF3 with constant zero):
// -1 if src1 < 0 or unordered, 0 if equal, 1 if greater.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Note: the unused `Label done` and its no-op bind were removed;
    // nothing ever branched to it.
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
13056 
// Three-way double compare against 0.0 (CmpD3 with constant zero):
// -1 if src1 < 0 or unordered, 0 if equal, 1 if greater.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Note: the unused `Label done` and its no-op bind were removed;
    // nothing ever branched to it.
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
13082 
13083 // Manifest a CmpL result in an integer register.
13084 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  // Fixed format: lines now separated with \n\t instead of being
  // concatenated, and cnegw shows its source operand to match the
  // emitted instruction. The stale commented-out format was dropped.
  format %{
      "cmp $src1, $src2\n\t"
      "csetw $dst, ne\n\t"
      "cnegw $dst, $dst, lt"
  %}
  ins_encode %{
    __ cmp($src1$$Register, $src2$$Register);
    // dst = (src1 != src2) ? 1 : 0
    __ csetw($dst$$Register, Assembler::NE);
    // negate dst when src1 < src2, yielding -1/0/1
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
13105 
// CmpLTMask: dst = (p < q) ? -1 : 0, computed as csetw (0/1) then
// negated with subw from zr.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// CmpLTMask against zero: an arithmetic shift right by 31 replicates
// the sign bit, giving -1 for negative src and 0 otherwise — single
// instruction, no flags clobbered.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
13142 
13143 // ============================================================================
13144 // Max and Min
13145 
// Signed int minimum: cmpw then conditional-select with LT.
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 lt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
// FROM HERE

// Signed int maximum: cmpw then conditional-select with GT.
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 gt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
13196 
13197 // ============================================================================
13198 // Branch Instructions
13199 
13200 // Direct Branch.
// Unconditional direct branch (Goto).
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}

// Conditional Near Branch
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}

// Conditional Near Branch Unsigned — same shape as branchCon but uses
// the unsigned condition codes (cmpOpU / rFlagsRegU).
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
13256 
13257 // Make use of CBZ and CBNZ.  These instructions, as well as being
13258 // shorter than (cmp; branch), have the additional benefit of not
13259 // killing the flags.
13260 
// int eq/ne compare with zero, fused into cbzw/cbnzw (no flags written).
instruct cmpI_imm0_branch(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// long eq/ne compare with zero, fused into cbz/cbnz.
instruct cmpL_imm0_branch(cmpOp cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// pointer eq/ne compare with null, fused into cbz/cbnz.
instruct cmpP_imm0_branch(cmpOp cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// narrow-oop eq/ne compare with zero, fused into cbzw/cbnzw.
instruct cmpN_imm0_branch(cmpOp cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Null test of a decoded narrow oop: tests the narrow register directly
// with cbzw/cbnzw, skipping the decode (presumes null encodes as
// narrow zero — standard for compressed oops).
instruct cmpP_narrowOop_imm0_branch(cmpOp cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13355 
// Unsigned int eq/ne/gt/le compare with zero, fused into cbzw/cbnzw.
// (Fixed stray double space in the `le` predicate line to match the
// cmpUL_imm0_branch twin below.)
instruct cmpUI_imm0_branch(cmpOpU cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq
            || n->in(1)->as_Bool()->_test._test == BoolTest::gt
            || n->in(1)->as_Bool()->_test._test == BoolTest::le);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // Against an unsigned zero: x <= 0 (LS) iff x == 0, and x > 0 (HI)
    // iff x != 0, so all four tests reduce to cbzw/cbnzw.
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13376 
// Unsigned long eq/ne/gt/le compare with zero, fused into cbz/cbnz.
// Against unsigned zero, gt is equivalent to ne and le to eq.
instruct cmpUL_imm0_branch(cmpOpU cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpUL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq
            || n->in(1)->as_Bool()->_test._test == BoolTest::gt
            || n->in(1)->as_Bool()->_test._test == BoolTest::le);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13397 
13398 // Test bit and Branch
13399 
13400 // Patterns for short (< 32KiB) variants
// long lt/ge vs zero reduces to a test of the sign bit (bit 63):
// LT maps to "branch if bit set" (NE), GE to "branch if clear" (EQ).
instruct cmpL_branch_sign(cmpOp cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// int lt/ge vs zero: test the sign bit (bit 31), as above.
instruct cmpI_branch_sign(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// (long & single-bit-mask) eq/ne 0: test just that bit. The predicate
// requires the AND mask to be a power of two.
instruct cmpL_branch_bit(cmpOp cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// (int & single-bit-mask) eq/ne 0: test just that bit.
instruct cmpI_branch_bit(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
13474 
13475 // And far variants
// Far variant of cmpL_branch_sign: same sign-bit test, emitted with
// the far form (no ins_short_branch, tbr called with far=true).
instruct far_cmpL_branch_sign(cmpOp cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of cmpI_branch_sign.
instruct far_cmpI_branch_sign(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of cmpL_branch_bit.
instruct far_cmpL_branch_bit(cmpOp cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of cmpI_branch_bit.
instruct far_cmpI_branch_bit(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13545 
13546 // Test bits
13547 
// (long & immediate) compared with zero: single tst when the mask is
// encodable as a 64-bit logical immediate.
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
13560 
// (int & immediate) compared with zero: single tstw when the mask is
// encodable as a 32-bit logical immediate. Format corrected from
// "tst" to "tstw" to match the emitted instruction (and the
// register-register variant cmpI_and_reg).
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
13573 
// (long & register) compared with zero: single tst, register form.
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// (int & register) compared with zero: single tstw, register form.
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13595 
13596 
13597 // Conditional Far Branch
13598 // Conditional Far Branch Unsigned
13599 // TODO: fixme
13600 
13601 // counted loop end branch near
// Counted-loop back branch (signed condition), near form; shares the
// conditional-branch encoding with branchCon.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}

// counted loop end branch near Unsigned
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
13634 
13635 // counted loop end branch far
13636 // counted loop end branch far unsigned
13637 // TODO: fixme
13638 
13639 // ============================================================================
13640 // inlined locking and unlocking
13641 
// Inlined monitor enter; tmp/tmp2 are clobbered as scratch by the
// fast-lock encoding.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}

// Inlined monitor exit; tmp/tmp2 are clobbered as scratch.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
13669 
13670 
13671 // ============================================================================
13672 // Safepoint Instructions
13673 
13674 // TODO
13675 // provide a near and far version of this code
13676 
// Safepoint poll: a load from the polling page; the VM makes the page
// unreadable to trap threads at safepoints.
instruct safePoint(rFlagsReg cr, iRegP poll)
%{
  match(SafePoint poll);
  effect(KILL cr);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
13690 
13691 
13692 // ============================================================================
13693 // Procedure Call/Return Instructions
13694 
13695 // Call Java Static Instruction
13696 
// Direct static Java call; the method-handle-invoke case is excluded
// here and handled by CallStaticJavaDirectHandle below.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  predicate(!((CallStaticJavaNode*)n)->is_method_handle_invoke());

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
13714 
13715 // TO HERE
13716 
13717 // Call Java Static Instruction (method handle version)
13718 
// Static Java call for method-handle invokes (uses the handle-call
// encoding; reg_mh_save names the FP register involved in MH dispatch
// — TODO confirm its exact role against aarch64_enc_java_handle_call).
instruct CallStaticJavaDirectHandle(method meth, iRegP_FP reg_mh_save)
%{
  match(CallStaticJava);

  effect(USE meth);

  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// (methodhandle) ==> " %}

  ins_encode( aarch64_enc_java_handle_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}

// Call Java Dynamic Instruction (virtual/interface dispatch)
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Leaf Instruction (no safepoint state transition)
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Leaf-NoFP Instruction
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
13804 
// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}

// Tail Jump: indirect branch with the exception oop pinned in r0
// (ex_oop); unlike TailCall above, the return address has been removed.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
13834 
// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
// TODO check
// should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  // Zero-length instruction: the oop is already in r0 on entry.
  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}

// Rethrow exception: The exception oop will come in the first
// argument position. Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}


// Return Instruction
// epilog node loads ret address into lr as part of frame pop
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}

// Die now.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // TODO
    // implement proper trap call here
    __ brk(999);  // breakpoint with arbitrary immediate; placeholder trap
  %}

  ins_pipe(pipe_class_default);
%}
13895 
13896 // ============================================================================
13897 // Partial Subtype Check
13898 // 
13899 // superklass array for an instance of the superklass.  Set a hidden
13900 // internal cache on a hit (cache is checked with exposed code in
13901 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
13902 // encoding ALSO sets flags.
13903 
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  // temp is scratch for the scan loop; the flags are set by the check.
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}
13918 
// Flags-only variant: matched when the check's value is only compared
// against zero, so result is clobbered rather than defined.
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // same cost as partialSubtypeCheck above
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
13933 
// Intrinsic string comparison; all four inputs plus tmp1 are destroyed
// and the flags are clobbered.  The ordering convention of result is
// defined by MacroAssembler::string_compare — see that implementation.
instruct string_compare(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13948 
// Intrinsic indexOf with a runtime needle length in cnt2.
instruct string_indexof(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result" %}

  ins_encode %{
    // -1 signals a non-constant needle length (contrast with
    // string_indexof_con, which passes the constant instead).
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13966 
// IndexOf specialization for a small constant needle length
// (immI_le_4); the constant replaces cnt2, and zr is passed to the
// stub in the count-2 position.
instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13986 
// Intrinsic string equality over cnt characters; inputs and tmp are
// destroyed and the flags clobbered.
instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, iRegP_R10 tmp, rFlagsReg cr)
%{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(KILL tmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp" %}
  ins_encode %{
    __ string_equals($str1$$Register, $str2$$Register,
                      $cnt$$Register, $result$$Register,
                      $tmp$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
14001 
// Intrinsic char-array equality; inputs and tmp are destroyed and the
// flags clobbered.
instruct array_equals(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R10 tmp, rFlagsReg cr)
%{
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  // Fixed format typo: 'ary2' lacked the '$', so the second operand's
  // register was never substituted in PrintOptoAssembly output.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ char_arrays_equals($ary1$$Register, $ary2$$Register,
                          $result$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
14015 
// encode char[] to byte[] in ISO_8859_1
// Uses four vector temporaries (V0-V3); all three pointer/length inputs
// are destroyed and the flags clobbered.
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
14034 
14035 // ============================================================================
14036 // This name is KNOWN by the ADLC and cannot be changed.
14037 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
14038 // for this guy.
14039 instruct tlsLoadP(thread_RegP dst)
14040 %{
14041   match(Set dst (ThreadLocal));
14042 
14043   ins_cost(0);
14044 
14045   format %{ " -- \t// $dst=Thread::current(), empty" %}
14046 
14047   size(0);
14048 
14049   ins_encode( /*empty*/ );
14050 
14051   ins_pipe(pipe_class_empty);
14052 %}
14053 
// ====================VECTOR INSTRUCTIONS=====================================

// Vector loads and stores.  The predicate selects on the vector's
// in-memory size (4/8/16 bytes); the encoding emits the matching
// S/D/Q-sized ldr/str form.

// Load vector (32 bits)
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load vector (64 bits)
instruct loadV8(vecD dst, vmem8 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load Vector (128 bits)
instruct loadV16(vecX dst, vmem16 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}

// Store Vector (32 bits)
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (64 bits)
instruct storeV8(vecD src, vmem8 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (128 bits)
instruct storeV16(vecX src, vmem16 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(vstore_reg_mem128);
%}
14121 
// Replicate a scalar (register or immediate) into every vector lane.
// The 64-bit "D" forms also cover the shorter lengths listed in their
// predicates (e.g. replicate8B handles 4B as well).
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Immediate forms: the constant is masked to the lane width before
// being handed to the movi-style mov.
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// NOTE(review): despite the name, this matches (ReplicateI zero) with a
// vecX destination, and the format says "vector(4I)" while the encoding
// is a type-agnostic 16-byte eor-to-zero.  Verify the ReplicateI match
// and the "(4I)" annotation are intentional for the 2L case.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}

instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}

instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}

instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
14334 
// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Lane-wise vector add.  Integer forms use addv, FP forms use fadd;
// the 64-bit "D" forms also cover the shorter lengths in their
// predicates.
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
14466 
// Vector add, 2 doubles (128-bit).
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  // Predicate added for consistency with vsub2D/vmul2D; length 2 is the
  // only possible AddVD shape at 128 bits, so matching is unchanged.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
14479 
14480 // --------------------------------- SUB --------------------------------------
14481 
14482 instruct vsub8B(vecD dst, vecD src1, vecD src2)
14483 %{
14484   predicate(n->as_Vector()->length() == 4 ||
14485             n->as_Vector()->length() == 8);
14486   match(Set dst (SubVB src1 src2));
14487   ins_cost(INSN_COST);
14488   format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
14489   ins_encode %{
14490     __ subv(as_FloatRegister($dst$$reg), __ T8B,
14491             as_FloatRegister($src1$$reg),
14492             as_FloatRegister($src2$$reg));
14493   %}
14494   ins_pipe(vdop64);
14495 %}
14496 
14497 instruct vsub16B(vecX dst, vecX src1, vecX src2)
14498 %{
14499   predicate(n->as_Vector()->length() == 16);
14500   match(Set dst (SubVB src1 src2));
14501   ins_cost(INSN_COST);
14502   format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
14503   ins_encode %{
14504     __ subv(as_FloatRegister($dst$$reg), __ T16B,
14505             as_FloatRegister($src1$$reg),
14506             as_FloatRegister($src2$$reg));
14507   %}
14508   ins_pipe(vdop128);
14509 %}
14510 
14511 instruct vsub4S(vecD dst, vecD src1, vecD src2)
14512 %{
14513   predicate(n->as_Vector()->length() == 2 ||
14514             n->as_Vector()->length() == 4);
14515   match(Set dst (SubVS src1 src2));
14516   ins_cost(INSN_COST);
14517   format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
14518   ins_encode %{
14519     __ subv(as_FloatRegister($dst$$reg), __ T4H,
14520             as_FloatRegister($src1$$reg),
14521             as_FloatRegister($src2$$reg));
14522   %}
14523   ins_pipe(vdop64);
14524 %}
14525 
14526 instruct vsub8S(vecX dst, vecX src1, vecX src2)
14527 %{
14528   predicate(n->as_Vector()->length() == 8);
14529   match(Set dst (SubVS src1 src2));
14530   ins_cost(INSN_COST);
14531   format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
14532   ins_encode %{
14533     __ subv(as_FloatRegister($dst$$reg), __ T8H,
14534             as_FloatRegister($src1$$reg),
14535             as_FloatRegister($src2$$reg));
14536   %}
14537   ins_pipe(vdop128);
14538 %}
14539 
14540 instruct vsub2I(vecD dst, vecD src1, vecD src2)
14541 %{
14542   predicate(n->as_Vector()->length() == 2);
14543   match(Set dst (SubVI src1 src2));
14544   ins_cost(INSN_COST);
14545   format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
14546   ins_encode %{
14547     __ subv(as_FloatRegister($dst$$reg), __ T2S,
14548             as_FloatRegister($src1$$reg),
14549             as_FloatRegister($src2$$reg));
14550   %}
14551   ins_pipe(vdop64);
14552 %}
14553 
14554 instruct vsub4I(vecX dst, vecX src1, vecX src2)
14555 %{
14556   predicate(n->as_Vector()->length() == 4);
14557   match(Set dst (SubVI src1 src2));
14558   ins_cost(INSN_COST);
14559   format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
14560   ins_encode %{
14561     __ subv(as_FloatRegister($dst$$reg), __ T4S,
14562             as_FloatRegister($src1$$reg),
14563             as_FloatRegister($src2$$reg));
14564   %}
14565   ins_pipe(vdop128);
14566 %}
14567 
14568 instruct vsub2L(vecX dst, vecX src1, vecX src2)
14569 %{
14570   predicate(n->as_Vector()->length() == 2);
14571   match(Set dst (SubVL src1 src2));
14572   ins_cost(INSN_COST);
14573   format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
14574   ins_encode %{
14575     __ subv(as_FloatRegister($dst$$reg), __ T2D,
14576             as_FloatRegister($src1$$reg),
14577             as_FloatRegister($src2$$reg));
14578   %}
14579   ins_pipe(vdop128);
14580 %}
14581 
14582 instruct vsub2F(vecD dst, vecD src1, vecD src2)
14583 %{
14584   predicate(n->as_Vector()->length() == 2);
14585   match(Set dst (SubVF src1 src2));
14586   ins_cost(INSN_COST);
14587   format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
14588   ins_encode %{
14589     __ fsub(as_FloatRegister($dst$$reg), __ T2S,
14590             as_FloatRegister($src1$$reg),
14591             as_FloatRegister($src2$$reg));
14592   %}
14593   ins_pipe(vdop_fp64);
14594 %}
14595 
14596 instruct vsub4F(vecX dst, vecX src1, vecX src2)
14597 %{
14598   predicate(n->as_Vector()->length() == 4);
14599   match(Set dst (SubVF src1 src2));
14600   ins_cost(INSN_COST);
14601   format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
14602   ins_encode %{
14603     __ fsub(as_FloatRegister($dst$$reg), __ T4S,
14604             as_FloatRegister($src1$$reg),
14605             as_FloatRegister($src2$$reg));
14606   %}
14607   ins_pipe(vdop_fp128);
14608 %}
14609 
14610 instruct vsub2D(vecX dst, vecX src1, vecX src2)
14611 %{
14612   predicate(n->as_Vector()->length() == 2);
14613   match(Set dst (SubVD src1 src2));
14614   ins_cost(INSN_COST);
14615   format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
14616   ins_encode %{
14617     __ fsub(as_FloatRegister($dst$$reg), __ T2D,
14618             as_FloatRegister($src1$$reg),
14619             as_FloatRegister($src2$$reg));
14620   %}
14621   ins_pipe(vdop_fp128);
14622 %}
14623 
14624 // --------------------------------- MUL --------------------------------------
14625 
14626 instruct vmul4S(vecD dst, vecD src1, vecD src2)
14627 %{
14628   predicate(n->as_Vector()->length() == 2 ||
14629             n->as_Vector()->length() == 4);
14630   match(Set dst (MulVS src1 src2));
14631   ins_cost(INSN_COST);
14632   format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
14633   ins_encode %{
14634     __ mulv(as_FloatRegister($dst$$reg), __ T4H,
14635             as_FloatRegister($src1$$reg),
14636             as_FloatRegister($src2$$reg));
14637   %}
14638   ins_pipe(vmul64);
14639 %}
14640 
14641 instruct vmul8S(vecX dst, vecX src1, vecX src2)
14642 %{
14643   predicate(n->as_Vector()->length() == 8);
14644   match(Set dst (MulVS src1 src2));
14645   ins_cost(INSN_COST);
14646   format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
14647   ins_encode %{
14648     __ mulv(as_FloatRegister($dst$$reg), __ T8H,
14649             as_FloatRegister($src1$$reg),
14650             as_FloatRegister($src2$$reg));
14651   %}
14652   ins_pipe(vmul128);
14653 %}
14654 
14655 instruct vmul2I(vecD dst, vecD src1, vecD src2)
14656 %{
14657   predicate(n->as_Vector()->length() == 2);
14658   match(Set dst (MulVI src1 src2));
14659   ins_cost(INSN_COST);
14660   format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
14661   ins_encode %{
14662     __ mulv(as_FloatRegister($dst$$reg), __ T2S,
14663             as_FloatRegister($src1$$reg),
14664             as_FloatRegister($src2$$reg));
14665   %}
14666   ins_pipe(vmul64);
14667 %}
14668 
14669 instruct vmul4I(vecX dst, vecX src1, vecX src2)
14670 %{
14671   predicate(n->as_Vector()->length() == 4);
14672   match(Set dst (MulVI src1 src2));
14673   ins_cost(INSN_COST);
14674   format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
14675   ins_encode %{
14676     __ mulv(as_FloatRegister($dst$$reg), __ T4S,
14677             as_FloatRegister($src1$$reg),
14678             as_FloatRegister($src2$$reg));
14679   %}
14680   ins_pipe(vmul128);
14681 %}
14682 
14683 instruct vmul2F(vecD dst, vecD src1, vecD src2)
14684 %{
14685   predicate(n->as_Vector()->length() == 2);
14686   match(Set dst (MulVF src1 src2));
14687   ins_cost(INSN_COST);
14688   format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
14689   ins_encode %{
14690     __ fmul(as_FloatRegister($dst$$reg), __ T2S,
14691             as_FloatRegister($src1$$reg),
14692             as_FloatRegister($src2$$reg));
14693   %}
14694   ins_pipe(vmuldiv_fp64);
14695 %}
14696 
14697 instruct vmul4F(vecX dst, vecX src1, vecX src2)
14698 %{
14699   predicate(n->as_Vector()->length() == 4);
14700   match(Set dst (MulVF src1 src2));
14701   ins_cost(INSN_COST);
14702   format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
14703   ins_encode %{
14704     __ fmul(as_FloatRegister($dst$$reg), __ T4S,
14705             as_FloatRegister($src1$$reg),
14706             as_FloatRegister($src2$$reg));
14707   %}
14708   ins_pipe(vmuldiv_fp128);
14709 %}
14710 
14711 instruct vmul2D(vecX dst, vecX src1, vecX src2)
14712 %{
14713   predicate(n->as_Vector()->length() == 2);
14714   match(Set dst (MulVD src1 src2));
14715   ins_cost(INSN_COST);
14716   format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
14717   ins_encode %{
14718     __ fmul(as_FloatRegister($dst$$reg), __ T2D,
14719             as_FloatRegister($src1$$reg),
14720             as_FloatRegister($src2$$reg));
14721   %}
14722   ins_pipe(vmuldiv_fp128);
14723 %}
14724 
14725 // --------------------------------- MLA --------------------------------------
14726 
// Vector multiply-accumulate of shorts: dst[i] += src1[i] * src2[i]
// (NEON MLA, arrangement 4H). The predicate also accepts 2-element vectors,
// which fit in the low half of a 64-bit vecD register.
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
14741 
// Vector multiply-accumulate of 8 shorts: dst[i] += src1[i] * src2[i]
// (NEON MLA, arrangement 8H).
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
14755 
// Vector multiply-accumulate of 2 ints: dst[i] += src1[i] * src2[i]
// (NEON MLA, arrangement 2S).
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
14769 
// Vector multiply-accumulate of 4 ints: dst[i] += src1[i] * src2[i]
// (NEON MLA, arrangement 4S).
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
14783 
14784 // --------------------------------- MLS --------------------------------------
14785 
// Vector multiply-subtract of shorts: dst[i] -= src1[i] * src2[i]
// (NEON MLS, arrangement 4H). The predicate also accepts 2-element vectors,
// which fit in the low half of a 64-bit vecD register.
instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
14800 
// Vector multiply-subtract of 8 shorts: dst[i] -= src1[i] * src2[i]
// (NEON MLS, arrangement 8H).
instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
14814 
// Vector multiply-subtract of 2 ints: dst[i] -= src1[i] * src2[i]
// (NEON MLS, arrangement 2S).
instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
14828 
// Vector multiply-subtract of 4 ints: dst[i] -= src1[i] * src2[i]
// (NEON MLS, arrangement 4S).
instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
14842 
14843 // --------------------------------- DIV --------------------------------------
14844 
// Vector divide of 2 floats: dst[i] = src1[i] / src2[i]
// (NEON FDIV, arrangement 2S).
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}
14858 
// Vector divide of 4 floats: dst[i] = src1[i] / src2[i]
// (NEON FDIV, arrangement 4S).
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
14872 
// Vector divide of 2 doubles: dst[i] = src1[i] / src2[i]
// (NEON FDIV, arrangement 2D).
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
14886 
14887 // --------------------------------- AND --------------------------------------
14888 
// Vector bitwise AND of up to 8 bytes (element type is irrelevant for
// logical ops, so the predicate is on byte length). 4-byte vectors use
// the low half of a 64-bit vecD register.
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
14903 
// Vector bitwise AND of 16 bytes (full 128-bit register).
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
14917 
14918 // --------------------------------- OR ---------------------------------------
14919 
// Vector bitwise OR of up to 8 bytes (element type is irrelevant for
// logical ops, so the predicate is on byte length). 4-byte vectors use
// the low half of a 64-bit vecD register.
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  // The format string must show "orr": this rule emits an ORR, not an AND
  // (the previous "and" text was a copy-paste from vand8B and produced
  // misleading disassembly; compare vor16B below).
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
14934 
// Vector bitwise OR of 16 bytes (full 128-bit register).
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
14948 
14949 // --------------------------------- XOR --------------------------------------
14950 
// Vector bitwise XOR of up to 8 bytes (element type is irrelevant for
// logical ops, so the predicate is on byte length). 4-byte vectors use
// the low half of a 64-bit vecD register.
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
14965 
// Vector bitwise XOR of 16 bytes (full 128-bit register).
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
14979 
14980 // ------------------------------ Shift ---------------------------------------
// Replicate a scalar shift count into every byte lane of a 64-bit vector,
// producing the per-lane count operand for the variable-shift rules below.
instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
14991 
// Replicate a scalar shift count into every byte lane of a 128-bit vector,
// producing the per-lane count operand for the variable-shift rules below.
instruct vshiftcnt16B(vecX dst, iRegIorL2I cnt) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
15002 
// Variable left shift of up to 8 bytes; the per-lane count comes from a
// vector register (SSHL shifts left for positive counts).
instruct vsll8B(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
15016 
// Variable left shift of 16 bytes; the per-lane count comes from a
// vector register (SSHL shifts left for positive counts).
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
15029 
15030 // Right shifts with vector shift count on aarch64 SIMD are implemented
15031 // as left shift by negative shift count.
15032 // There are two cases for vector shift count.
15033 //
15034 // Case 1: The vector shift count is from replication.
15035 //        |            |
15036 //    LoadVector  RShiftCntV
15037 //        |       /
15038 //     RShiftVI
// Note: In the inner loop, multiple neg instructions are used; they can be
// moved to the outer loop and merged into one neg instruction.
15041 //
15042 // Case 2: The vector shift count is from loading.
// This case isn't supported by the middle-end yet, but it is supported by
// panama/vectorIntrinsics (JEP 338: Vector API).
15045 //        |            |
15046 //    LoadVector  LoadVector
15047 //        |       /
15048 //     RShiftVI
15049 //
15050 
// Variable arithmetic right shift of up to 8 bytes. AArch64 SIMD has no
// register-count right shift, so the count vector is negated into a temp
// and SSHL (left shift by a negative count) performs the right shift.
instruct vsra8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}
15068 
// Variable arithmetic right shift of 16 bytes: negate the count vector,
// then SSHL (left shift by a negative count) performs the right shift.
instruct vsra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15085 
// Variable logical right shift of up to 8 bytes: negate the count vector,
// then USHL (unsigned left shift by a negative count) performs the shift.
instruct vsrl8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}
15103 
// Variable logical right shift of 16 bytes: negate the count vector,
// then USHL (unsigned left shift by a negative count) performs the shift.
instruct vsrl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15120 
// Immediate left shift of up to 8 bytes. The count is masked to 0..31;
// counts >= 8 (element width) zero the destination via eor dst,src,src
// (x ^ x == 0) since SHL cannot encode a shift of the full element width.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
15140 
// Immediate left shift of 16 bytes. The count is masked to 0..31;
// counts >= 8 (element width) zero the destination via eor dst,src,src.
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
15159 
// Immediate arithmetic right shift of up to 8 bytes. Counts >= 8 are
// clamped to 7 (arithmetic shift saturates at element width - 1, filling
// with the sign bit).
// NOTE(review): the final "sh = -sh & 7" assumes this file's sshr()
// immediate takes a negated/encoded count rather than the raw shift —
// confirm against the MacroAssembler's sshr definition.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
15175 
// Immediate arithmetic right shift of 16 bytes. Counts >= 8 are clamped
// to 7 (arithmetic shift saturates at element width - 1).
// NOTE(review): "sh = -sh & 7" assumes sshr() takes a negated/encoded
// count — confirm against the MacroAssembler's sshr definition.
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
15190 
// Immediate logical right shift of up to 8 bytes. Counts >= 8 zero the
// destination via eor dst,src,src (all bits shifted out).
// NOTE(review): "-sh & 7" assumes ushr() takes a negated/encoded count —
// confirm against the MacroAssembler's ushr definition.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(vshift64_imm);
%}
15210 
// Immediate logical right shift of 16 bytes. Counts >= 8 zero the
// destination via eor dst,src,src (all bits shifted out).
// NOTE(review): "-sh & 7" assumes ushr() takes a negated/encoded count —
// confirm against the MacroAssembler's ushr definition.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(vshift128_imm);
%}
15229 
// Variable left shift of up to 4 shorts (SSHL, arrangement 4H);
// 2-element vectors use the low half of a 64-bit vecD register.
instruct vsll4S(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
15243 
// Variable left shift of 8 shorts (SSHL, arrangement 8H).
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
15256 
// Variable arithmetic right shift of up to 4 shorts: negate the count
// vector (negr on T8B negates every byte, which also negates each 16-bit
// replicated count), then SSHL by the negative count shifts right.
instruct vsra4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}
15274 
// Variable arithmetic right shift of 8 shorts: negate the count vector,
// then SSHL by the negative count shifts right.
instruct vsra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15291 
// Variable logical right shift of up to 4 shorts: negate the count
// vector, then USHL by the negative count shifts right.
instruct vsrl4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}
15309 
// Variable logical right shift of 8 shorts: negate the count vector,
// then USHL by the negative count shifts right.
instruct vsrl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15326 
// Immediate left shift of up to 4 shorts. The count is masked to 0..31;
// counts >= 16 (element width) zero the destination via eor dst,src,src.
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
15346 
// Immediate left shift of 8 shorts. The count is masked to 0..31;
// counts >= 16 (element width) zero the destination via eor dst,src,src.
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
15365 
// Immediate arithmetic right shift of up to 4 shorts. Counts >= 16 are
// clamped to 15 (arithmetic shift saturates at element width - 1).
// NOTE(review): "sh = -sh & 15" assumes sshr() takes a negated/encoded
// count — confirm against the MacroAssembler's sshr definition.
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
15381 
// Immediate arithmetic right shift of 8 shorts. Counts >= 16 are clamped
// to 15 (arithmetic shift saturates at element width - 1).
// NOTE(review): "sh = -sh & 15" assumes sshr() takes a negated/encoded
// count — confirm against the MacroAssembler's sshr definition.
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
15396 
// Immediate logical right shift of up to 4 shorts. Counts >= 16 zero the
// destination via eor dst,src,src (all bits shifted out).
// NOTE(review): "-sh & 15" assumes ushr() takes a negated/encoded count —
// confirm against the MacroAssembler's ushr definition.
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift64_imm);
%}
15416 
// Immediate logical right shift of 8 shorts. Counts >= 16 zero the
// destination via eor dst,src,src (all bits shifted out).
// NOTE(review): "-sh & 15" assumes ushr() takes a negated/encoded count —
// confirm against the MacroAssembler's ushr definition.
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift128_imm);
%}
15435 
// Variable left shift of 2 ints (SSHL, arrangement 2S).
instruct vsll2I(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
15448 
// Variable left shift of 4 ints (SSHL, arrangement 4S).
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
15461 
// Variable arithmetic right shift of 2 ints: negate the count vector,
// then SSHL by the negative count shifts right.
instruct vsra2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}
15478 
// Variable arithmetic right shift of 4 ints: negate the count vector,
// then SSHL by the negative count shifts right.
instruct vsra4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15495 
// Variable logical right shift of 2 ints: negate the count vector,
// then USHL by the negative count shifts right.
instruct vsrl2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}
15512 
// Variable logical right shift of 4 ints: negate the count vector,
// then USHL by the negative count shifts right.
instruct vsrl4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15529 
// Immediate left shift of 2 ints. Masking with 31 keeps the count inside
// the 32-bit element width, so no zeroing special case is needed.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
15542 
// Immediate left shift of 4 ints. Masking with 31 keeps the count inside
// the 32-bit element width, so no zeroing special case is needed.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
15555 
// Immediate arithmetic right shift of 2 ints.
// NOTE(review): the negate-and-mask of the count assumes sshr() takes a
// negated/encoded value, matching the byte/short rules above — confirm
// against the MacroAssembler's sshr definition.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
15568 
// Immediate arithmetic right shift of 4 ints.
// NOTE(review): the negate-and-mask of the count assumes sshr() takes a
// negated/encoded value — confirm against the MacroAssembler's sshr
// definition.
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
15581 
// Immediate logical right shift of 2 ints.
// NOTE(review): the negate-and-mask of the count assumes ushr() takes a
// negated/encoded value — confirm against the MacroAssembler's ushr
// definition.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
15594 
// Vector logical (unsigned) shift right by immediate: four 32-bit int lanes
// in a 128-bit (vecX) register.
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    // Negate-then-mask of the immediate, consistent with the other
    // right-shift-immediate rules in this file (see note at vsra2I_imm).
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
15607 
// Vector shift left by a per-lane count register: two 64-bit long lanes
// in a 128-bit (vecX) register. SSHL shifts left for positive lane counts,
// so the count register is used as-is.
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
15620 
// Vector arithmetic (signed) shift right by a per-lane count register:
// two 64-bit long lanes. AArch64 has no right-shift-by-register instruction;
// SSHL shifts right when the per-lane count is negative, so the counts are
// negated into a temporary first.
instruct vsra2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    // Byte-wise negate is sufficient here; the per-lane shift counts are
    // presumably small enough to fit in the low byte of each lane — the
    // same T16B negate is used by the other variable-shift rules.
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15637 
// Vector logical (unsigned) shift right by a per-lane count register:
// two 64-bit long lanes. As with vsra2L, USHL shifts right when the lane
// count is negative, so the counts are negated into a temporary first.
instruct vsrl2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    // Byte-wise negate of the counts (see comment at vsra2L).
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
15654 
// Vector shift left by immediate: two 64-bit long lanes in a 128-bit
// (vecX) register.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    // Mask the shift count to the 64-bit element width (0..63).
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
15667 
// Vector arithmetic (signed) shift right by immediate: two 64-bit long
// lanes in a 128-bit (vecX) register.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    // Negate-then-mask of the immediate to 0..63, consistent with the
    // 32-bit right-shift-immediate rules above (see note at vsra2I_imm).
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
15680 
// Vector logical (unsigned) shift right by immediate: two 64-bit long
// lanes in a 128-bit (vecX) register.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    // Negate-then-mask of the immediate to 0..63, consistent with the
    // 32-bit right-shift-immediate rules above (see note at vsra2I_imm).
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
15693 
15694 //----------PEEPHOLE RULES-----------------------------------------------------
15695 // These must follow all instruction definitions as they use the names
15696 // defined in the instructions definitions.
15697 //
15698 // peepmatch ( root_instr_name [preceding_instruction]* );
15699 //
15700 // peepconstraint %{
15701 // (instruction_number.operand_name relational_op instruction_number.operand_name
15702 //  [, ...] );
15703 // // instruction numbers are zero-based using left to right order in peepmatch
15704 //
15705 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
15706 // // provide an instruction_number.operand_name for each operand that appears
15707 // // in the replacement instruction's match rule
15708 //
15709 // ---------VM FLAGS---------------------------------------------------------
15710 //
15711 // All peephole optimizations can be turned off using -XX:-OptoPeephole
15712 //
15713 // Each peephole rule is given an identifying number starting with zero and
15714 // increasing by one in the order seen by the parser.  An individual peephole
15715 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
15716 // on the command-line.
15717 //
15718 // ---------CURRENT LIMITATIONS----------------------------------------------
15719 //
15720 // Only match adjacent instructions in same basic block
15721 // Only equality constraints
15722 // Only constraints between operands, not (0.dest_reg == RAX_enc)
15723 // Only one replacement instruction
15724 //
15725 // ---------EXAMPLE----------------------------------------------------------
15726 //
15727 // // pertinent parts of existing instructions in architecture description
15728 // instruct movI(iRegINoSp dst, iRegI src)
15729 // %{
15730 //   match(Set dst (CopyI src));
15731 // %}
15732 //
15733 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
15734 // %{
15735 //   match(Set dst (AddI dst src));
15736 //   effect(KILL cr);
15737 // %}
15738 //
15739 // // Change (inc mov) to lea
15740 // peephole %{
//   // increment preceded by register-register move
15742 //   peepmatch ( incI_iReg movI );
15743 //   // require that the destination register of the increment
15744 //   // match the destination register of the move
15745 //   peepconstraint ( 0.dst == 1.dst );
15746 //   // construct a replacement instruction that sets
15747 //   // the destination to ( move's source register + one )
15748 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
15749 // %}
15750 //
15751 
15752 // Implementation no longer uses movX instructions since
15753 // machine-independent system no longer uses CopyX nodes.
15754 //
15755 // peephole
15756 // %{
15757 //   peepmatch (incI_iReg movI);
15758 //   peepconstraint (0.dst == 1.dst);
15759 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15760 // %}
15761 
15762 // peephole
15763 // %{
15764 //   peepmatch (decI_iReg movI);
15765 //   peepconstraint (0.dst == 1.dst);
15766 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15767 // %}
15768 
15769 // peephole
15770 // %{
15771 //   peepmatch (addI_iReg_imm movI);
15772 //   peepconstraint (0.dst == 1.dst);
15773 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15774 // %}
15775 
15776 // peephole
15777 // %{
15778 //   peepmatch (incL_iReg movL);
15779 //   peepconstraint (0.dst == 1.dst);
15780 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15781 // %}
15782 
15783 // peephole
15784 // %{
15785 //   peepmatch (decL_iReg movL);
15786 //   peepconstraint (0.dst == 1.dst);
15787 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15788 // %}
15789 
15790 // peephole
15791 // %{
15792 //   peepmatch (addL_iReg_imm movL);
15793 //   peepconstraint (0.dst == 1.dst);
15794 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15795 // %}
15796 
15797 // peephole
15798 // %{
15799 //   peepmatch (addP_iReg_imm movP);
15800 //   peepconstraint (0.dst == 1.dst);
15801 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
15802 // %}
15803 
15804 // // Change load of spilled value to only a spill
15805 // instruct storeI(memory mem, iRegI src)
15806 // %{
15807 //   match(Set mem (StoreI mem src));
15808 // %}
15809 //
15810 // instruct loadI(iRegINoSp dst, memory mem)
15811 // %{
15812 //   match(Set dst (LoadI mem));
15813 // %}
15814 //
15815 
15816 //----------SMARTSPILL RULES---------------------------------------------------
15817 // These must follow all instruction definitions as they use the names
15818 // defined in the instructions definitions.
15819 
15820 // Local Variables:
15821 // mode: c++
15822 // End: