1 //
   2 // Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, 2019, Red Hat Inc.
   4 // All rights reserved.
   5 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6 //
   7 // This code is free software; you can redistribute it and/or modify it
   8 // under the terms of the GNU General Public License version 2 only, as
   9 // published by the Free Software Foundation.
  10 //
  11 // This code is distributed in the hope that it will be useful, but WITHOUT
  12 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 // version 2 for more details (a copy is included in the LICENSE file that
  15 // accompanied this code).
  16 //
  17 // You should have received a copy of the GNU General Public License version
  18 // 2 along with this work; if not, write to the Free Software Foundation,
  19 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20 //
  21 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22 // or visit www.oracle.com if you need additional information or have any
  23 // questions.
  24 //
  25 //
  26 
  27 // AArch64 Architecture Description File
  28 
  29 //----------REGISTER DEFINITION BLOCK------------------------------------------
  30 // This information is used by the matcher and the register allocator to
  31 // describe individual registers and classes of registers within the target
  32 // architecture.
  33 
  34 register %{
  35 //----------Architecture Description Register Definitions----------------------
  36 // General Registers
  37 // "reg_def"  name ( register save type, C convention save type,
  38 //                   ideal register type, encoding );
  39 // Register Save Types:
  40 //
  41 // NS  = No-Save:       The register allocator assumes that these registers
  42 //                      can be used without saving upon entry to the method, &
  43 //                      that they do not need to be saved at call sites.
  44 //
  45 // SOC = Save-On-Call:  The register allocator assumes that these registers
  46 //                      can be used without saving upon entry to the method,
  47 //                      but that they must be saved at call sites.
  48 //
  49 // SOE = Save-On-Entry: The register allocator assumes that these registers
  50 //                      must be saved before using them upon entry to the
  51 //                      method, but they do not need to be saved at call
  52 //                      sites.
  53 //
  54 // AS  = Always-Save:   The register allocator assumes that these registers
  55 //                      must be saved before using them upon entry to the
  56 //                      method, & that they must be saved at call sites.
  57 //
  58 // Ideal Register Type is used to determine how to save & restore a
  59 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  60 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  61 //
  62 // The encoding number is the actual bit-pattern placed into the opcodes.
  63 
  64 // We must define the 64 bit int registers in two 32 bit halves, the
  65 // real lower register and a virtual upper half register. upper halves
  66 // are used by the register allocator but are not actually supplied as
  67 // operands to memory ops.
  68 //
  69 // follow the C1 compiler in making registers
  70 //
  71 //   r0-r7,r10-r26 volatile (caller save)
  72 //   r27-r31 system (no save, no allocate)
  73 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  74 //
  75 // as regards Java usage. we don't use any callee save registers
  76 // because this makes it difficult to de-optimise a frame (see comment
  77 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  78 //
  79 
  80 // General Registers (R8 and R9 have no reg_def, so the allocator never sees them)
  81 
  82 reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         ); // real lower 32 bit half
  83 reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() ); // virtual upper half, same encoding
  84 reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
  85 reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
  86 reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
  87 reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
  88 reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
  89 reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
  90 reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
  91 reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
  92 reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
  93 reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
  94 reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
  95 reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
  96 reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
  97 reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
  98 reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
  99 reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 100 reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
 101 reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 102 reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
 103 reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
 104 reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
 105 reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
 106 reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
 107 reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
 108 reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
 109 reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
 110 reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
 111 reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
 112 reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
 113 reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
 114 reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
 115 reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
 116 reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        ); // R19-R26: SOC for Java, SOE in the C convention
 117 reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
 118 reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
 119 reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
 120 reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
 121 reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
 122 reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
 123 reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
 124 reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
 125 reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
 126 reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
 127 reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
 128 reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
 129 reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
 130 reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
 131 reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
 132 reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
 133 reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());  
 134 reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
 135 reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
 136 reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
 137 reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
 138 reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
 139 reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
 140 reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
 141 reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 142 
 143 // ----------------------------
 144 // Float/Double Registers
 145 // ----------------------------
 146 
 147 // Double Registers
 148 
 149 // The rules of ADL require that double registers be defined in pairs.
 150 // Each pair must be two 32-bit values, but not necessarily a pair of
 151 // single float registers. In each pair, ADLC-assigned register numbers
 152 // must be adjacent, with the lower number even. Finally, when the
 153 // CPU stores such a register pair to memory, the word associated with
 154 // the lower ADLC-assigned number must be stored to the lower address.
 155 
 156 // AArch64 has 32 floating-point registers. Each is 128 bits wide and
 157 // can store a vector of single or double precision floating-point
 158 // values: up to 4 * 32 bit floats or 2 * 64 bit floats.  Scalar code
 159 // only uses the first float or double element of the vector.
 160 
 161 // for Java use float registers v0-v15 are always save on call (whereas
 162 // the platform ABI treats v8-v15 as callee save). float registers
 163 // v16-v31 are SOC as per the platform spec
 164 
 165   reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          ); // 1st of four 32 bit slots
 166   reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  ); // 2nd slot
 167   reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) ); // 3rd slot
 168   reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) ); // 4th slot; same pattern repeats for V1-V31
 169 
 170   reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
 171   reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
 172   reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
 173   reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );
 174 
 175   reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
 176   reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
 177   reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
 178   reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );
 179 
 180   reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
 181   reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
 182   reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
 183   reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );
 184 
 185   reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
 186   reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
 187   reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
 188   reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );
 189 
 190   reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
 191   reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
 192   reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
 193   reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );
 194 
 195   reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
 196   reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
 197   reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
 198   reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );
 199 
 200   reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
 201   reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
 202   reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
 203   reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );
 204 
 205   reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
 206   reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
 207   reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
 208   reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );
 209 
 210   reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
 211   reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
 212   reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
 213   reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );
 214 
 215   reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
 216   reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
 217   reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
 218   reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
 219 
 220   reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
 221   reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
 222   reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
 223   reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
 224 
 225   reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
 226   reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
 227   reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
 228   reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
 229 
 230   reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
 231   reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
 232   reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
 233   reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
 234 
 235   reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
 236   reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
 237   reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
 238   reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
 239 
 240   reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
 241   reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
 242   reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
 243   reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
 244 
 245   reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
 246   reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
 247   reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
 248   reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
 249 
 250   reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
 251   reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
 252   reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
 253   reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
 254 
 255   reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
 256   reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
 257   reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
 258   reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
 259 
 260   reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
 261   reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
 262   reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
 263   reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
 264 
 265   reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
 266   reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
 267   reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
 268   reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
 269 
 270   reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
 271   reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
 272   reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
 273   reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
 274 
 275   reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
 276   reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
 277   reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
 278   reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
 279 
 280   reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
 281   reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
 282   reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
 283   reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
 284 
 285   reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
 286   reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
 287   reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
 288   reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
 289 
 290   reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
 291   reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
 292   reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
 293   reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
 294 
 295   reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
 296   reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
 297   reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
 298   reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
 299 
 300   reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
 301   reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
 302   reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
 303   reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
 304 
 305   reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
 306   reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
 307   reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
 308   reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
 309 
 310   reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
 311   reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
 312   reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
 313   reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
 314 
 315   reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
 316   reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
 317   reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
 318   reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
 319 
 320   reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
 321   reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
 322   reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
 323   reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 324 
 325 // ----------------------------
 326 // Special Registers
 327 // ----------------------------
 328 
 329 // the AArch64 CPSR status flag register is not directly accessible as
 330 // an instruction operand. the FPSR status flag register is a system
 331 // register which can be written/read using MSR/MRS but again does not
 332 // appear as an operand (a code identifying the FPSR occurs as an
 333 // immediate value in the instruction).
 334 
 335 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad()); // flags pseudo-register: ideal type 0, encoding 32, no backing VMReg
 336 
 337 
 338 // Specify priority of register selection within phases of register
 339 // allocation.  Highest priority is first.  A useful heuristic is to
 340 // give registers a low priority when they are required by machine
 341 // instructions, like EAX and EDX on I486, and choose no-save registers
 342 // before save-on-call, & save-on-call before save-on-entry.  Registers
 343 // which participate in fixed calling sequences should come last.
 344 // Registers which are used as pairs must fall on an even boundary.
 345 
 346 alloc_class chunk0(
 347     // volatiles (SOC in both conventions; listed first, so highest priority)
 348     R10, R10_H,
 349     R11, R11_H,
 350     R12, R12_H,
 351     R13, R13_H,
 352     R14, R14_H,
 353     R15, R15_H,
 354     R16, R16_H,
 355     R17, R17_H,
 356     R18, R18_H,
 357 
 358     // arg registers (R0-R7, also SOC in both conventions)
 359     R0, R0_H,
 360     R1, R1_H,
 361     R2, R2_H,
 362     R3, R3_H,
 363     R4, R4_H,
 364     R5, R5_H,
 365     R6, R6_H,
 366     R7, R7_H,
 367 
 368     // non-volatiles (R19-R26: SOE in the C convention, SOC for Java)
 369     R19, R19_H,
 370     R20, R20_H,
 371     R21, R21_H,
 372     R22, R22_H,
 373     R23, R23_H,
 374     R24, R24_H,
 375     R25, R25_H,
 376     R26, R26_H,
 377     
 378     // non-allocatable registers (R27-R31: NS for Java)
 379 
 380     R27, R27_H, // heapbase
 381     R28, R28_H, // thread
 382     R29, R29_H, // fp
 383     R30, R30_H, // lr
 384     R31, R31_H, // sp
 385 );
 386 
 387 alloc_class chunk1(
 388 
 389     // no callee save needed: v16-v31 are defined SOC in both conventions
 390     V16, V16_H, V16_J, V16_K,
 391     V17, V17_H, V17_J, V17_K,
 392     V18, V18_H, V18_J, V18_K,
 393     V19, V19_H, V19_J, V19_K,
 394     V20, V20_H, V20_J, V20_K,
 395     V21, V21_H, V21_J, V21_K,
 396     V22, V22_H, V22_J, V22_K,
 397     V23, V23_H, V23_J, V23_K,
 398     V24, V24_H, V24_J, V24_K,
 399     V25, V25_H, V25_J, V25_K,
 400     V26, V26_H, V26_J, V26_K,
 401     V27, V27_H, V27_J, V27_K,
 402     V28, V28_H, V28_J, V28_K,
 403     V29, V29_H, V29_J, V29_K,
 404     V30, V30_H, V30_J, V30_K,
 405     V31, V31_H, V31_J, V31_K,
 406 
 407     // arg registers (v0-v7)
 408     V0, V0_H, V0_J, V0_K,
 409     V1, V1_H, V1_J, V1_K,
 410     V2, V2_H, V2_J, V2_K,
 411     V3, V3_H, V3_J, V3_K,
 412     V4, V4_H, V4_J, V4_K,
 413     V5, V5_H, V5_J, V5_K,
 414     V6, V6_H, V6_J, V6_K,
 415     V7, V7_H, V7_J, V7_K,
 416 
 417     // non-volatiles in the platform ABI (v8-v15), but defined SOC by the reg_defs in this file
 418     V8, V8_H, V8_J, V8_K,
 419     V9, V9_H, V9_J, V9_K,
 420     V10, V10_H, V10_J, V10_K,
 421     V11, V11_H, V11_J, V11_K,
 422     V12, V12_H, V12_J, V12_K,
 423     V13, V13_H, V13_J, V13_K,
 424     V14, V14_H, V14_J, V14_K,
 425     V15, V15_H, V15_J, V15_K,
 426 );
 427 
 428 alloc_class chunk2(RFLAGS); // the flags pseudo-register gets an allocation chunk of its own
 429 
 430 //----------Architecture Description Register Classes--------------------------
 431 // Several register classes are automatically defined based upon information in
 432 // this architecture description.
 433 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 434 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 435 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 436 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 437 //
 438 
 439 // Class for all 32 bit integer registers (low halves only) -- excludes
 440 // SP (R31) which will never be used as an integer register
 441 reg_class any_reg32(
 442     R0,
 443     R1,
 444     R2,
 445     R3,
 446     R4,
 447     R5,
 448     R6,
 449     R7,
 450     R10,
 451     R11,
 452     R12,
 453     R13,
 454     R14,
 455     R15,
 456     R16,
 457     R17,
 458     R18,
 459     R19,
 460     R20,
 461     R21,
 462     R22,
 463     R23,
 464     R24,
 465     R25,
 466     R26,
 467     R27,
 468     R28,
 469     R29,
 470     R30
 471 );
 472 
 473 // Singleton class for R0 int register (32 bit view: low half only)
 474 reg_class int_r0_reg(R0);
 475 
 476 // Singleton class for R2 int register (32 bit view: low half only)
 477 reg_class int_r2_reg(R2);
 478 
 479 // Singleton class for R3 int register (32 bit view: low half only)
 480 reg_class int_r3_reg(R3);
 481 
 482 // Singleton class for R4 int register (32 bit view: low half only)
 483 reg_class int_r4_reg(R4);
 484 
 485 // Class for all long integer registers (including SP, i.e. R31)
 486 reg_class any_reg(
 487     R0, R0_H,
 488     R1, R1_H,
 489     R2, R2_H,
 490     R3, R3_H,
 491     R4, R4_H,
 492     R5, R5_H,
 493     R6, R6_H,
 494     R7, R7_H,
 495     R10, R10_H,
 496     R11, R11_H,
 497     R12, R12_H,
 498     R13, R13_H,
 499     R14, R14_H,
 500     R15, R15_H,
 501     R16, R16_H,
 502     R17, R17_H,
 503     R18, R18_H,
 504     R19, R19_H,
 505     R20, R20_H,
 506     R21, R21_H,
 507     R22, R22_H,
 508     R23, R23_H,
 509     R24, R24_H,
 510     R25, R25_H,
 511     R26, R26_H,
 512     R27, R27_H,
 513     R28, R28_H,
 514     R29, R29_H,
 515     R30, R30_H,
 516     R31, R31_H
 517 );
 518 
 519 // Class for all non-special integer registers (32 bit view; R27-R31 are left out)
 520 reg_class no_special_reg32(
 521     R0,
 522     R1,
 523     R2,
 524     R3,
 525     R4,
 526     R5,
 527     R6,
 528     R7,
 529     R10,
 530     R11,
 531     R12,                        // rmethod
 532     R13,
 533     R14,
 534     R15,
 535     R16,
 536     R17,
 537     R18,
 538     R19,
 539     R20,
 540     R21,
 541     R22,
 542     R23,
 543     R24,
 544     R25,
 545     R26
 546  /* R27, */                     // heapbase
 547  /* R28, */                     // thread
 548  /* R29, */                     // fp
 549  /* R30, */                     // lr
 550  /* R31 */                      // sp
 551 );
 552 
 553 // Class for all non-special long integer registers (64 bit view; R27-R31 are left out)
 554 reg_class no_special_reg(
 555     R0, R0_H,
 556     R1, R1_H,
 557     R2, R2_H,
 558     R3, R3_H,
 559     R4, R4_H,
 560     R5, R5_H,
 561     R6, R6_H,
 562     R7, R7_H,
 563     R10, R10_H,
 564     R11, R11_H,
 565     R12, R12_H,                 // rmethod
 566     R13, R13_H,
 567     R14, R14_H,
 568     R15, R15_H,
 569     R16, R16_H,
 570     R17, R17_H,
 571     R18, R18_H,
 572     R19, R19_H,
 573     R20, R20_H,
 574     R21, R21_H,
 575     R22, R22_H,
 576     R23, R23_H,
 577     R24, R24_H,
 578     R25, R25_H,
 579     R26, R26_H,
 580  /* R27, R27_H, */              // heapbase
 581  /* R28, R28_H, */              // thread
 582  /* R29, R29_H, */              // fp
 583  /* R30, R30_H, */              // lr
 584  /* R31, R31_H */               // sp
 585 );
 586 
 587 // Class for 64 bit register r0 (both halves)
 588 reg_class r0_reg(
 589     R0, R0_H
 590 );
 591 
 592 // Class for 64 bit register r1 (both halves)
 593 reg_class r1_reg(
 594     R1, R1_H
 595 );
 596 
 597 // Class for 64 bit register r2 (both halves)
 598 reg_class r2_reg(
 599     R2, R2_H
 600 );
 601 
 602 // Class for 64 bit register r3 (both halves)
 603 reg_class r3_reg(
 604     R3, R3_H
 605 );
 606 
 607 // Class for 64 bit register r4 (both halves)
 608 reg_class r4_reg(
 609     R4, R4_H
 610 );
 611 
 612 // Class for 64 bit register r5 (both halves)
 613 reg_class r5_reg(
 614     R5, R5_H
 615 );
 616 
 617 // Class for 64 bit register r10 (both halves)
 618 reg_class r10_reg(
 619     R10, R10_H
 620 );
 621 
 622 // Class for 64 bit register r11 (both halves)
 623 reg_class r11_reg(
 624     R11, R11_H
 625 );
 626 
 627 // Class for method register (R12)
 628 reg_class method_reg(
 629     R12, R12_H
 630 );
 631 
 632 // Class for heapbase register (R27)
 633 reg_class heapbase_reg(
 634     R27, R27_H
 635 );
 636 
 637 // Class for thread register (R28)
 638 reg_class thread_reg(
 639     R28, R28_H
 640 );
 641 
 642 // Class for frame pointer register (R29)
 643 reg_class fp_reg(
 644     R29, R29_H
 645 );
 646 
 647 // Class for link register (R30)
 648 reg_class lr_reg(
 649     R30, R30_H
 650 );
 651 
 652 // Class for long sp register (R31)
 653 reg_class sp_reg(
 654   R31, R31_H
 655 );
 656 
 657 // Class for all pointer registers (both halves of every defined Rn, R27-R31 included)
 658 reg_class ptr_reg(
 659     R0, R0_H,
 660     R1, R1_H,
 661     R2, R2_H,
 662     R3, R3_H,
 663     R4, R4_H,
 664     R5, R5_H,
 665     R6, R6_H,
 666     R7, R7_H,
 667     R10, R10_H,
 668     R11, R11_H,
 669     R12, R12_H,
 670     R13, R13_H,
 671     R14, R14_H,
 672     R15, R15_H,
 673     R16, R16_H,
 674     R17, R17_H,
 675     R18, R18_H,
 676     R19, R19_H,
 677     R20, R20_H,
 678     R21, R21_H,
 679     R22, R22_H,
 680     R23, R23_H,
 681     R24, R24_H,
 682     R25, R25_H,
 683     R26, R26_H,
 684     R27, R27_H,
 685     R28, R28_H,
 686     R29, R29_H,
 687     R30, R30_H,
 688     R31, R31_H
 689 );
 690 
 691 // Class for all non_special pointer registers (R27-R31 are left out)
 692 reg_class no_special_ptr_reg(
 693     R0, R0_H,
 694     R1, R1_H,
 695     R2, R2_H,
 696     R3, R3_H,
 697     R4, R4_H,
 698     R5, R5_H,
 699     R6, R6_H,
 700     R7, R7_H,
 701     R10, R10_H,
 702     R11, R11_H,
 703     R12, R12_H,
 704     R13, R13_H,
 705     R14, R14_H,
 706     R15, R15_H,
 707     R16, R16_H,
 708     R17, R17_H,
 709     R18, R18_H,
 710     R19, R19_H,
 711     R20, R20_H,
 712     R21, R21_H,
 713     R22, R22_H,
 714     R23, R23_H,
 715     R24, R24_H,
 716     R25, R25_H,
 717     R26, R26_H,
 718  /* R27, R27_H, */              // heapbase
 719  /* R28, R28_H, */              // thread
 720  /* R29, R29_H, */              // fp
 721  /* R30, R30_H, */              // lr
 722  /* R31, R31_H */               // sp
 723 );
 724 
 725 // Class for all float registers (first 32 bit slot of each Vn only)
 726 reg_class float_reg(
 727     V0,
 728     V1,
 729     V2,
 730     V3,
 731     V4,
 732     V5,
 733     V6,
 734     V7,
 735     V8,
 736     V9,
 737     V10,
 738     V11,
 739     V12,
 740     V13,
 741     V14,
 742     V15,
 743     V16,
 744     V17,
 745     V18,
 746     V19,
 747     V20,
 748     V21,
 749     V22,
 750     V23,
 751     V24,
 752     V25,
 753     V26,
 754     V27,
 755     V28,
 756     V29,
 757     V30,
 758     V31
 759 );
 760 
 761 // Double precision float registers have virtual `high halves' that
 762 // are needed by the allocator.
 763 // Class for all double registers (each listed as Vn plus its Vn_H high half)
 764 reg_class double_reg(
 765     V0, V0_H, 
 766     V1, V1_H, 
 767     V2, V2_H, 
 768     V3, V3_H, 
 769     V4, V4_H, 
 770     V5, V5_H, 
 771     V6, V6_H, 
 772     V7, V7_H, 
 773     V8, V8_H, 
 774     V9, V9_H, 
 775     V10, V10_H, 
 776     V11, V11_H, 
 777     V12, V12_H, 
 778     V13, V13_H, 
 779     V14, V14_H, 
 780     V15, V15_H, 
 781     V16, V16_H, 
 782     V17, V17_H, 
 783     V18, V18_H, 
 784     V19, V19_H, 
 785     V20, V20_H, 
 786     V21, V21_H, 
 787     V22, V22_H, 
 788     V23, V23_H, 
 789     V24, V24_H, 
 790     V25, V25_H, 
 791     V26, V26_H, 
 792     V27, V27_H, 
 793     V28, V28_H, 
 794     V29, V29_H, 
 795     V30, V30_H, 
 796     V31, V31_H
 797 );
 798 
 799 // Class for all 64bit vector registers (slots Vn and Vn_H of each register)
 800 reg_class vectord_reg(
 801     V0, V0_H,
 802     V1, V1_H,
 803     V2, V2_H,
 804     V3, V3_H,
 805     V4, V4_H,
 806     V5, V5_H,
 807     V6, V6_H,
 808     V7, V7_H,
 809     V8, V8_H,
 810     V9, V9_H,
 811     V10, V10_H,
 812     V11, V11_H,
 813     V12, V12_H,
 814     V13, V13_H,
 815     V14, V14_H,
 816     V15, V15_H,
 817     V16, V16_H,
 818     V17, V17_H,
 819     V18, V18_H,
 820     V19, V19_H,
 821     V20, V20_H,
 822     V21, V21_H,
 823     V22, V22_H,
 824     V23, V23_H,
 825     V24, V24_H,
 826     V25, V25_H,
 827     V26, V26_H,
 828     V27, V27_H,
 829     V28, V28_H,
 830     V29, V29_H,
 831     V30, V30_H,
 832     V31, V31_H
 833 );
 834 
 835 // Class for all 128bit vector registers (all four 32 bit slots of each register)
 836 reg_class vectorx_reg(
 837     V0, V0_H, V0_J, V0_K,
 838     V1, V1_H, V1_J, V1_K,
 839     V2, V2_H, V2_J, V2_K,
 840     V3, V3_H, V3_J, V3_K,
 841     V4, V4_H, V4_J, V4_K,
 842     V5, V5_H, V5_J, V5_K,
 843     V6, V6_H, V6_J, V6_K,
 844     V7, V7_H, V7_J, V7_K,
 845     V8, V8_H, V8_J, V8_K,
 846     V9, V9_H, V9_J, V9_K,
 847     V10, V10_H, V10_J, V10_K,
 848     V11, V11_H, V11_J, V11_K,
 849     V12, V12_H, V12_J, V12_K,
 850     V13, V13_H, V13_J, V13_K,
 851     V14, V14_H, V14_J, V14_K,
 852     V15, V15_H, V15_J, V15_K,
 853     V16, V16_H, V16_J, V16_K,
 854     V17, V17_H, V17_J, V17_K,
 855     V18, V18_H, V18_J, V18_K,
 856     V19, V19_H, V19_J, V19_K,
 857     V20, V20_H, V20_J, V20_K,
 858     V21, V21_H, V21_J, V21_K,
 859     V22, V22_H, V22_J, V22_K,
 860     V23, V23_H, V23_J, V23_K,
 861     V24, V24_H, V24_J, V24_K,
 862     V25, V25_H, V25_J, V25_K,
 863     V26, V26_H, V26_J, V26_K,
 864     V27, V27_H, V27_J, V27_K,
 865     V28, V28_H, V28_J, V28_K,
 866     V29, V29_H, V29_J, V29_K,
 867     V30, V30_H, V30_J, V30_K,
 868     V31, V31_H, V31_J, V31_K
 869 );
 870 
 871 // Class for register v0 (only the low 64 bits: slots V0 and V0_H)
 872 reg_class v0_reg(
 873     V0, V0_H
 874 );
 875 
 876 // Class for register v1 (only the low 64 bits: slots V1 and V1_H)
 877 reg_class v1_reg(
 878     V1, V1_H
 879 );
 880 
 881 // Class for register v2 (only the low 64 bits: slots V2 and V2_H)
 882 reg_class v2_reg(
 883     V2, V2_H
 884 );
 885 
 886 // Class for register v3 (only the low 64 bits: slots V3 and V3_H)
 887 reg_class v3_reg(
 888     V3, V3_H
 889 );
 890 
// Singleton class for condition codes: its only member is RFLAGS
reg_class int_flags(RFLAGS);
 893 
 894 %}
 895 
 896 //----------DEFINITION BLOCK---------------------------------------------------
 897 // Define name --> value mappings to inform the ADLC of an integer valued name
 898 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 899 // Format:
 900 //        int_def  <name>         ( <int_value>, <expression>);
 901 // Generated Code in ad_<arch>.hpp
 902 //        #define  <name>   (<expression>)
 903 //        // value == <int_value>
 904 // Generated code in ad_<arch>.cpp adlc_verification()
 905 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 906 //
 907 
 908 // we follow the ppc-aix port in using a simple cost model which ranks
 909 // register operations as cheap, memory ops as more expensive and
 910 // branches as most expensive. the first two have a low as well as a
 911 // normal cost. huge cost appears to be a way of saying don't do
 912 // something
 913 
// Cost constants used by the matching rules. Each int_def gives the
// expected integer value followed by the expression that defines it.
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls are costed at twice a register move.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile memory references are costed at ten times a register move.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 921 
 922 
 923 //----------SOURCE BLOCK-------------------------------------------------------
 924 // This is a block of C++ code which provides values, functions, and
 925 // definitions necessary in the rest of the architecture description
 926 
 927 source_hpp %{
 928 
 929 #if INCLUDE_ALL_GCS
 930 #include "shenandoahBarrierSetAssembler_aarch64.hpp"
 931 #endif
 932 
 933 class CallStubImpl {
 934  
 935   //--------------------------------------------------------------
 936   //---<  Used for optimization in Compile::shorten_branches  >---
 937   //--------------------------------------------------------------
 938 
 939  public:
 940   // Size of call trampoline stub.
 941   static uint size_call_trampoline() {
 942     return 0; // no call trampolines on this platform
 943   }
 944   
 945   // number of relocations needed by a call trampoline stub
 946   static uint reloc_call_trampoline() { 
 947     return 0; // no call trampolines on this platform
 948   }
 949 };
 950 
 951 class HandlerImpl {
 952 
 953  public:
 954 
 955   static int emit_exception_handler(CodeBuffer &cbuf);
 956   static int emit_deopt_handler(CodeBuffer& cbuf);
 957 
 958   static uint size_exception_handler() {
 959     return MacroAssembler::far_branch_size();
 960   }
 961 
 962   static uint size_deopt_handler() {
 963     // count one adr and one far branch instruction
 964     // return 4 * NativeInstruction::instruction_size;
 965     return NativeInstruction::instruction_size + MacroAssembler::far_branch_size();
 966   }
 967 };
 968 
  // true if opcode is one of the CompareAndSwapX, GetAndSetX or
  // GetAndAddX opcodes handled by the predicates declared below
  bool is_CAS(int opcode);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  // true if the dmb for the given acquire membar can be omitted
  bool unnecessary_acquire(const Node *barrier);
  // true if the given load must be emitted in its acquiring form
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  // true if the dmb for the given release membar can be omitted
  bool unnecessary_release(const Node *barrier);
  // true if the dmb for the given volatile membar can be omitted
  bool unnecessary_volatile(const Node *barrier);
  // true if the given store must be emitted in its releasing form
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  // (true if no StoreStore barrier is needed before the card write)
  bool unnecessary_storestore(const Node *storecm);
 987 %}
 988 
 989 source %{
 990 
  // Optimization of volatile gets and puts
 992   // -------------------------------------
 993   //
 994   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
 995   // use to implement volatile reads and writes. For a volatile read
 996   // we simply need
 997   //
 998   //   ldar<x>
 999   //
1000   // and for a volatile write we need
1001   //
1002   //   stlr<x>
1003   // 
1004   // Alternatively, we can implement them by pairing a normal
1005   // load/store with a memory barrier. For a volatile read we need
1006   // 
1007   //   ldr<x>
1008   //   dmb ishld
1009   //
1010   // for a volatile write
1011   //
1012   //   dmb ish
1013   //   str<x>
1014   //   dmb ish
1015   //
1016   // We can also use ldaxr and stlxr to implement compare and swap CAS
1017   // sequences. These are normally translated to an instruction
1018   // sequence like the following
1019   //
1020   //   dmb      ish
1021   // retry:
1022   //   ldxr<x>   rval raddr
1023   //   cmp       rval rold
1024   //   b.ne done
  //   stlxr<x>  rval, rnew, raddr
1026   //   cbnz      rval retry
1027   // done:
1028   //   cset      r0, eq
1029   //   dmb ishld
1030   //
1031   // Note that the exclusive store is already using an stlxr
1032   // instruction. That is required to ensure visibility to other
1033   // threads of the exclusive write (assuming it succeeds) before that
1034   // of any subsequent writes.
1035   //
1036   // The following instruction sequence is an improvement on the above
1037   //
1038   // retry:
1039   //   ldaxr<x>  rval raddr
1040   //   cmp       rval rold
1041   //   b.ne done
  //   stlxr<x>  rval, rnew, raddr
1043   //   cbnz      rval retry
1044   // done:
1045   //   cset      r0, eq
1046   //
1047   // We don't need the leading dmb ish since the stlxr guarantees
1048   // visibility of prior writes in the case that the swap is
1049   // successful. Crucially we don't have to worry about the case where
1050   // the swap is not successful since no valid program should be
1051   // relying on visibility of prior changes by the attempting thread
1052   // in the case where the CAS fails.
1053   //
1054   // Similarly, we don't need the trailing dmb ishld if we substitute
1055   // an ldaxr instruction since that will provide all the guarantees we
1056   // require regarding observation of changes made by other threads
1057   // before any change to the CAS address observed by the load.
1058   //
1059   // In order to generate the desired instruction sequence we need to
1060   // be able to identify specific 'signature' ideal graph node
  // sequences which i) occur as a translation of volatile reads or
  // writes or CAS operations and ii) do not occur through any other
  // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1065   // sequences to the desired machine code sequences. Selection of the
1066   // alternative rules can be implemented by predicates which identify
1067   // the relevant node sequences.
1068   //
1069   // The ideal graph generator translates a volatile read to the node
1070   // sequence
1071   //
1072   //   LoadX[mo_acquire]
1073   //   MemBarAcquire
1074   //
1075   // As a special case when using the compressed oops optimization we
1076   // may also see this variant
1077   //
1078   //   LoadN[mo_acquire]
1079   //   DecodeN
1080   //   MemBarAcquire
1081   //
1082   // A volatile write is translated to the node sequence
1083   //
1084   //   MemBarRelease
1085   //   StoreX[mo_release] {CardMark}-optional
1086   //   MemBarVolatile
1087   //
1088   // n.b. the above node patterns are generated with a strict
1089   // 'signature' configuration of input and output dependencies (see
1090   // the predicates below for exact details). The card mark may be as
1091   // simple as a few extra nodes or, in a few GC configurations, may
1092   // include more complex control flow between the leading and
1093   // trailing memory barriers. However, whatever the card mark
1094   // configuration these signatures are unique to translated volatile
1095   // reads/stores -- they will not appear as a result of any other
1096   // bytecode translation or inlining nor as a consequence of
1097   // optimizing transforms.
1098   //
  // We also want to catch inlined unsafe volatile gets and puts and
  // be able to implement them using either ldar<x>/stlr<x> or some
  // combination of ldr<x>/str<x> and dmb instructions.
1102   //
  // Inlined unsafe volatile puts manifest as a minor variant of the
1104   // normal volatile put node sequence containing an extra cpuorder
1105   // membar
1106   //
1107   //   MemBarRelease
1108   //   MemBarCPUOrder
1109   //   StoreX[mo_release] {CardMark}-optional
1110   //   MemBarVolatile
1111   //
1112   // n.b. as an aside, the cpuorder membar is not itself subject to
1113   // matching and translation by adlc rules.  However, the rule
1114   // predicates need to detect its presence in order to correctly
1115   // select the desired adlc rules.
1116   //
1117   // Inlined unsafe volatile gets manifest as a somewhat different
1118   // node sequence to a normal volatile get
1119   //
1120   //   MemBarCPUOrder
1121   //        ||       \\
1122   //   MemBarAcquire LoadX[mo_acquire]
1123   //        ||
1124   //   MemBarCPUOrder
1125   //
1126   // In this case the acquire membar does not directly depend on the
1127   // load. However, we can be sure that the load is generated from an
1128   // inlined unsafe volatile get if we see it dependent on this unique
1129   // sequence of membar nodes. Similarly, given an acquire membar we
1130   // can know that it was added because of an inlined unsafe volatile
1131   // get if it is fed and feeds a cpuorder membar and if its feed
1132   // membar also feeds an acquiring load.
1133   //
1134   // Finally an inlined (Unsafe) CAS operation is translated to the
1135   // following ideal graph
1136   //
1137   //   MemBarRelease
1138   //   MemBarCPUOrder
1139   //   CompareAndSwapX {CardMark}-optional
1140   //   MemBarCPUOrder
1141   //   MemBarAcquire
1142   //
1143   // So, where we can identify these volatile read and write
1144   // signatures we can choose to plant either of the above two code
1145   // sequences. For a volatile read we can simply plant a normal
1146   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1147   // also choose to inhibit translation of the MemBarAcquire and
1148   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1149   //
  // When we recognise a volatile store signature we can choose to
  // plant a dmb ish as a translation for the MemBarRelease, a
  // normal str<x> and then a dmb ish for the MemBarVolatile.
1153   // Alternatively, we can inhibit translation of the MemBarRelease
1154   // and MemBarVolatile and instead plant a simple stlr<x>
1155   // instruction.
1156   //
1157   // when we recognise a CAS signature we can choose to plant a dmb
1158   // ish as a translation for the MemBarRelease, the conventional
1159   // macro-instruction sequence for the CompareAndSwap node (which
1160   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1161   // Alternatively, we can elide generation of the dmb instructions
1162   // and plant the alternative CompareAndSwap macro-instruction
1163   // sequence (which uses ldaxr<x>).
1164   // 
1165   // Of course, the above only applies when we see these signature
1166   // configurations. We still want to plant dmb instructions in any
1167   // other cases where we may see a MemBarAcquire, MemBarRelease or
1168   // MemBarVolatile. For example, at the end of a constructor which
1169   // writes final/volatile fields we will see a MemBarRelease
1170   // instruction and this needs a 'dmb ish' lest we risk the
1171   // constructed object being visible without making the
1172   // final/volatile field writes visible.
1173   //
1174   // n.b. the translation rules below which rely on detection of the
1175   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1176   // If we see anything other than the signature configurations we
1177   // always just translate the loads and stores to ldr<x> and str<x>
1178   // and translate acquire, release and volatile membars to the
1179   // relevant dmb instructions.
1180   //
1181 
1182   // is_CAS(int opcode)
1183   //
1184   // return true if opcode is one of the possible CompareAndSwapX
1185   // values otherwise false.
1186 
1187   bool is_CAS(int opcode)
1188   {
1189     switch(opcode) {
1190     // We handle these
1191     case Op_CompareAndSwapI:
1192     case Op_CompareAndSwapL:
1193     case Op_CompareAndSwapP:
1194     case Op_CompareAndSwapN:
1195     case Op_GetAndSetI:
1196     case Op_GetAndSetL:
1197     case Op_GetAndSetP:
1198     case Op_GetAndSetN:
1199     case Op_GetAndAddI:
1200     case Op_GetAndAddL:
1201       return true;
1202     default:
1203       return false;
1204     }
1205   }
1206 
1207 // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1208 
1209 bool unnecessary_acquire(const Node *barrier)
1210 {
1211   assert(barrier->is_MemBar(), "expecting a membar");
1212 
1213   if (UseBarriersForVolatile) {
1214     // we need to plant a dmb
1215     return false;
1216   }
1217 
1218   MemBarNode* mb = barrier->as_MemBar();
1219 
1220   if (mb->trailing_load()) {
1221     return true;
1222   }
1223 
1224   if (mb->trailing_load_store()) {
1225     Node* load_store = mb->in(MemBarNode::Precedent);
1226     assert(load_store->is_LoadStore(), "unexpected graph shape");
1227     return is_CAS(load_store->Opcode());
1228   }
1229 
1230   return false;
1231 }
1232 
1233 bool needs_acquiring_load(const Node *n)
1234 {
1235   assert(n->is_Load(), "expecting a load");
1236   if (UseBarriersForVolatile) {
1237     // we use a normal load and a dmb
1238     return false;
1239   }
1240 
1241   LoadNode *ld = n->as_Load();
1242 
1243   return ld->is_acquire();
1244 }
1245 
1246 bool unnecessary_release(const Node *n)
1247 {
1248   assert((n->is_MemBar() &&
1249           n->Opcode() == Op_MemBarRelease),
1250          "expecting a release membar");
1251 
1252   if (UseBarriersForVolatile) {
1253     // we need to plant a dmb
1254     return false;
1255   }
1256 
1257   MemBarNode *barrier = n->as_MemBar();
1258 
1259   if (!barrier->leading()) {
1260     return false;
1261   } else {
1262     Node* trailing = barrier->trailing_membar();
1263     MemBarNode* trailing_mb = trailing->as_MemBar();
1264     assert(trailing_mb->trailing(), "Not a trailing membar?");
1265     assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars");
1266 
1267     Node* mem = trailing_mb->in(MemBarNode::Precedent);
1268     if (mem->is_Store()) {
1269       assert(mem->as_Store()->is_release(), "");
1270       assert(trailing_mb->Opcode() == Op_MemBarVolatile, "");
1271       return true;
1272     } else {
1273       assert(mem->is_LoadStore(), "");
1274       assert(trailing_mb->Opcode() == Op_MemBarAcquire, "");
1275       return is_CAS(mem->Opcode());
1276     }
1277   }
1278 
1279   return false;
1280 }
1281 
1282 bool unnecessary_volatile(const Node *n)
1283 {
1284   // assert n->is_MemBar();
1285   if (UseBarriersForVolatile) {
1286     // we need to plant a dmb
1287     return false;
1288   }
1289 
1290   MemBarNode *mbvol = n->as_MemBar();
1291 
1292   bool release = mbvol->trailing_store();
1293   assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), "");
1294 #ifdef ASSERT
1295   if (release) {
1296     Node* leading = mbvol->leading_membar();
1297     assert(leading->Opcode() == Op_MemBarRelease, "");
1298     assert(leading->as_MemBar()->leading_store(), "");
1299     assert(leading->as_MemBar()->trailing_membar() == mbvol, "");
1300    }
1301 #endif
1302 
1303   return release;
1304 }
1305 
1306 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1307 
1308 bool needs_releasing_store(const Node *n)
1309 {
1310   // assert n->is_Store();
1311   if (UseBarriersForVolatile) {
1312     // we use a normal store and dmb combination
1313     return false;
1314   }
1315 
1316   StoreNode *st = n->as_Store();
1317 
1318   return st->trailing_membar() != NULL;
1319 }
1320 
1321 // predicate controlling translation of CAS
1322 //
1323 // returns true if CAS needs to use an acquiring load otherwise false
1324 
1325 bool needs_acquiring_load_exclusive(const Node *n)
1326 {
1327   assert(is_CAS(n->Opcode()), "expecting a compare and swap");
1328   if (UseBarriersForVolatile) {
1329     return false;
1330   }
1331 
1332   LoadStoreNode* ldst = n->as_LoadStore();
1333   assert(ldst->trailing_membar() != NULL, "expected trailing membar");
1334 
1335   // so we can just return true here
1336   return true;
1337 }
1338 
1339 // predicate controlling translation of StoreCM
1340 //
1341 // returns true if a StoreStore must precede the card write otherwise
1342 // false
1343 
1344 bool unnecessary_storestore(const Node *storecm)
1345 {
1346   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
1347 
1348   // we need to generate a dmb ishst between an object put and the
1349   // associated card mark when we are using CMS without conditional
1350   // card marking
1351 
1352   if (UseConcMarkSweepGC && !UseCondCardMark) {
1353     return false;
1354   }
1355 
1356   // a storestore is unnecesary in all other cases
1357 
1358   return true;
1359 }
1360 
1361 
1362 #define __ _masm.
1363 
// advance declarations for helper functions to convert register
// indices to register objects
1366 
1367 // the ad file has to provide implementations of certain methods
1368 // expected by the generic code
1369 //
1370 // REQUIRED FUNCTIONALITY
1371 
1372 //=============================================================================
1373 
1374 // !!!!! Special hack to get all types of calls to specify the byte offset
1375 //       from the start of the call to the point where the return address
1376 //       will point.
1377 
1378 int MachCallStaticJavaNode::ret_addr_offset()
1379 {
1380   // call should be a simple bl
1381   // unless this is a method handle invoke in which case it is
1382   // mov(rfp, sp), bl, mov(sp, rfp)
1383   int off = 4;
1384   if (_method_handle_invoke) {
1385     off += 4;
1386   }
1387   return off;
1388 }
1389 
1390 int MachCallDynamicJavaNode::ret_addr_offset()
1391 {
1392   return 16; // movz, movk, movk, bl
1393 }
1394 
1395 int MachCallRuntimeNode::ret_addr_offset() {
1396   // for generated stubs the call will be
1397   //   bl(addr)
1398   // for real runtime callouts it will be six instructions
1399   // see aarch64_enc_java_to_runtime
1400   //   adr(rscratch2, retaddr)
1401   //   lea(rscratch1, RuntimeAddress(addr)
1402   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1403   //   blr(rscratch1)
1404   CodeBlob *cb = CodeCache::find_blob(_entry_point);
1405   if (cb) {
1406     return MacroAssembler::far_branch_size();
1407   } else {
1408     return 6 * NativeInstruction::instruction_size;
1409   }
1410 }
1411 
1412 // Indicate if the safepoint node needs the polling page as an input
1413 
1414 // the shared code plants the oop data at the start of the generated
1415 // code for the safepoint node and that needs ot be at the load
1416 // instruction itself. so we cannot plant a mov of the safepoint poll
1417 // address followed by a load. setting this to true means the mov is
1418 // scheduled as a prior instruction. that's better for scheduling
1419 // anyway.
1420 
1421 bool SafePointNode::needs_polling_address_input()
1422 {
1423   return true;
1424 }
1425 
1426 //=============================================================================
1427 
1428 #ifndef PRODUCT
1429 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1430   st->print("BREAKPOINT");
1431 }
1432 #endif
1433 
1434 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1435   MacroAssembler _masm(&cbuf);
1436   __ brk(0);
1437 }
1438 
1439 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
1440   return MachNode::size(ra_);
1441 }
1442 
1443 //=============================================================================
1444 
1445 #ifndef PRODUCT
1446   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
1447     st->print("nop \t# %d bytes pad for loops and calls", _count);
1448   }
1449 #endif
1450 
1451   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
1452     MacroAssembler _masm(&cbuf);
1453     for (int i = 0; i < _count; i++) { 
1454       __ nop();
1455     }
1456   }
1457 
1458   uint MachNopNode::size(PhaseRegAlloc*) const {
1459     return _count * NativeInstruction::instruction_size;
1460   }
1461 
1462 //=============================================================================
// The constant base node's output register mask is the empty mask.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
1464 
1465 int Compile::ConstantTable::calculate_table_base_offset() const {
1466   return 0;  // absolute addressing, no offset
1467 }
1468 
1469 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1470 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1471   ShouldNotReachHere();
1472 }
1473 
1474 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
1475   // Empty encoding
1476 }
1477 
1478 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1479   return 0;
1480 }
1481 
1482 #ifndef PRODUCT
1483 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1484   st->print("-- \t// MachConstantBaseNode (empty encoding)");
1485 }
1486 #endif
1487 
1488 #ifndef PRODUCT
1489 void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1490   Compile* C = ra_->C;
1491 
1492   int framesize = C->frame_slots() << LogBytesPerInt;
1493 
1494   if (C->need_stack_bang(framesize))
1495     st->print("# stack bang size=%d\n\t", framesize);
1496 
1497   if (framesize == 0) {
1498     // Is this even possible?
1499     st->print("stp  lr, rfp, [sp, #%d]!", -(2 * wordSize)); 
1500   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
1501     st->print("sub  sp, sp, #%d\n\t", framesize);
1502     st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
1503   } else {
1504     st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize)); 
1505     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
1506     st->print("sub  sp, sp, rscratch1");
1507   }
1508 }
1509 #endif
1510 
1511 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1512   Compile* C = ra_->C;
1513   MacroAssembler _masm(&cbuf);
1514 
1515   // n.b. frame size includes space for return pc and rfp
1516   long framesize = ((long)C->frame_slots()) << LogBytesPerInt;
1517   assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");
1518 
1519   // insert a nop at the start of the prolog so we can patch in a
1520   // branch if we need to invalidate the method later
1521   __ nop();
1522 
1523   if (C->need_stack_bang(framesize))
1524     __ generate_stack_overflow_check(framesize);
1525 
1526   __ build_frame(framesize);
1527 
1528   if (VerifyStackAtCalls) {
1529     Unimplemented();
1530   }
1531 
1532   C->set_frame_complete(cbuf.insts_size());
1533 
1534   if (C->has_mach_constant_base_node()) {
1535     // NOTE: We set the table base offset here because users might be
1536     // emitted before MachConstantBaseNode.
1537     Compile::ConstantTable& constant_table = C->constant_table();
1538     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1539   }
1540 }
1541 
1542 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1543 {
1544   return MachNode::size(ra_); // too many variables; just compute it
1545                               // the hard way
1546 }
1547 
1548 int MachPrologNode::reloc() const
1549 {
1550   return 0;
1551 }
1552 
1553 //=============================================================================
1554 
1555 #ifndef PRODUCT
1556 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1557   Compile* C = ra_->C;
1558   int framesize = C->frame_slots() << LogBytesPerInt;
1559 
1560   st->print("# pop frame %d\n\t",framesize);
1561 
1562   if (framesize == 0) {
1563     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1564   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
1565     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
1566     st->print("add  sp, sp, #%d\n\t", framesize);
1567   } else {
1568     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
1569     st->print("add  sp, sp, rscratch1\n\t");
1570     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
1571   }
1572 
1573   if (do_polling() && C->is_method_compilation()) {
1574     st->print("# touch polling page\n\t");
1575     st->print("mov  rscratch1, #" INTPTR_FORMAT "\n\t", p2i(os::get_polling_page()));
1576     st->print("ldr zr, [rscratch1]");
1577   }
1578 }
1579 #endif
1580 
1581 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1582   Compile* C = ra_->C;
1583   MacroAssembler _masm(&cbuf);
1584   int framesize = C->frame_slots() << LogBytesPerInt;
1585 
1586   __ remove_frame(framesize);
1587 
1588   if (do_polling() && C->is_method_compilation()) {
1589     __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
1590   }
1591 }
1592 
1593 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
1594   // Variable size. Determine dynamically.
1595   return MachNode::size(ra_);
1596 }
1597 
1598 int MachEpilogNode::reloc() const {
1599   // Return number of relocatable values contained in this instruction.
1600   return 1; // 1 for polling page.
1601 }
1602 
1603 const Pipeline * MachEpilogNode::pipeline() const {
1604   return MachNode::pipeline_class();
1605 }
1606 
1607 // This method seems to be obsolete. It is declared in machnode.hpp
1608 // and defined in all *.ad files, but it is never called. Should we
1609 // get rid of it?
1610 int MachEpilogNode::safepoint_offset() const {
1611   assert(do_polling(), "no return for this epilog node");
1612   return 4;
1613 }
1614 
1615 //=============================================================================
1616 
// Register categories used when implementing spill copies: each
// operand is an int register (rc_int), a float register (rc_float)
// or a stack slot (rc_stack). rc_bad marks an invalid register name.
enum RC {
  rc_bad,
  rc_int,
  rc_float,
  rc_stack
};
1620 
1621 static enum RC rc_class(OptoReg::Name reg) {
1622 
1623   if (reg == OptoReg::Bad) {
1624     return rc_bad;
1625   }
1626 
1627   // we have 30 int registers * 2 halves
1628   // (rscratch1 and rscratch2 are omitted)
1629 
1630   if (reg < 60) {
1631     return rc_int;
1632   }
1633 
1634   // we have 32 float register * 2 halves
1635   if (reg < 60 + 128) {
1636     return rc_float;
1637   }
1638 
1639   // Between float regs & stack is the flags regs.
1640   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
1641 
1642   return rc_stack;
1643 }
1644 
1645 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
1646   Compile* C = ra_->C;
1647 
1648   // Get registers to move.
1649   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
1650   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
1651   OptoReg::Name dst_hi = ra_->get_reg_second(this);
1652   OptoReg::Name dst_lo = ra_->get_reg_first(this);
1653 
1654   enum RC src_hi_rc = rc_class(src_hi);
1655   enum RC src_lo_rc = rc_class(src_lo);
1656   enum RC dst_hi_rc = rc_class(dst_hi);
1657   enum RC dst_lo_rc = rc_class(dst_lo);
1658 
1659   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
1660 
1661   if (src_hi != OptoReg::Bad) {
1662     assert((src_lo&1)==0 && src_lo+1==src_hi &&
1663            (dst_lo&1)==0 && dst_lo+1==dst_hi,
1664            "expected aligned-adjacent pairs");
1665   }
1666 
1667   if (src_lo == dst_lo && src_hi == dst_hi) {
1668     return 0;            // Self copy, no move.
1669   }
1670 
1671   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
1672               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
1673   int src_offset = ra_->reg2offset(src_lo);
1674   int dst_offset = ra_->reg2offset(dst_lo);
1675 
1676   if (bottom_type()->isa_vect() != NULL) {
1677     uint ireg = ideal_reg();
1678     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
1679     if (cbuf) {
1680       MacroAssembler _masm(cbuf);
1681       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
1682       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
1683         // stack->stack
1684         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
1685         if (ireg == Op_VecD) {
1686           __ unspill(rscratch1, true, src_offset);
1687           __ spill(rscratch1, true, dst_offset);
1688         } else {
1689           __ spill_copy128(src_offset, dst_offset);
1690         }
1691       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
1692         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1693                ireg == Op_VecD ? __ T8B : __ T16B,
1694                as_FloatRegister(Matcher::_regEncode[src_lo]));
1695       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
1696         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1697                        ireg == Op_VecD ? __ D : __ Q,
1698                        ra_->reg2offset(dst_lo));
1699       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
1700         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1701                        ireg == Op_VecD ? __ D : __ Q,
1702                        ra_->reg2offset(src_lo));
1703       } else {
1704         ShouldNotReachHere();
1705       }
1706     }
1707   } else if (cbuf) {
1708     MacroAssembler _masm(cbuf);
1709     switch (src_lo_rc) {
1710     case rc_int:
1711       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
1712         if (is64) {
1713             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
1714                    as_Register(Matcher::_regEncode[src_lo]));
1715         } else {
1716             MacroAssembler _masm(cbuf);
1717             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
1718                     as_Register(Matcher::_regEncode[src_lo]));
1719         }
1720       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
1721         if (is64) {
1722             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1723                      as_Register(Matcher::_regEncode[src_lo]));
1724         } else {
1725             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1726                      as_Register(Matcher::_regEncode[src_lo]));
1727         }
1728       } else {                    // gpr --> stack spill
1729         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1730         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
1731       }
1732       break;
1733     case rc_float:
1734       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
1735         if (is64) {
1736             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
1737                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1738         } else {
1739             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
1740                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1741         }
1742       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
1743           if (cbuf) {
1744             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1745                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1746         } else {
1747             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1748                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1749         }
1750       } else {                    // fpr --> stack spill
1751         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1752         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1753                  is64 ? __ D : __ S, dst_offset);
1754       }
1755       break;
1756     case rc_stack:
1757       if (dst_lo_rc == rc_int) {  // stack --> gpr load
1758         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
1759       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
1760         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1761                    is64 ? __ D : __ S, src_offset);
1762       } else {                    // stack --> stack copy
1763         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1764         __ unspill(rscratch1, is64, src_offset);
1765         __ spill(rscratch1, is64, dst_offset);
1766       }
1767       break;
1768     default:
1769       assert(false, "bad rc_class for spill");
1770       ShouldNotReachHere();
1771     }
1772   }
1773 
1774   if (st) {
1775     st->print("spill ");
1776     if (src_lo_rc == rc_stack) {
1777       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
1778     } else {
1779       st->print("%s -> ", Matcher::regName[src_lo]);
1780     }
1781     if (dst_lo_rc == rc_stack) {
1782       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
1783     } else {
1784       st->print("%s", Matcher::regName[dst_lo]);
1785     }
1786     if (bottom_type()->isa_vect() != NULL) {
1787       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
1788     } else {
1789       st->print("\t# spill size = %d", is64 ? 64:32);
1790     }
1791   }
1792 
1793   return 0;
1794 
1795 }
1796 
1797 #ifndef PRODUCT
1798 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1799   // With an allocator, implementation() prints the move; else just node ids.
1800   if (ra_ != NULL) { implementation(NULL, ra_, false, st); return; }
1801   // No register allocation information was supplied.
1802   st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
1803 }
1804 #endif
1805 
// Emit the code for this spill copy into cbuf by delegating to
// implementation(); no output stream is passed, so nothing is printed.
1806 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1807   implementation(&cbuf, ra_, false, NULL);
1808 }
1809 
// Size of this node's code; defers to the generic MachNode::size().
1810 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1811   return MachNode::size(ra_);
1812 }
1813 
1814 //=============================================================================
1815 
1816 #ifndef PRODUCT
// Debug-only printer for a BoxLockNode.  Prints the add that emit()
// generates: destination register, the stack pointer and the stack offset
// of the box.  The text names sp (emit() adds to sp; rsp is an x86 name)
// and carries no stray closing bracket.
1817 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1818   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1819   int reg = ra_->get_reg_first(this);
1820   st->print("add %s, sp, #%d\t# box lock",
1821             Matcher::regName[reg], offset);
1822 }
1823 #endif
1824 
1825 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1826   // Compute sp + (stack offset of the box) into the register that was
1827   // allocated to this node.
1828   MacroAssembler _masm(&cbuf);
1829   const int box_offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1830   const int dst_enc    = ra_->get_encode(this);
1831   if (!Assembler::operand_valid_for_add_sub_immediate(box_offset)) {
1832     ShouldNotReachHere();  // offset is not a valid add/sub immediate
1833     return;
1834   }
1835   __ add(as_Register(dst_enc), sp, box_offset);
1836 }
1837 
// Size of the code emitted for a BoxLockNode.  The value is hard-coded.
// NOTE(review): presumably 4 is the byte size of the single add emitted by
// emit() -- confirm if emit() ever changes.
1838 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1839   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
1840   return 4;
1841 }
1842 
1843 //=============================================================================
1844 
1845 #ifndef PRODUCT
// Debug-only printer for the unverified entry point.  The text is a
// human-readable sketch of the inline cache check, not an exact
// disassembly.  The memory operands are bracketed on both sides and the
// uncompressed load is no longer labelled as a compressed klass.
1846 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1847 {
1848   st->print_cr("# MachUEPNode");
1849   if (UseCompressedClassPointers) {
1850     st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1851     if (Universe::narrow_klass_shift() != 0) {
1852       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1853     }
1854   } else {
1855     st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
1856   }
1857   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
1858   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
1859 }
1860 #endif
1861 
// Emit the unverified entry point: a klass comparison followed by a far
// jump to the inline cache miss stub that is skipped when the comparison
// yields EQ.
1862 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1863 {
1864   // This is the unverified entry point.
1865   MacroAssembler _masm(&cbuf);
1866 
  // NOTE(review): presumably compares the klass of the object in j_rarg0
  // with rscratch2, using rscratch1 as a temporary -- confirm against
  // MacroAssembler::cmp_klass.
1867   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
1868   Label skip;
1869   // TODO
1870   // can we avoid this skip and still use a reloc?
  // EQ: branch over the far jump.  Otherwise: jump to the IC miss stub.
1871   __ br(Assembler::EQ, skip);
1872   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1873   __ bind(skip);
1874 }
1875 
1876 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1877 {
1878   return MachNode::size(ra_);
1879 }
1880 
1881 // REQUIRED EMIT CODE
1882 
1883 //=============================================================================
1884 
1885 // Emit exception handler code.
// The handler is a stub containing a far jump to the entry point of the
// OptoRuntime exception blob.  Returns the code offset at which the
// handler starts, or 0 (after recording a "CodeCache is full" failure)
// when the stub could not be started.
1886 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
1887 {
1888   // mov rscratch1 #exception_blob_entry_point
1889   // br rscratch1
1890   // Note that the code buffer's insts_mark is always relative to insts.
1891   // That's why we must use the macroassembler to generate a handler.
1892   MacroAssembler _masm(&cbuf);
1893   address base = __ start_a_stub(size_exception_handler());
1894   if (base == NULL) {
1895     ciEnv::current()->record_failure("CodeCache is full");
1896     return 0;  // CodeBuffer::expand failed
1897   }
  // Remember where the handler begins; this is the value returned.
1898   int offset = __ offset();
1899   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  // The emitted code must fit in the space reserved for the stub.
1900   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1901   __ end_a_stub();
1902   return offset;
1903 }
1904 
1905 // Emit deopt handler code.
// The handler is a stub that sets lr to the address of its own first
// instruction and then far-jumps to the deopt blob's unpack entry.
// Returns the code offset at which the handler starts, or 0 (after
// recording a "CodeCache is full" failure) when the stub could not be
// started.
1906 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
1907 {
1908   // Note that the code buffer's insts_mark is always relative to insts.
1909   // That's why we must use the macroassembler to generate a handler.
1910   MacroAssembler _masm(&cbuf);
1911   address base = __ start_a_stub(size_deopt_handler());
1912   if (base == NULL) {
1913     ciEnv::current()->record_failure("CodeCache is full");
1914     return 0;  // CodeBuffer::expand failed
1915   }
  // Remember where the handler begins; this is the value returned.
1916   int offset = __ offset();
1917 
  // lr := address of this adr instruction (the current pc).
1918   __ adr(lr, __ pc());
1919   __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1920 
  // The emitted code must fit in the space reserved for the stub.
1921   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1922   __ end_a_stub();
1923   return offset;
1924 }
1925 
1926 // REQUIRED MATCHER CODE
1927 
1928 //=============================================================================
1929 
1930 const bool Matcher::match_rule_supported(int opcode) {
1931   // An ideal opcode is supported exactly when this AD file provides a
1932   // match rule for it; nothing else is filtered out here.
1933   //
1934   // TODO
1935   // identify extra cases that we might want to provide match rules for
1936   // e.g. Op_StrEquals and other intrinsics
1937   const bool has_rule = has_match_rule(opcode);
1938 
1939   return has_rule;
1940 }
1941 
// Not implemented on this port: calls Unimplemented().  The returned 0 is
// only a placeholder to satisfy the signature.
1942 int Matcher::regnum_to_fpu_offset(int regnum)
1943 {
1944   Unimplemented();
1945   return 0;
1946 }
1947 
1948 // Is this branch offset short enough that a short branch can be used?
1949 //
1950 // NOTE: If the platform does not provide any short branch variants, then
1951 //       this method should return false for offset 0.
1952 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1953   // offset is relative to the branch address; rule and br_size are unused.
1954   const int limit = 32768;  // accept offsets in [-32768, 32767]
1955   return offset >= -limit && offset < limit;
1956 }
1957 
// Reports every 64-bit constant as simple: the value argument is not
// inspected and the answer is always true.
1958 const bool Matcher::isSimpleConstant64(jlong value) {
1959   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1960   // Probably always true, even if a temp register is required.
1961   return true;
1962 }
1963 
1964 // true just means we have fast l2f conversion
// This port always answers true.
1965 const bool Matcher::convL2FSupported(void) {
1966   return true;
1967 }
1968 
1969 // Vector width in bytes.
1970 const int Matcher::vector_width_in_bytes(BasicType bt) {
1971   // The usable width is MaxVectorSize capped at 16 bytes.
1972   const int width = MIN2(16, (int)MaxVectorSize);
1973   // A vector must hold at least two elements of type bt ...
1974   const bool holds_two = width >= 2 * type2aelembytes(bt);
1975   // ... and is never narrower than 4 bytes; otherwise report 0.
1976   return (holds_two && width >= 4) ? width : 0;
1977 }
1978 
1979 // Limits on vector size (number of elements) loaded into vector.
// Maximum: the vector width in bytes divided by the element size of bt.
1980 const int Matcher::max_vector_size(const BasicType bt) {
1981   return vector_width_in_bytes(bt)/type2aelembytes(bt);
1982 }
1983 const int Matcher::min_vector_size(const BasicType bt) {
1984   // For the moment limit the vector size to 8 bytes: use the number of
1985   // bt elements that fit in 8 bytes, but never fewer than 2.
1986   const int lanes = 8 / type2aelembytes(bt);
1987   return (lanes < 2) ? 2 : lanes;
1988 }
1989 
1990 // Vector ideal reg.
1991 const uint Matcher::vector_ideal_reg(int len) {
1992   // Map a vector length to its ideal register: 8 -> VecD, 16 -> VecX.
1993   if (len == 8)  return Op_VecD;
1994   if (len == 16) return Op_VecX;
1995   // Any other length is unexpected.
1996   ShouldNotReachHere();
1997   return 0;
1998 }
1999 
2000 const uint Matcher::vector_shift_count_ideal_reg(int size) {
2001   // Ideal register for a vector shift count: 8 -> VecD, 16 -> VecX.
2002   if (size == 8)  return Op_VecD;
2003   if (size == 16) return Op_VecX;
2004   // Any other size is unexpected.
2005   ShouldNotReachHere();
2006   return 0;
2007 }
2008 
2009 // AES support not yet implemented
// so this port always answers false.
2010 const bool Matcher::pass_original_key_for_aes() {
2011   return false;
2012 }
2013 
2014 // Misaligned vector loads/stores are allowed unless AlignVector is set.
2015 const bool Matcher::misaligned_vectors_ok() {
2016   return !AlignVector; // can be changed by flag
2017 }
2018 
2019 // false => the array-init count is not in bytes; it gets scaled to BytesPerLong.
2020 const bool Matcher::init_array_count_is_in_bytes = false;
2021 
2022 // Threshold size for cleararray: four longs' worth of bytes.
2023 const int Matcher::init_array_short_size = 4 * BytesPerLong;
2024 
2025 // Extra cost of a long conditional move relative to an int one.
2026 const int Matcher::long_cmove_cost() {
2027   // long cmoves are no more expensive than int cmoves
2028   return 0;
2029 }
2030 
// Extra cost of a float conditional move relative to an int one.
2031 const int Matcher::float_cmove_cost() {
2032   // float cmoves are no more expensive than int cmoves
2033   return 0;
2034 }
2035 
2036 // Does the CPU require late expand (see block.cpp for description of late expand)?
// Not on this port.
2037 const bool Matcher::require_postalloc_expand = false;
2038 
2039 // Should the Matcher clone shifts on addressing modes, expecting them
2040 // to be subsumed into complex addressing expressions or compute them
2041 // into registers?  True for Intel but false for most RISCs
// This port uses false.
2042 const bool Matcher::clone_shift_expressions = false;
2043 
2044 // Do we need to mask the count passed to shift instructions or does
2045 // the cpu only look at the lower 5/6 bits anyway?
// This port sets false, i.e. no explicit masking is requested.
2046 const bool Matcher::need_masked_shift_count = false;
2047 
2048 // This affects two different things:
2049 //  - how Decode nodes are matched
2050 //  - how ImplicitNullCheck opportunities are recognized
2051 // If true, the matcher will try to remove all Decodes and match them
2052 // (as operands) into nodes. NullChecks are not prepared to deal with 
2053 // Decodes by final_graph_reshaping().
2054 // If false, final_graph_reshaping() forces the decode behind the Cmp
2055 // for a NullCheck. The matcher matches the Decode node into a register.
2056 // Implicit_null_check optimization moves the Decode along with the 
2057 // memory operation back up before the NullCheck.
// On this port the answer is true only when the narrow oop shift is zero.
2058 bool Matcher::narrow_oop_use_complex_address() {
2059   return Universe::narrow_oop_shift() == 0;
2060 }
2061 
// Klass counterpart of narrow_oop_use_complex_address(); currently always
// answers false (see the TODO below).
2062 bool Matcher::narrow_klass_use_complex_address() {
2063 // TODO
2064 // decide whether we need to set this to true
2065   return false;
2066 }
2067 
2068 // Is it better to copy float constants, or load them directly from
2069 // memory?  Intel can load a float constant from a direct address,
2070 // requiring no extra registers.  Most RISCs will have to materialize
2071 // an address into a register first, so they would do better to copy
2072 // the constant from stack.
// This port uses false.
2073 const bool Matcher::rematerialize_float_constants = false;
2074 
2075 // If CPU can load and store mis-aligned doubles directly then no
2076 // fixup is needed.  Else we split the double into 2 integer pieces
2077 // and move it piece-by-piece.  Only happens when passing doubles into
2078 // C code as the Java calling convention forces doubles to be aligned.
// This port uses true, i.e. no fixup.
2079 const bool Matcher::misaligned_doubles_ok = true;
2080 
2081 // Not implemented on this port: any call ends up in Unimplemented().
2082 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
2083   Unimplemented();
2084 }
2085 
2086 // Advertise here if the CPU requires explicit rounding operations to
2087 // implement the UseStrictFP mode.
// This port does not: the flag is false.
2088 const bool Matcher::strict_fp_requires_explicit_rounding = false;
2089 
2090 // Are floats converted to double when stored to stack during
2091 // deoptimization?
// This port answers true.
2092 bool Matcher::float_in_double() { return true; }
2093 
2094 // Do ints take an entire long register or just half?
2095 // The relevant question is how the int is callee-saved:
2096 // the whole long is written but de-opt'ing will have to extract
2097 // the relevant 32 bits.
// This port sets true.
2098 const bool Matcher::int_in_long = true;
2099 
2100 // Return whether or not this register is ever used as an argument.
2101 // This function is used on startup to build the trampoline stubs in
2102 // generateOptoStub.  Registers not mentioned will be killed by the VM
2103 // call in the trampoline, and arguments in those registers not be
2104 // available to the callee.
2105 bool Matcher::can_be_java_arg(int reg)
2106 {
2107   // Both halves of r0-r7 and of v0-v7 count as argument registers.
2108   switch (reg) {
2109   // general purpose registers
2110   case R0_num: case R0_H_num: case R1_num: case R1_H_num:
2111   case R2_num: case R2_H_num: case R3_num: case R3_H_num:
2112   case R4_num: case R4_H_num: case R5_num: case R5_H_num:
2113   case R6_num: case R6_H_num: case R7_num: case R7_H_num:
2114   // v registers
2115   case V0_num: case V0_H_num: case V1_num: case V1_H_num:
2116   case V2_num: case V2_H_num: case V3_num: case V3_H_num:
2117   case V4_num: case V4_H_num: case V5_num: case V5_H_num:
2118   case V6_num: case V6_H_num: case V7_num: case V7_H_num:
2119     return true;
2120   default:
2121     // no other register is reported as an argument register
2122     return false;
2123   }
2124 }
2125 
// A register is a spillable argument exactly when can_be_java_arg()
// accepts it.
2126 bool Matcher::is_spillable_arg(int reg)
2127 {
2128   return can_be_java_arg(reg);
2129 }
2130 
// Always false on this port; the divisor is not inspected.
2131 bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
2132   return false;
2133 }
2134 
// Register for DIVI projection of divmodI.
// Not expected to be called on this port (ShouldNotReachHere); the empty
// mask is only a placeholder return value.
2135 RegMask Matcher::divI_proj_mask() {
2136   ShouldNotReachHere();
2137   return RegMask();
2138 }
2139 
2140 // Register for MODI projection of divmodI.
// Not expected to be called on this port (ShouldNotReachHere); the empty
// mask is only a placeholder return value.
2141 RegMask Matcher::modI_proj_mask() {
2142   ShouldNotReachHere();
2143   return RegMask();
2144 }
2145 
2146 // Register for DIVL projection of divmodL.
// Not expected to be called on this port (ShouldNotReachHere); the empty
// mask is only a placeholder return value.
2147 RegMask Matcher::divL_proj_mask() {
2148   ShouldNotReachHere();
2149   return RegMask();
2150 }
2151 
2152 // Register for MODL projection of divmodL.
// Not expected to be called on this port (ShouldNotReachHere); the empty
// mask is only a placeholder return value.
2153 RegMask Matcher::modL_proj_mask() {
2154   ShouldNotReachHere();
2155   return RegMask();
2156 }
2157 
// Returns the FP register mask.
// NOTE(review): going by the name, this is where SP is saved around a
// method handle invoke -- confirm against the shared matcher code.
2158 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
2159   return FP_REG_mask();
2160 }
2161 
2162 
// Emit a volatile access INSN(REG, BASE).  Only a plain base register
// address is accepted: INDEX must be -1 and both DISP and SCALE must be 0
// (enforced with guarantee).  SCRATCH is not used by the expansion.  The
// macro declares a local _masm, so it needs cbuf in scope and can be
// expanded at most once per scope.
2163 #define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
2164   MacroAssembler _masm(&cbuf);                                              \
2165   {                                                                     \
2166     guarantee(INDEX == -1, "mode not permitted for volatile");          \
2167     guarantee(DISP == 0, "mode not permitted for volatile");            \
2168     guarantee(SCALE == 0, "mode not permitted for volatile");           \
2169     __ INSN(REG, as_Register(BASE));                                    \
2170   }
2171 
// Pointer-to-member types for the MacroAssembler load/store routines that
// the loadStore() helpers dispatch through: general register, floating
// point register, and floating point register plus SIMD register variant.
2172 typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
2173 typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
2174 typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
2175                                   MacroAssembler::SIMD_RegVariant T, const Address &adr);
2176 
2177   // Used for all non-volatile memory accesses.  The use of
2178   // $mem->opcode() to discover whether this pattern uses sign-extended
2179   // offsets is something of a kludge.
2180   static void loadStore(MacroAssembler masm, mem_insn insn,
2181                          Register reg, int opcode,
2182                          Register base, int index, int size, int disp)
2183   {
2184     // The operand kinds listed below get their index sign-extended
2185     // (sxtw); every other operand uses a plain left shift (lsl).  The
2186     // kinds have to be enumerated because the operand opcode is the only
2187     // hint this helper receives.
2188     Address::extend scale;
2189     switch (opcode) {
2190     case INDINDEXSCALEDOFFSETI2L:
2191     case INDINDEXSCALEDI2L:
2192     case INDINDEXSCALEDOFFSETI2LN:
2193     case INDINDEXSCALEDI2LN:
2194     case INDINDEXOFFSETI2L:
2195     case INDINDEXOFFSETI2LN:
2196       scale = Address::sxtw(size);
2197       break;
2198     default:
2199       scale = Address::lsl(size);
2200     }
2201 
2202     // No index register: plain base + displacement addressing.
2203     if (index == -1) {
2204       (masm.*insn)(reg, Address(base, disp));
2205       return;
2206     }
2207     // Indexed access: a non-zero displacement is first folded into
2208     // rscratch1 (base + disp), which then serves as the base register.
2209     Register addr_base = base;
2210     if (disp != 0) { masm.lea(rscratch1, Address(base, disp)); addr_base = rscratch1; }
2211     (masm.*insn)(reg, Address(addr_base, as_Register(index), scale));
2212   }
2213 
2214   static void loadStore(MacroAssembler masm, mem_float_insn insn,
2215                          FloatRegister reg, int opcode,
2216                          Register base, int index, int size, int disp)
2217   {
    // Floating point variant of the non-volatile access helper.  The set
    // of operand kinds whose index is sign-extended (sxtw) matches the
    // general register variant, including the unscaled
    // INDINDEXOFFSETI2L / INDINDEXOFFSETI2LN kinds.  All other operands
    // use a plain left shift (lsl).
2218     Address::extend scale;
2219     switch (opcode) {
2220     case INDINDEXSCALEDOFFSETI2L:
2221     case INDINDEXSCALEDI2L:
2222     case INDINDEXSCALEDOFFSETI2LN:
2223     case INDINDEXSCALEDI2LN:
2224     case INDINDEXOFFSETI2L:
2225     case INDINDEXOFFSETI2LN:
2226       scale = Address::sxtw(size);
2227       break;
2228     default:
2229       scale = Address::lsl(size);
2230     }
2231     if (index == -1) {
      // No index register: plain base + displacement addressing.
2232       (masm.*insn)(reg, Address(base, disp));
2233     } else {
2234       if (disp == 0) {
2235         (masm.*insn)(reg, Address(base, as_Register(index), scale));
2236       } else {
        // Fold base + disp into rscratch1, then index off that.
2237         masm.lea(rscratch1, Address(base, disp));
2238         (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
2239       }
2240     }
2241   }
2242 
2243   static void loadStore(MacroAssembler masm, mem_vector_insn insn,
2244                          FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
2245                          int opcode, Register base, int index, int size, int disp)
2246   {
2247     if (index != -1) {  // indexed form: the displacement must be zero
2248       assert(disp == 0, "unsupported address mode");
2249       (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
2250       return;
2251     }
2252     (masm.*insn)(reg, T, Address(base, disp));  // plain base + displacement
2253   }
2254 
2255 %}
2256 
2257 
2258 
2259 //----------ENCODING BLOCK-----------------------------------------------------
2260 // This block specifies the encoding classes used by the compiler to
2261 // output byte streams.  Encoding classes are parameterized macros
2262 // used by Machine Instruction Nodes in order to generate the bit
2263 // encoding of the instruction.  Operands specify their base encoding
2264 // interface with the interface keyword.  Four interfaces are
2265 // currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
2266 // COND_INTER.  REG_INTER causes an operand to generate a function
2267 // which returns its register number when queried.  CONST_INTER causes
2268 // an operand to generate a function which returns the value of the
2269 // constant when queried.  MEMORY_INTER causes an operand to generate
2270 // four functions which return the Base Register, the Index Register,
2271 // the Scale Value, and the Offset Value of the operand when queried.
2272 // COND_INTER causes an operand to generate six functions which return
2273 // the encoding code (ie - encoding bits for the instruction)
2274 // associated with each basic boolean condition for a conditional
2275 // instruction.
2276 //
2277 // Instructions specify two basic values for encoding.  Again, a
2278 // function is available to check if the constant displacement is an
2279 // oop. They use the ins_encode keyword to specify their encoding
2280 // classes (which must be a sequence of enc_class names, and their
2281 // parameters, specified in the encoding block), and they use the
2282 // opcode keyword to specify, in order, their primary, secondary, and
2283 // tertiary opcode.  Only the opcode sections which a particular
2284 // instruction needs for encoding need to be specified.
2285 encode %{
2286   // Build emit functions for each basic byte or larger field in the
2287   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2288   // from C++ code in the enc_class source block.  Emit functions will
2289   // live in the main source block for now.  In future, we can
2290   // generalize this by adding a syntax that specifies the sizes of
2291   // fields in an order, so that the adlc can build the emit functions
2292   // automagically
2293 
2294   // catch all for unimplemented encodings
2295   enc_class enc_unimplemented %{
    // Emits a call to MacroAssembler::unimplemented with a fixed message.
2296     MacroAssembler _masm(&cbuf);
2297     __ unimplemented("C2 catch all");    
2298   %}
2299 
2300   // BEGIN Non-volatile memory access
2301 
2302   enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
2303     // Non-volatile access: ldrsbw into dst from the address operand mem.
2304     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, as_Register($dst$$reg),
2305               $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2306   %}
2307 
2308   enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
2309     // Non-volatile access: ldrsb into dst from the address operand mem.
2310     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, as_Register($dst$$reg),
2311               $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2312   %}
2313 
2314   enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
2315     // Non-volatile access: ldrb into the int register dst from mem.
2316     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, as_Register($dst$$reg),
2317               $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2318   %}
2319 
2320   enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
2321     // Non-volatile access: ldrb into the long register dst from mem.
2322     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, as_Register($dst$$reg),
2323               $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2324   %}
2325 
2326   enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
2327     // Non-volatile access: ldrshw into dst from the address operand mem.
2328     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, as_Register($dst$$reg),
2329               $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2330   %}
2331 
2332   enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
2333     // Non-volatile access: ldrsh into dst from the address operand mem.
2334     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, as_Register($dst$$reg),
2335               $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2336   %}
2337 
2338   enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
2339     // Non-volatile access: ldrh into the int register dst from mem.
2340     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, as_Register($dst$$reg),
2341               $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2342   %}
2343 
2344   enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
2345     // Non-volatile access: ldrh into the long register dst from mem.
2346     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, as_Register($dst$$reg),
2347               $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2348   %}
2349 
2350   enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
2351     // Non-volatile access: ldrw into the int register dst from mem.
2352     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, as_Register($dst$$reg),
2353               $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2354   %}
2355 
2356   enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
2357     // Non-volatile access: ldrw into the long register dst from mem.
2358     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, as_Register($dst$$reg),
2359               $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2360   %}
2361 
2362   enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
2363     // Non-volatile access: ldrsw into dst from the address operand mem.
2364     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, as_Register($dst$$reg),
2365               $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2366   %}
2367 
2368   enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
2369     // Non-volatile access: ldr into dst from the address operand mem.
2370     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, as_Register($dst$$reg),
2371               $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2372   %}
2373 
2374   enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
2375     // Non-volatile access: ldrs into the float register dst from mem.
2376     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, as_FloatRegister($dst$$reg),
2377               $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2378   %}
2379 
2380   enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
2381     // Non-volatile access: ldrd into the float register dst from mem.
2382     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, as_FloatRegister($dst$$reg),
2383               $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2384   %}
2385 
2386   enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
2387     // Non-volatile vector access: ldr with SIMD variant S into dst from mem.
2388     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, as_FloatRegister($dst$$reg),
2389        MacroAssembler::S, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2390   %}
2391 
2392   enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
2393     // Non-volatile vector access: ldr with SIMD variant D into dst from mem.
2394     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, as_FloatRegister($dst$$reg),
2395        MacroAssembler::D, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2396   %}
2397 
2398   enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
2399     // Non-volatile vector access: ldr with SIMD variant Q into dst from mem.
2400     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, as_FloatRegister($dst$$reg),
2401        MacroAssembler::Q, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2402   %}
2403 
2404   enc_class aarch64_enc_strb(iRegI src, memory mem) %{
2405     // Non-volatile access: strb of src to the address operand mem.
2406     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, as_Register($src$$reg),
2407               $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2408   %}
2409 
2410   enc_class aarch64_enc_strb0(memory mem) %{
2411     // Non-volatile access: strb of the zero register (zr) to mem.
2412     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, zr, $mem->opcode(),
2413               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2414   %}
2415 
2416   enc_class aarch64_enc_strb0_ordered(memory mem) %{
    // Like aarch64_enc_strb0, but a StoreStore memory barrier is emitted
    // first, so the barrier precedes the strb of zr.
2417     MacroAssembler _masm(&cbuf);
2418     __ membar(Assembler::StoreStore);
2419     loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
2420                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2421   %}
2422 
2423   enc_class aarch64_enc_strh(iRegI src, memory mem) %{
2424     // Non-volatile access: strh of src to the address operand mem.
2425     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, as_Register($src$$reg),
2426               $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2427   %}
2428 
2429   enc_class aarch64_enc_strh0(memory mem) %{
2430     // Non-volatile access: strh of the zero register (zr) to mem.
2431     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, zr, $mem->opcode(),
2432               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2433   %}
2434 
2435   enc_class aarch64_enc_strw(iRegI src, memory mem) %{
2436     // Non-volatile access: strw of src to the address operand mem.
2437     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, as_Register($src$$reg),
2438               $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2439   %}
2440 
2441   enc_class aarch64_enc_strw0(memory mem) %{
2442     // Non-volatile access: strw of the zero register (zr) to mem.
2443     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, zr, $mem->opcode(),
2444               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2445   %}
2446 
2447   enc_class aarch64_enc_str(iRegL src, memory mem) %{
2448     MacroAssembler _masm(&cbuf);
2449     Register value = as_Register($src$$reg);
2450     // we sometimes get asked to store the stack pointer into the current
2451     // thread -- AArch64 cannot do that directly, so go via rscratch2
2452     if (value == r31_sp) {
2453       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
2454       __ mov(rscratch2, sp);
2455       value = rscratch2;
2456     }
2457     loadStore(_masm, &MacroAssembler::str, value, $mem->opcode(),
2458               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2459   %}
2460 
2461   enc_class aarch64_enc_str0(memory mem) %{
2462     // Non-volatile access: str of the zero register (zr) to mem.
2463     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, zr, $mem->opcode(),
2464               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2465   %}
2466 
2467   enc_class aarch64_enc_strs(vRegF src, memory mem) %{
2468     // Non-volatile access: strs of the float register src to mem.
2469     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, as_FloatRegister($src$$reg),
2470               $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2471   %}
2472 
2473   enc_class aarch64_enc_strd(vRegD src, memory mem) %{
2474     // Non-volatile access: strd of the float register src to mem.
2475     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, as_FloatRegister($src$$reg),
2476               $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2477   %}
2478 
2479   enc_class aarch64_enc_strvS(vecD src, memory mem) %{
2480     // Non-volatile vector access: str with SIMD variant S of src to mem.
2481     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, as_FloatRegister($src$$reg),
2482        MacroAssembler::S, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2483   %}
2484 
2485   enc_class aarch64_enc_strvD(vecD src, memory mem) %{
2486     // Non-volatile vector access: str with SIMD variant D of src to mem.
2487     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, as_FloatRegister($src$$reg),
2488        MacroAssembler::D, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2489   %}
2490 
2491   enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
2492     // Non-volatile vector access: str with SIMD variant Q of src to mem.
2493     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, as_FloatRegister($src$$reg),
2494        MacroAssembler::Q, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2495   %}
2496 
2497   // END Non-volatile memory access
2498 
2499   // this encoding writes the address of the first instruction in the
2500   // call sequence for the runtime call into the anchor pc slot. this
2501   // address allows the runtime to i) locate the code buffer for the
2502   // caller (any address in the buffer would do) and ii) find the oop
2503   // map associated with the call (has to address the instruction
2504   // following the call). note that we have to store the address which
2505   // follows the actual call.
2506   // 
2507   // the offset from the current pc can be computed by considering
2508   // what gets generated between this point up to and including the
2509   // call. it looks like this
2510   //
2511   //   movz xscratch1 0xnnnn        <-- current pc is here
2512   //   movk xscratch1 0xnnnn
2513   //   movk xscratch1 0xnnnn
2514   //   str xscratch1, [xthread,#anchor_pc_off]
2515   //   mov xscratch2, sp
2516   //   str xscratch2, [xthread,#anchor_sp_off]
2517   //   mov x0, x1
2518   //   . . .
2519   //   mov xn-1, xn
2520   //   mov xn, thread            <-- always passed
2521   //   mov xn+1, rfp             <-- optional iff primary == 1
2522   //   movz xscratch1 0xnnnn
2523   //   movk xscratch1 0xnnnn
2524   //   movk xscratch1 0xnnnn
2525   //   blr  xscratch1
2526   //   . . .
2527   //
2528   // where the called routine has n args (including the thread and,
2529   // possibly the stub's caller return address currently in rfp).  we
2530   // can compute n by looking at the number of args passed into the
  // stub. we assert that nargs is <= 8.
2532   //
2533   // so the offset we need to add to the pc (in 32-bit words) is
2534   //   3 +        <-- load 48-bit constant return pc
2535   //   1 +        <-- write anchor pc
2536   //   1 +        <-- copy sp
2537   //   1 +        <-- write anchor sp
2538   //   nargs +    <-- java stub arg count
2539   //   1 +        <-- extra thread arg
2540   // [ 1 + ]      <-- optional ret address of stub caller
  //   3 +        <-- load 48-bit call target address
2542   //   1          <-- blr instruction
2543   //
2544   // i.e we need to add (nargs + 11) * 4 bytes or (nargs + 12) * 4 bytes
2545   //
2546 
2547   enc_class aarch64_enc_save_pc() %{
2548     Compile* C = ra_->C;
2549     int nargs = C->tf()->domain()->cnt() - TypeFunc::Parms;
2550     if ($primary) { nargs++; }
2551     assert(nargs <= 8, "opto runtime stub has more than 8 args!");
2552     MacroAssembler _masm(&cbuf);
2553     address pc = __ pc();
2554     int call_offset = (nargs + 11) * 4;
2555     int field_offset = in_bytes(JavaThread::frame_anchor_offset()) +
2556                        in_bytes(JavaFrameAnchor::last_Java_pc_offset());
2557     __ lea(rscratch1, InternalAddress(pc + call_offset));
2558     __ str(rscratch1, Address(rthread, field_offset));
2559   %}
2560 
2561   // volatile loads and stores
2562 
2563   enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
2564     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2565                  rscratch1, stlrb);
2566     if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
2567       __ dmb(__ ISH);
2568   %}
2569 
2570   enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
2571     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2572                  rscratch1, stlrh);
2573     if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
2574       __ dmb(__ ISH);
2575   %}
2576 
2577   enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
2578     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2579                  rscratch1, stlrw);
2580     if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
2581       __ dmb(__ ISH);
2582   %}
2583 
2584 
2585   enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
2586     Register dst_reg = as_Register($dst$$reg);
2587     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2588              rscratch1, ldarb);
2589     __ sxtbw(dst_reg, dst_reg);
2590   %}
2591 
2592   enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
2593     Register dst_reg = as_Register($dst$$reg);
2594     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2595              rscratch1, ldarb);
2596     __ sxtb(dst_reg, dst_reg);
2597   %}
2598 
2599   enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
2600     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2601              rscratch1, ldarb);
2602   %}
2603 
2604   enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
2605     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2606              rscratch1, ldarb);
2607   %}
2608 
2609   enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
2610     Register dst_reg = as_Register($dst$$reg);
2611     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2612              rscratch1, ldarh);
2613     __ sxthw(dst_reg, dst_reg);
2614   %}
2615 
2616   enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
2617     Register dst_reg = as_Register($dst$$reg);
2618     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2619              rscratch1, ldarh);
2620     __ sxth(dst_reg, dst_reg);
2621   %}
2622 
2623   enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
2624     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2625              rscratch1, ldarh);
2626   %}
2627 
2628   enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
2629     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2630              rscratch1, ldarh);
2631   %}
2632 
2633   enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
2634     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2635              rscratch1, ldarw);
2636   %}
2637 
2638   enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
2639     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2640              rscratch1, ldarw);
2641   %}
2642 
2643   enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
2644     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2645              rscratch1, ldar);
2646   %}
2647 
2648   enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
2649     MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2650              rscratch1, ldarw);
2651     __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
2652   %}
2653 
2654   enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
2655     MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2656              rscratch1, ldar);
2657     __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
2658   %}
2659 
2660   enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
2661     Register src_reg = as_Register($src$$reg);
2662     // we sometimes get asked to store the stack pointer into the
2663     // current thread -- we cannot do that directly on AArch64
2664     if (src_reg == r31_sp) {
2665         MacroAssembler _masm(&cbuf);
2666       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
2667       __ mov(rscratch2, sp);
2668       src_reg = rscratch2;
2669     }
2670     MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2671                  rscratch1, stlr);
2672     if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
2673       __ dmb(__ ISH);
2674   %}
2675 
2676   enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
2677     {
2678       MacroAssembler _masm(&cbuf);
2679       FloatRegister src_reg = as_FloatRegister($src$$reg);
2680       __ fmovs(rscratch2, src_reg);
2681     }
2682     MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2683                  rscratch1, stlrw);
2684     if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
2685       __ dmb(__ ISH);
2686   %}
2687 
2688   enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
2689     {
2690       MacroAssembler _masm(&cbuf);
2691       FloatRegister src_reg = as_FloatRegister($src$$reg);
2692       __ fmovd(rscratch2, src_reg);
2693     }
2694     MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2695                  rscratch1, stlr);
2696     if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS)
2697       __ dmb(__ ISH);
2698   %}
2699 
2700   // synchronized read/update encodings
2701 
2702   enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
2703     MacroAssembler _masm(&cbuf);
2704     Register dst_reg = as_Register($dst$$reg);
2705     Register base = as_Register($mem$$base);
2706     int index = $mem$$index;
2707     int scale = $mem$$scale;
2708     int disp = $mem$$disp;
2709     if (index == -1) {
2710        if (disp != 0) {      
2711         __ lea(rscratch1, Address(base, disp));
2712         __ ldaxr(dst_reg, rscratch1);
2713       } else {
2714         // TODO
2715         // should we ever get anything other than this case?
2716         __ ldaxr(dst_reg, base);
2717       }
2718     } else {
2719       Register index_reg = as_Register(index);
2720       if (disp == 0) {
2721         __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
2722         __ ldaxr(dst_reg, rscratch1);
2723       } else {
2724         __ lea(rscratch1, Address(base, disp));
2725         __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
2726         __ ldaxr(dst_reg, rscratch1);
2727       }
2728     }
2729   %}
2730 
2731   enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
2732     MacroAssembler _masm(&cbuf);
2733     Register src_reg = as_Register($src$$reg);
2734     Register base = as_Register($mem$$base);
2735     int index = $mem$$index;
2736     int scale = $mem$$scale;
2737     int disp = $mem$$disp;
2738     if (index == -1) {
2739        if (disp != 0) {      
2740         __ lea(rscratch2, Address(base, disp));
2741         __ stlxr(rscratch1, src_reg, rscratch2);
2742       } else {
2743         // TODO
2744         // should we ever get anything other than this case?
2745         __ stlxr(rscratch1, src_reg, base);
2746       }
2747     } else {
2748       Register index_reg = as_Register(index);
2749       if (disp == 0) {
2750         __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
2751         __ stlxr(rscratch1, src_reg, rscratch2);
2752       } else {
2753         __ lea(rscratch2, Address(base, disp));
2754         __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
2755         __ stlxr(rscratch1, src_reg, rscratch2);
2756       }
2757     }
2758     __ cmpw(rscratch1, zr);
2759   %}
2760 
2761   enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
2762     MacroAssembler _masm(&cbuf);
2763     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
2764     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
2765                Assembler::xword, /*acquire*/ false, /*release*/ true);
2766   %}
2767 
2768   enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
2769     MacroAssembler _masm(&cbuf);
2770     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
2771     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
2772                Assembler::word, /*acquire*/ false, /*release*/ true);
2773   %}
2774 
2775 
2776   enc_class aarch64_enc_cmpxchg_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegINoSp res) %{
2777     MacroAssembler _masm(&cbuf);
2778     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
2779     Register tmp = $tmp$$Register;
2780     __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
2781     ShenandoahBarrierSetAssembler::bsasm()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
2782                               /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false, $res$$Register);
2783   %}
2784 
2785   // The only difference between aarch64_enc_cmpxchg and
2786   // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
2787   // CompareAndSwap sequence to serve as a barrier on acquiring a
2788   // lock.
2789   enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
2790     MacroAssembler _masm(&cbuf);
2791     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
2792     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
2793                Assembler::xword, /*acquire*/ true, /*release*/ true);
2794   %}
2795 
2796   enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
2797     MacroAssembler _masm(&cbuf);
2798     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
2799     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
2800                Assembler::word, /*acquire*/ true, /*release*/ true);
2801   %}
2802 
2803   enc_class aarch64_enc_cmpxchg_acq_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegINoSp res) %{
2804     MacroAssembler _masm(&cbuf);
2805     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
2806     Register tmp = $tmp$$Register;
2807     __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
2808     ShenandoahBarrierSetAssembler::bsasm()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
2809                               /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false,
2810                               $res$$Register);
2811   %}
2812 
2813   // auxiliary used for CompareAndSwapX to set result register
2814   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
2815     MacroAssembler _masm(&cbuf);
2816     Register res_reg = as_Register($res$$reg);
2817     __ cset(res_reg, Assembler::EQ);
2818   %}
2819 
2820   // prefetch encodings
2821 
2822   enc_class aarch64_enc_prefetchr(memory mem) %{
2823     MacroAssembler _masm(&cbuf);
2824     Register base = as_Register($mem$$base);
2825     int index = $mem$$index;
2826     int scale = $mem$$scale;
2827     int disp = $mem$$disp;
2828     if (index == -1) {
2829       __ prfm(Address(base, disp), PLDL1KEEP);
2830     } else {
2831       Register index_reg = as_Register(index);
2832       if (disp == 0) {
2833         __ prfm(Address(base, index_reg, Address::lsl(scale)), PLDL1KEEP);
2834       } else {
2835         __ lea(rscratch1, Address(base, disp));
2836         __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PLDL1KEEP);
2837       }
2838     }
2839   %}
2840 
2841   enc_class aarch64_enc_prefetchw(memory mem) %{
2842     MacroAssembler _masm(&cbuf);
2843     Register base = as_Register($mem$$base);
2844     int index = $mem$$index;
2845     int scale = $mem$$scale;
2846     int disp = $mem$$disp;
2847     if (index == -1) {
2848       __ prfm(Address(base, disp), PSTL1KEEP);
2849     } else {
2850       Register index_reg = as_Register(index);
2851       if (disp == 0) {
2852         __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
2853       } else {
2854         __ lea(rscratch1, Address(base, disp));
2855         __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
2856       }
2857     }
2858   %}
2859 
2860   enc_class aarch64_enc_prefetchnta(memory mem) %{
2861     MacroAssembler _masm(&cbuf);
2862     Register base = as_Register($mem$$base);
2863     int index = $mem$$index;
2864     int scale = $mem$$scale;
2865     int disp = $mem$$disp;
2866     if (index == -1) {
2867       __ prfm(Address(base, disp), PSTL1STRM);
2868     } else {
2869       Register index_reg = as_Register(index);
2870       if (disp == 0) {
2871         __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1STRM);
2872         __ nop();
2873       } else {
2874         __ lea(rscratch1, Address(base, disp));
2875         __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1STRM);
2876       }
2877     }
2878   %}
2879 
  // mov encodings
2881 
2882   enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
2883     MacroAssembler _masm(&cbuf);
2884     u_int32_t con = (u_int32_t)$src$$constant;
2885     Register dst_reg = as_Register($dst$$reg);
2886     if (con == 0) {
2887       __ movw(dst_reg, zr);
2888     } else {
2889       __ movw(dst_reg, con);
2890     }
2891   %}
2892 
2893   enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
2894     MacroAssembler _masm(&cbuf);
2895     Register dst_reg = as_Register($dst$$reg);
2896     u_int64_t con = (u_int64_t)$src$$constant;
2897     if (con == 0) {
2898       __ mov(dst_reg, zr);
2899     } else {
2900       __ mov(dst_reg, con);
2901     }
2902   %}
2903 
2904   enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
2905     MacroAssembler _masm(&cbuf);
2906     Register dst_reg = as_Register($dst$$reg);
2907     address con = (address)$src$$constant;
2908     if (con == NULL || con == (address)1) {
2909       ShouldNotReachHere();
2910     } else {
2911       relocInfo::relocType rtype = $src->constant_reloc();
2912       if (rtype == relocInfo::oop_type) {
2913         __ movoop(dst_reg, (jobject)con, /*immediate*/true);
2914       } else if (rtype == relocInfo::metadata_type) {
2915         __ mov_metadata(dst_reg, (Metadata*)con);
2916       } else {
2917         assert(rtype == relocInfo::none, "unexpected reloc type");
2918         if (con < (address)(uintptr_t)os::vm_page_size()) {
2919           __ mov(dst_reg, con);
2920         } else {
2921           unsigned long offset;
2922           __ adrp(dst_reg, con, offset);
2923           __ add(dst_reg, dst_reg, offset);
2924         }
2925       }
2926     }
2927   %}
2928 
2929   enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
2930     MacroAssembler _masm(&cbuf);
2931     Register dst_reg = as_Register($dst$$reg);
2932     __ mov(dst_reg, zr);
2933   %}
2934 
2935   enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
2936     MacroAssembler _masm(&cbuf);
2937     Register dst_reg = as_Register($dst$$reg);
2938     __ mov(dst_reg, (u_int64_t)1);
2939   %}
2940 
2941   enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
2942     MacroAssembler _masm(&cbuf);
2943     address page = (address)$src$$constant;
2944     Register dst_reg = as_Register($dst$$reg);
2945     unsigned long off;
2946     __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
2947     assert(off == 0, "assumed offset == 0");
2948   %}
2949 
2950   enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
2951     MacroAssembler _masm(&cbuf);
2952     __ load_byte_map_base($dst$$Register);
2953   %}
2954 
2955   enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
2956     MacroAssembler _masm(&cbuf);
2957     Register dst_reg = as_Register($dst$$reg);
2958     address con = (address)$src$$constant;
2959     if (con == NULL) {
2960       ShouldNotReachHere();
2961     } else {
2962       relocInfo::relocType rtype = $src->constant_reloc();
2963       assert(rtype == relocInfo::oop_type, "unexpected reloc type");
2964       __ set_narrow_oop(dst_reg, (jobject)con);
2965     }
2966   %}
2967 
2968   enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
2969     MacroAssembler _masm(&cbuf);
2970     Register dst_reg = as_Register($dst$$reg);
2971     __ mov(dst_reg, zr);
2972   %}
2973 
2974   enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
2975     MacroAssembler _masm(&cbuf);
2976     Register dst_reg = as_Register($dst$$reg);
2977     address con = (address)$src$$constant;
2978     if (con == NULL) {
2979       ShouldNotReachHere();
2980     } else {
2981       relocInfo::relocType rtype = $src->constant_reloc();
2982       assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
2983       __ set_narrow_klass(dst_reg, (Klass *)con);
2984     }
2985   %}
2986 
2987   // arithmetic encodings
2988 
2989   enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
2990     MacroAssembler _masm(&cbuf);
2991     Register dst_reg = as_Register($dst$$reg);
2992     Register src_reg = as_Register($src1$$reg);
2993     int32_t con = (int32_t)$src2$$constant;
2994     // add has primary == 0, subtract has primary == 1
2995     if ($primary) { con = -con; }
2996     if (con < 0) {
2997       __ subw(dst_reg, src_reg, -con);
2998     } else {
2999       __ addw(dst_reg, src_reg, con);
3000     }
3001   %}
3002 
3003   enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
3004     MacroAssembler _masm(&cbuf);
3005     Register dst_reg = as_Register($dst$$reg);
3006     Register src_reg = as_Register($src1$$reg);
3007     int32_t con = (int32_t)$src2$$constant;
3008     // add has primary == 0, subtract has primary == 1
3009     if ($primary) { con = -con; }
3010     if (con < 0) {
3011       __ sub(dst_reg, src_reg, -con);
3012     } else {
3013       __ add(dst_reg, src_reg, con);
3014     }
3015   %}
3016 
3017   enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
3018     MacroAssembler _masm(&cbuf);
3019    Register dst_reg = as_Register($dst$$reg);
3020    Register src1_reg = as_Register($src1$$reg);
3021    Register src2_reg = as_Register($src2$$reg);
3022     __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
3023   %}
3024 
3025   enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
3026     MacroAssembler _masm(&cbuf);
3027    Register dst_reg = as_Register($dst$$reg);
3028    Register src1_reg = as_Register($src1$$reg);
3029    Register src2_reg = as_Register($src2$$reg);
3030     __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
3031   %}
3032 
3033   enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
3034     MacroAssembler _masm(&cbuf);
3035    Register dst_reg = as_Register($dst$$reg);
3036    Register src1_reg = as_Register($src1$$reg);
3037    Register src2_reg = as_Register($src2$$reg);
3038     __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
3039   %}
3040 
3041   enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
3042     MacroAssembler _masm(&cbuf);
3043    Register dst_reg = as_Register($dst$$reg);
3044    Register src1_reg = as_Register($src1$$reg);
3045    Register src2_reg = as_Register($src2$$reg);
3046     __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
3047   %}
3048 
3049   // compare instruction encodings
3050 
3051   enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
3052     MacroAssembler _masm(&cbuf);
3053     Register reg1 = as_Register($src1$$reg);
3054     Register reg2 = as_Register($src2$$reg);
3055     __ cmpw(reg1, reg2);
3056   %}
3057 
3058   enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
3059     MacroAssembler _masm(&cbuf);
3060     Register reg = as_Register($src1$$reg);
3061     int32_t val = $src2$$constant;
3062     if (val >= 0) {
3063       __ subsw(zr, reg, val);
3064     } else {
3065       __ addsw(zr, reg, -val);
3066     }
3067   %}
3068 
3069   enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
3070     MacroAssembler _masm(&cbuf);
3071     Register reg1 = as_Register($src1$$reg);
3072     u_int32_t val = (u_int32_t)$src2$$constant;
3073     __ movw(rscratch1, val);
3074     __ cmpw(reg1, rscratch1);
3075   %}
3076 
3077   enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
3078     MacroAssembler _masm(&cbuf);
3079     Register reg1 = as_Register($src1$$reg);
3080     Register reg2 = as_Register($src2$$reg);
3081     __ cmp(reg1, reg2);
3082   %}
3083 
3084   enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
3085     MacroAssembler _masm(&cbuf);
3086     Register reg = as_Register($src1$$reg);
3087     int64_t val = $src2$$constant;
3088     if (val >= 0) {
3089       __ subs(zr, reg, val);
3090     } else if (val != -val) {
3091       __ adds(zr, reg, -val);
3092     } else {
3093     // aargh, Long.MIN_VALUE is a special case
3094       __ orr(rscratch1, zr, (u_int64_t)val);
3095       __ subs(zr, reg, rscratch1);
3096     }
3097   %}
3098 
3099   enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
3100     MacroAssembler _masm(&cbuf);
3101     Register reg1 = as_Register($src1$$reg);
3102     u_int64_t val = (u_int64_t)$src2$$constant;
3103     __ mov(rscratch1, val);
3104     __ cmp(reg1, rscratch1);
3105   %}
3106 
3107   enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
3108     MacroAssembler _masm(&cbuf);
3109     Register reg1 = as_Register($src1$$reg);
3110     Register reg2 = as_Register($src2$$reg);
3111     __ cmp(reg1, reg2);
3112   %}
3113 
3114   enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
3115     MacroAssembler _masm(&cbuf);
3116     Register reg1 = as_Register($src1$$reg);
3117     Register reg2 = as_Register($src2$$reg);
3118     __ cmpw(reg1, reg2);
3119   %}
3120 
3121   enc_class aarch64_enc_testp(iRegP src) %{
3122     MacroAssembler _masm(&cbuf);
3123     Register reg = as_Register($src$$reg);
3124     __ cmp(reg, zr);
3125   %}
3126 
3127   enc_class aarch64_enc_testn(iRegN src) %{
3128     MacroAssembler _masm(&cbuf);
3129     Register reg = as_Register($src$$reg);
3130     __ cmpw(reg, zr);
3131   %}
3132 
3133   enc_class aarch64_enc_b(label lbl) %{
3134     MacroAssembler _masm(&cbuf);
3135     Label *L = $lbl$$label;
3136     __ b(*L);
3137   %}
3138 
3139   enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
3140     MacroAssembler _masm(&cbuf);
3141     Label *L = $lbl$$label;
3142     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
3143   %}
3144 
3145   enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
3146     MacroAssembler _masm(&cbuf);
3147     Label *L = $lbl$$label;
3148     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
3149   %}
3150 
3151   enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
3152   %{
3153      Register sub_reg = as_Register($sub$$reg);
3154      Register super_reg = as_Register($super$$reg);
3155      Register temp_reg = as_Register($temp$$reg);
3156      Register result_reg = as_Register($result$$reg);
3157 
3158      Label miss;
3159      MacroAssembler _masm(&cbuf);
3160      __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
3161                                      NULL, &miss,
3162                                      /*set_cond_codes:*/ true);
3163      if ($primary) {
3164        __ mov(result_reg, zr);
3165      }
3166      __ bind(miss);
3167   %}
3168 
3169   enc_class aarch64_enc_java_static_call(method meth) %{
3170     MacroAssembler _masm(&cbuf);
3171 
3172     address mark = __ pc();
3173     address addr = (address)$meth$$method;
3174     address call;
3175     if (!_method) {
3176       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
3177       call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
3178     } else if (_optimized_virtual) {
3179       call = __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
3180     } else {
3181       call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
3182     }
3183     if (call == NULL) {
3184       ciEnv::current()->record_failure("CodeCache is full"); 
3185       return;
3186     }
3187 
3188     if (_method) {
3189       // Emit stub for static call
3190       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
3191       if (stub == NULL) {
3192         ciEnv::current()->record_failure("CodeCache is full"); 
3193         return;
3194       }
3195     }
3196   %}
3197 
3198   enc_class aarch64_enc_java_handle_call(method meth) %{
3199     MacroAssembler _masm(&cbuf);
3200     relocInfo::relocType reloc;
3201 
3202     // RFP is preserved across all calls, even compiled calls.
3203     // Use it to preserve SP.
3204     __ mov(rfp, sp);
3205 
3206     address mark = __ pc();
3207     address addr = (address)$meth$$method;
3208     address call;
3209     if (!_method) {
3210       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
3211       call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
3212     } else if (_optimized_virtual) {
3213       call = __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
3214     } else {
3215       call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
3216     }
3217     if (call == NULL) {
3218       ciEnv::current()->record_failure("CodeCache is full"); 
3219       return;
3220     }
3221 
3222     if (_method) {
3223       // Emit stub for static call
3224       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
3225       if (stub == NULL) {
3226         ciEnv::current()->record_failure("CodeCache is full"); 
3227         return;
3228       }
3229     }
3230 
3231     // now restore sp
3232     __ mov(sp, rfp);
3233   %}
3234 
3235   enc_class aarch64_enc_java_dynamic_call(method meth) %{
3236     MacroAssembler _masm(&cbuf);
3237     address call = __ ic_call((address)$meth$$method);
3238     if (call == NULL) {
3239       ciEnv::current()->record_failure("CodeCache is full"); 
3240       return;
3241     }
3242   %}
3243 
3244   enc_class aarch64_enc_call_epilog() %{
3245     MacroAssembler _masm(&cbuf);
3246     if (VerifyStackAtCalls) {
3247       // Check that stack depth is unchanged: find majik cookie on stack
3248       __ call_Unimplemented();
3249     }
3250   %}
3251 
3252   enc_class aarch64_enc_java_to_runtime(method meth) %{
3253     MacroAssembler _masm(&cbuf);
3254 
3255     // some calls to generated routines (arraycopy code) are scheduled
3256     // by C2 as runtime calls. if so we can call them using a br (they
3257     // will be in a reachable segment) otherwise we have to use a blr
3258     // which loads the absolute address into a register.
3259     address entry = (address)$meth$$method;
3260     CodeBlob *cb = CodeCache::find_blob(entry);
3261     if (cb) {
3262       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
3263       if (call == NULL) {
3264         ciEnv::current()->record_failure("CodeCache is full"); 
3265         return;
3266       }
3267     } else {
3268       Label retaddr;
3269       __ adr(rscratch2, retaddr);
3270       __ lea(rscratch1, RuntimeAddress(entry));
3271       // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
3272       __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
3273       __ blr(rscratch1);
3274       __ bind(retaddr);
3275       __ add(sp, sp, 2 * wordSize);
3276     }
3277   %}
3278 
3279   enc_class aarch64_enc_rethrow() %{
3280     MacroAssembler _masm(&cbuf);
3281     __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
3282   %}
3283 
3284   enc_class aarch64_enc_ret() %{
3285     MacroAssembler _masm(&cbuf);
3286     __ ret(lr);
3287   %}
3288 
3289   enc_class aarch64_enc_tail_call(iRegP jump_target) %{
3290     MacroAssembler _masm(&cbuf);
3291     Register target_reg = as_Register($jump_target$$reg);
3292     __ br(target_reg);
3293   %}
3294 
3295   enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
3296     MacroAssembler _masm(&cbuf);
3297     Register target_reg = as_Register($jump_target$$reg);
3298     // exception oop should be in r0
3299     // ret addr has been popped into lr
3300     // callee expects it in r3
3301     __ mov(r3, lr);
3302     __ br(target_reg);
3303   %}
3304 
3305   enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
3306     MacroAssembler _masm(&cbuf);
3307     Register oop = as_Register($object$$reg);      // object whose mark word is examined/updated
3308     Register box = as_Register($box$$reg);         // base address used for the BasicLock displaced header
3309     Register disp_hdr = as_Register($tmp$$reg);    // scratch: holds the loaded mark / CAS result
3310     Register tmp = as_Register($tmp2$$reg);        // scratch
3311     Label cont;
3312     Label object_has_monitor;
3313     Label cas_failed;
3314     // Emits the inline lock fast path; the outcome is reported in the flags at label cont.
3315     assert_different_registers(oop, box, tmp, disp_hdr);
3316 
3317     // Load markOop from object into displaced_header.
3318     __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
3319 
3320     // Always do locking in runtime.
3321     if (EmitSync & 0x01) {
3322       __ cmp(oop, zr); // presumably oop != 0 here, so flags become NE (failure) -- cf. fast_unlock
3323       return;
3324     }
3325     
3326     if (UseBiasedLocking && !UseOptoBiasInlining) {
3327       __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
3328     }
3329 
3330     // Handle existing monitor
3331     if ((EmitSync & 0x02) == 0) {
3332       __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
3333     }
3334 
3335     // Set tmp to be (markOop of object | UNLOCK_VALUE).
3336     __ orr(tmp, disp_hdr, markOopDesc::unlocked_value);
3337 
3338     // tmp is now the value the compare-and-exchange below expects to find in the mark.
3339 
3340     // Initialize the box. (Must happen before we update the object mark!)
3341     __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3342 
3343     // Compare object markOop with an unlocked value (tmp) and if
3344     // equal exchange the stack address of our box with object markOop.
3345     // On failure disp_hdr contains the possibly locked markOop.
3346     if (UseLSE) {
3347       __ mov(disp_hdr, tmp);
3348       __ casal(Assembler::xword, disp_hdr, box, oop);  // Updates disp_hdr
3349       __ cmp(tmp, disp_hdr);
3350       __ br(Assembler::EQ, cont);
3351     } else {
3352       Label retry_load;
3353       if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
3354         __ prfm(Address(oop), PSTL1STRM);
3355       __ bind(retry_load);
3356       __ ldaxr(disp_hdr, oop);
3357       __ cmp(tmp, disp_hdr);
3358       __ br(Assembler::NE, cas_failed);
3359       // use stlxr to ensure update is immediately visible
3360       __ stlxr(disp_hdr, box, oop);
3361       __ cbzw(disp_hdr, cont); // disp_hdr now holds the store status: zero => done, flags still EQ from the cmp
3362       __ b(retry_load);
3363     }
3364 
3365     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
3366 
3367     // If the compare-and-exchange succeeded, then we found an unlocked
3368     // object, have now locked it, and have already branched to label cont.
3369 
3370     __ bind(cas_failed);
3371     // We did not see an unlocked object so try the fast recursive case.
3372 
3373     // Check if the owner is self by comparing the value in the
3374     // markOop of object (disp_hdr) with the stack pointer.
3375     __ mov(rscratch1, sp);
3376     __ sub(disp_hdr, disp_hdr, rscratch1);
3377     __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
3378     // If the masked difference is zero (flags == EQ) the value stored below is 0,
3379     // which as the displaced header in the box indicates that it is a recursive lock.
3380     __ ands(tmp/*==0?*/, disp_hdr, tmp);
3381     __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3382 
3383     // Handle existing monitor.
3384     if ((EmitSync & 0x02) == 0) {
3385       __ b(cont);
3386 
3387       __ bind(object_has_monitor);
3388       // The object's monitor m is unlocked iff m->owner == NULL,
3389       // otherwise m->owner may contain a thread or a stack address.
3390       //
3391       // Try to CAS m->owner from NULL to current thread.
3392       __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
3393       __ mov(disp_hdr, zr);
3394 
3395       if (UseLSE) {
3396         __ mov(rscratch1, disp_hdr);
3397         __ casal(Assembler::xword, rscratch1, rthread, tmp);
3398         __ cmp(rscratch1, disp_hdr);
3399       } else {
3400         Label retry_load, fail;
3401         if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
3402           __ prfm(Address(tmp), PSTL1STRM);
3403         __ bind(retry_load);
3404         __ ldaxr(rscratch1, tmp);
3405         __ cmp(disp_hdr, rscratch1);
3406         __ br(Assembler::NE, fail);
3407         // use stlxr to ensure update is immediately visible
3408         __ stlxr(rscratch1, rthread, tmp);
3409         __ cbnzw(rscratch1, retry_load); // non-zero store status: retry the exclusive sequence
3410         __ bind(fail);
3411       }
3412 
3413       // Store a non-null value into the box to avoid looking like a re-entrant
3414       // lock. The fast-path monitor unlock code checks for
3415       // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the
3416       // relevant bit set, and also matches ObjectSynchronizer::slow_enter.
3417       __ mov(tmp, (address)markOopDesc::unused_mark());
3418       __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3419     }
3420 
3421     __ bind(cont);
3422     // flag == EQ indicates success
3423     // flag == NE indicates failure
3424   %}
3425 
3426   enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
3427     MacroAssembler _masm(&cbuf);
3428     Register oop = as_Register($object$$reg);      // object whose mark word is examined/updated
3429     Register box = as_Register($box$$reg);         // base address used for the BasicLock displaced header
3430     Register disp_hdr = as_Register($tmp$$reg);    // scratch: displaced header loaded from the box
3431     Register tmp = as_Register($tmp2$$reg);        // scratch: object mark, later the monitor address
3432     Label cont;
3433     Label object_has_monitor;
3434     // Emits the inline unlock fast path; the outcome is reported in the flags at label cont.
3435     assert_different_registers(oop, box, tmp, disp_hdr);
3436 
3437     // Always do locking in runtime.
3438     if (EmitSync & 0x01) {
3439       __ cmp(oop, zr); // Oop can't be 0 here => always false.
3440       return;
3441     }
3442 
3443     if (UseBiasedLocking && !UseOptoBiasInlining) {
3444       __ biased_locking_exit(oop, tmp, cont);
3445     }
3446 
3447     // Find the lock address and load the displaced header from the stack.
3448     __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3449 
3450     // If the displaced header is 0, we have a recursive unlock.
3451     __ cmp(disp_hdr, zr);
3452     __ br(Assembler::EQ, cont);
3453 
3454     // Handle existing monitor.
3455     if ((EmitSync & 0x02) == 0) {
3456       __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
3457       __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
3458     }
3459 
3460     // Check if it is still a light weight lock; this is true if we
3461     // see the stack address of the basicLock in the markOop of the
3462     // object.
3463 
3464     if (UseLSE) {
3465       __ mov(tmp, box);
3466       __ casl(Assembler::xword, tmp, disp_hdr, oop);
3467       __ cmp(tmp, box);
3468       __ b(cont); // the flags set by the cmp above are the result seen at cont
3469     } else {
3470       Label retry_load;
3471       if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
3472         __ prfm(Address(oop), PSTL1STRM);
3473       __ bind(retry_load);
3474       __ ldxr(tmp, oop);
3475       __ cmp(box, tmp);
3476       __ br(Assembler::NE, cont); // mark is not our box address: leave with NE
3477       // use stlxr to ensure update is immediately visible
3478       __ stlxr(tmp, disp_hdr, oop);
3479       __ cbzw(tmp, cont); // tmp now holds the store status: zero => done, flags still EQ from the cmp
3480       __ b(retry_load);
3481     }
3482 
3483     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
3484 
3485     // Handle existing monitor.
3486     if ((EmitSync & 0x02) == 0) {
3487       __ bind(object_has_monitor);
3488       __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
3489       __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
3490       __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
3491       __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
3492       __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
3493       __ cmp(rscratch1, zr);
3494       __ br(Assembler::NE, cont); // not the owner, or recursions != 0: leave with NE
3495 
3496       __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
3497       __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
3498       __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
3499       __ cmp(rscratch1, zr);
3500       __ br(Assembler::NE, cont); // EntryList or cxq is non-zero: leave with NE
3501       // need a release store here
3502       __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
3503       __ stlr(zr, tmp); // set unowned
3504     }
3505 
3506     __ bind(cont);
3507     // flag == EQ indicates success
3508     // flag == NE indicates failure
3509   %}
3510 
3511 %}
3512 
3513 //----------FRAME--------------------------------------------------------------
3514 // Definition of frame structure and management information.
3515 //
3516 //  S T A C K   L A Y O U T    Allocators stack-slot number
3517 //                             |   (to get allocators register number
3518 //  G  Owned by    |        |  v    add OptoReg::stack0())
3519 //  r   CALLER     |        |
3520 //  o     |        +--------+      pad to even-align allocators stack-slot
3521 //  w     V        |  pad0  |        numbers; owned by CALLER
3522 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3523 //  h     ^        |   in   |  5
3524 //        |        |  args  |  4   Holes in incoming args owned by SELF
3525 //  |     |        |        |  3
3526 //  |     |        +--------+
3527 //  V     |        | old out|      Empty on Intel, window on Sparc
3528 //        |    old |preserve|      Must be even aligned.
3529 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3530 //        |        |   in   |  3   area for Intel ret address
3531 //     Owned by    |preserve|      Empty on Sparc.
3532 //       SELF      +--------+
3533 //        |        |  pad2  |  2   pad to align old SP
3534 //        |        +--------+  1
3535 //        |        | locks  |  0
3536 //        |        +--------+----> OptoReg::stack0(), even aligned
3537 //        |        |  pad1  | 11   pad to align new SP
3538 //        |        +--------+
3539 //        |        |        | 10
3540 //        |        | spills |  9   spills
3541 //        V        |        |  8   (pad0 slot for callee)
3542 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3543 //        ^        |  out   |  7
3544 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3545 //     Owned by    +--------+
3546 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3547 //        |    new |preserve|      Must be even-aligned.
3548 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3549 //        |        |        |
3550 //
3551 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3552 //         known from SELF's arguments and the Java calling convention.
3553 //         Region 6-7 is determined per call site.
3554 // Note 2: If the calling convention leaves holes in the incoming argument
3555 //         area, those holes are owned by SELF.  Holes in the outgoing area
3556 //         are owned by the CALLEE.  Holes should not be necessary in the
3557 //         incoming area, as the Java calling convention is completely under
3558 //         the control of the AD file.  Doubles can be sorted and packed to
3559 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3560 //         varargs C calling conventions.
3561 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3562 //         even aligned with pad0 as needed.
3563 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3564 //           (the latter is true on Intel but is it false on AArch64?)
3565 //         region 6-11 is even aligned; it may be padded out more so that
3566 //         the region from SP to FP meets the minimum stack alignment.
3567 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3568 //         alignment.  Region 11, pad1, may be dynamically extended so that
3569 //         SP meets the minimum alignment.
3570 
3571 frame %{
3572   // What direction does stack grow in (assumed to be same for C & Java)
3573   stack_direction(TOWARDS_LOW);
3574 
3575   // These three registers define part of the calling convention
3576   // between compiled code and the interpreter.
3577 
3578   // Inline Cache Register or methodOop for I2C.
3579   inline_cache_reg(R12);
3580 
3581   // Method Oop Register when calling interpreter.
3582   interpreter_method_oop_reg(R12);
3583 
3584   // Number of stack slots consumed by locking an object
3585   sync_stack_slots(2);
3586 
3587   // Compiled code's Frame Pointer
3588   frame_pointer(R31);
3589 
3590   // Interpreter stores its frame pointer in a register which is
3591   // stored to the stack by I2CAdaptors.
3592   // I2CAdaptors convert from interpreted java to compiled java.
3593   interpreter_frame_pointer(R29);
3594 
3595   // Stack alignment requirement
3596   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
3597 
3598   // Number of stack slots between incoming argument block and the start of
3599   // a new frame.  The PROLOG must add this many slots to the stack.  The
3600   // EPILOG must remove this many slots. aarch64 needs two slots for
3601   // return address and fp.
3602   // TODO think this is correct but check. NOTE(review): the value is 4, not 2; presumably slots are 32-bit, i.e. two 64-bit words -- confirm
3603   in_preserve_stack_slots(4);
3604 
3605   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3606   // for calls to C.  Supports the var-args backing area for register parms.
3607   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
3608 
3609   // The after-PROLOG location of the return address.  Location of
3610   // return address specifies a type (REG or STACK) and a number
3611   // representing the register number (i.e. - use a register name) or
3612   // stack slot.
3613   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3614   // Otherwise, it is above the locks and verification slot and alignment word
3615   // TODO this may well be correct but need to check why that - 2 is there
3616   // ppc port uses 0 but we definitely need to allow for fixed_slots
3617   // which folds in the space used for monitors
3618   return_addr(STACK - 2 +
3619               round_to((Compile::current()->in_preserve_stack_slots() +
3620                         Compile::current()->fixed_slots()),
3621                        stack_alignment_in_slots()));
3622 
3623   // Body of function which returns an integer array locating
3624   // arguments either in registers or in stack slots.  Passed an array
3625   // of ideal registers called "sig" and a "length" count.  Stack-slot
3626   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3627   // arguments for a CALLEE.  Incoming stack arguments are
3628   // automatically biased by the preserve_stack_slots field above.
3629 
3630   calling_convention
3631   %{
3632     // No difference between ingoing/outgoing just pass false
3633     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3634   %}
3635 
3636   c_calling_convention
3637   %{
3638     // This is obviously always outgoing
3639     (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
3640   %}
3641 
3642   // Location of compiled Java return values.  Same as C for now.
3643   return_value
3644   %{
3645     // TODO do we allow ideal_reg == Op_RegN???
3646     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
3647            "only return normal values");
3648     // lo[] and hi[] are indexed by ideal_reg; hi is OptoReg::Bad where no upper half is listed.
3649     static const int lo[Op_RegL + 1] = { // enum name
3650       0,                                 // Op_Node
3651       0,                                 // Op_Set
3652       R0_num,                            // Op_RegN
3653       R0_num,                            // Op_RegI
3654       R0_num,                            // Op_RegP
3655       V0_num,                            // Op_RegF
3656       V0_num,                            // Op_RegD
3657       R0_num                             // Op_RegL
3658     };
3659   
3660     static const int hi[Op_RegL + 1] = { // enum name
3661       0,                                 // Op_Node
3662       0,                                 // Op_Set
3663       OptoReg::Bad,                      // Op_RegN
3664       OptoReg::Bad,                      // Op_RegI
3665       R0_H_num,                          // Op_RegP
3666       OptoReg::Bad,                      // Op_RegF
3667       V0_H_num,                          // Op_RegD
3668       R0_H_num                           // Op_RegL
3669     };
3670 
3671     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
3672   %}
3673 %}
3674 
3675 //----------ATTRIBUTES---------------------------------------------------------
3676 //----------Operand Attributes-------------------------------------------------
3677 op_attrib op_cost(1);        // Required cost attribute, declared as 1; operands below set op_cost(0) explicitly
3678 
3679 //----------Instruction Attributes---------------------------------------------
3680 ins_attrib ins_cost(INSN_COST); // Required cost attribute
3681 ins_attrib ins_size(32);        // Required size attribute (in bits)
3682 ins_attrib ins_short_branch(0); // Required flag: is this instruction
3683                                 // a non-matching short branch variant
3684                                 // of some long branch?
3685 ins_attrib ins_alignment(4);    // Required alignment attribute (must
3686                                 // be a power of 2) specifies the
3687                                 // alignment that some part of the
3688                                 // instruction (not necessarily the
3689                                 // start) requires.  If > 1, a
3690                                 // compute_padding() function must be
3691                                 // provided for the instruction
3692 
3693 //----------OPERANDS-----------------------------------------------------------
3694 // Operand definitions must precede instruction definitions for correct parsing
3695 // in the ADLC because operands constitute user defined types which are used in
3696 // instruction definitions.
3697 
3698 //----------Simple Operands----------------------------------------------------
3699 
3700 // Integer operands 32 bit
3701 // 32 bit immediate: matches any int constant (no predicate)
3702 operand immI()
3703 %{
3704   match(ConI);
3705 
3706   op_cost(0);
3707   format %{ %}
3708   interface(CONST_INTER);
3709 %}
3710 
3711 // 32 bit zero: int constant equal to 0
3712 operand immI0()
3713 %{
3714   predicate(n->get_int() == 0);
3715   match(ConI);
3716 
3717   op_cost(0);
3718   format %{ %}
3719   interface(CONST_INTER);
3720 %}
3721 
3722 // 32 bit unit increment: int constant equal to 1
3723 operand immI_1()
3724 %{
3725   predicate(n->get_int() == 1);
3726   match(ConI);
3727 
3728   op_cost(0);
3729   format %{ %}
3730   interface(CONST_INTER);
3731 %}
3732 
3733 // 32 bit unit decrement: int constant equal to -1
3734 operand immI_M1()
3735 %{
3736   predicate(n->get_int() == -1);
3737   match(ConI);
3738 
3739   op_cost(0);
3740   format %{ %}
3741   interface(CONST_INTER);
3742 %}
3743 
3744 operand immI_le_4()
3745 %{
3746   predicate(n->get_int() <= 4);  // any int constant <= 4; note there is no lower bound, so negatives match too
3747   match(ConI);
3748 
3749   op_cost(0);
3750   format %{ %}
3751   interface(CONST_INTER);
3752 %}
3753 
3754 operand immI_31()
3755 %{
3756   predicate(n->get_int() == 31);  // int constant 31
3757   match(ConI);
3758 
3759   op_cost(0);
3760   format %{ %}
3761   interface(CONST_INTER);
3762 %}
3763 
3764 operand immI_8()
3765 %{
3766   predicate(n->get_int() == 8);  // int constant 8
3767   match(ConI);
3768 
3769   op_cost(0);
3770   format %{ %}
3771   interface(CONST_INTER);
3772 %}
3773 
3774 operand immI_16()
3775 %{
3776   predicate(n->get_int() == 16);  // int constant 16
3777   match(ConI);
3778 
3779   op_cost(0);
3780   format %{ %}
3781   interface(CONST_INTER);
3782 %}
3783 
3784 operand immI_24()
3785 %{
3786   predicate(n->get_int() == 24);  // int constant 24
3787   match(ConI);
3788 
3789   op_cost(0);
3790   format %{ %}
3791   interface(CONST_INTER);
3792 %}
3793 
3794 operand immI_32()
3795 %{
3796   predicate(n->get_int() == 32);  // int constant 32
3797   match(ConI);
3798 
3799   op_cost(0);
3800   format %{ %}
3801   interface(CONST_INTER);
3802 %}
3803 
3804 operand immI_48()
3805 %{
3806   predicate(n->get_int() == 48);  // int constant 48
3807   match(ConI);
3808 
3809   op_cost(0);
3810   format %{ %}
3811   interface(CONST_INTER);
3812 %}
3813 
3814 operand immI_56()
3815 %{
3816   predicate(n->get_int() == 56);  // int constant 56
3817   match(ConI);
3818 
3819   op_cost(0);
3820   format %{ %}
3821   interface(CONST_INTER);
3822 %}
3823 
3824 operand immI_64()
3825 %{
3826   predicate(n->get_int() == 64);  // int constant 64
3827   match(ConI);
3828 
3829   op_cost(0);
3830   format %{ %}
3831   interface(CONST_INTER);
3832 %}
3833 
3834 operand immI_255()
3835 %{
3836   predicate(n->get_int() == 255);  // int constant 255 (0xff)
3837   match(ConI);
3838 
3839   op_cost(0);
3840   format %{ %}
3841   interface(CONST_INTER);
3842 %}
3843 
3844 operand immI_65535()
3845 %{
3846   predicate(n->get_int() == 65535);  // int constant 65535 (0xffff)
3847   match(ConI);
3848 
3849   op_cost(0);
3850   format %{ %}
3851   interface(CONST_INTER);
3852 %}
3853 
3854 operand immL_63()
3855 %{
3856   predicate(n->get_int() == 63);  // int constant 63
3857   match(ConI);                    // NOTE(review): matches ConI despite the immL_ prefix; presumably a shift count for long operations -- confirm against users
3858 
3859   op_cost(0);
3860   format %{ %}
3861   interface(CONST_INTER);
3862 %}
3863 
3864 operand immL_255()
3865 %{
3866   predicate(n->get_long() == 255L);  // long constant 255 (0xff), tested the same way as immL_65535
3867   match(ConL);                       // a long mask is a ConL node; the previous ConI/get_int() form could not match one
3868 
3869   op_cost(0);
3870   format %{ %}
3871   interface(CONST_INTER);
3872 %}
3873 
3874 operand immL_65535()
3875 %{
3876   predicate(n->get_long() == 65535L);  // long constant 65535 (0xffff)
3877   match(ConL);
3878 
3879   op_cost(0);
3880   format %{ %}
3881   interface(CONST_INTER);
3882 %}
3883 
3884 operand immL_4294967295()
3885 %{
3886   predicate(n->get_long() == 4294967295L);  // long constant 4294967295 (0xffffffff)
3887   match(ConL);
3888 
3889   op_cost(0);
3890   format %{ %}
3891   interface(CONST_INTER);
3892 %}
3893 
3894 operand immL_bitmask()
3895 %{
3896   predicate(((n->get_long() & 0xc000000000000000l) == 0)
3897             && is_power_of_2(n->get_long() + 1));
3898   match(ConL);  // long constant whose top two bits are clear and for which is_power_of_2(value + 1) holds
3899 
3900   op_cost(0);
3901   format %{ %}
3902   interface(CONST_INTER);
3903 %}
3904 
3905 operand immI_bitmask()
3906 %{
3907   predicate(((n->get_int() & 0xc0000000) == 0)
3908             && is_power_of_2(n->get_int() + 1));
3909   match(ConI);  // int constant whose top two bits are clear and for which is_power_of_2(value + 1) holds
3910 
3911   op_cost(0);
3912   format %{ %}
3913   interface(CONST_INTER);
3914 %}
3915 
3916 // Scale values for scaled offset addressing modes (up to long but not quad): int constants 0..3
3917 operand immIScale()
3918 %{
3919   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3920   match(ConI);
3921 
3922   op_cost(0);
3923   format %{ %}
3924   interface(CONST_INTER);
3925 %}
3926 
3927 // 26 bit signed offset -- for pc-relative branches (-2^25 <= value < 2^25)
3928 operand immI26()
3929 %{
3930   predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
3931   match(ConI);
3932 
3933   op_cost(0);
3934   format %{ %}
3935   interface(CONST_INTER);
3936 %}
3937 
3938 // 19 bit signed offset -- for pc-relative loads (-2^18 <= value < 2^18)
3939 operand immI19()
3940 %{
3941   predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
3942   match(ConI);
3943 
3944   op_cost(0);
3945   format %{ %}
3946   interface(CONST_INTER);
3947 %}
3948 
3949 // 12 bit unsigned offset -- for base plus immediate loads (0 <= value < 2^12)
3950 operand immIU12()
3951 %{
3952   predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
3953   match(ConI);
3954 
3955   op_cost(0);
3956   format %{ %}
3957   interface(CONST_INTER);
3958 %}
3959 
3960 operand immLU12()
3961 %{
3962   predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));  // long counterpart of immIU12: 0 <= value < 2^12
3963   match(ConL);
3964 
3965   op_cost(0);
3966   format %{ %}
3967   interface(CONST_INTER);
3968 %}
3969 
3970 // Offset for scaled or unscaled immediate loads and stores
3971 operand immIOffset()
3972 %{
3973   predicate(Address::offset_ok_for_immed(n->get_int()));  // validity is decided by Address::offset_ok_for_immed
3974   match(ConI);
3975 
3976   op_cost(0);
3977   format %{ %}
3978   interface(CONST_INTER);
3979 %}
3980 
3981 operand immIOffset4()
3982 %{
3983   predicate(Address::offset_ok_for_immed(n->get_int(), 2));  // second argument 2: presumably log2 of a 4-byte access -- confirm
3984   match(ConI);
3985 
3986   op_cost(0);
3987   format %{ %}
3988   interface(CONST_INTER);
3989 %}
3990 
3991 operand immIOffset8()
3992 %{
3993   predicate(Address::offset_ok_for_immed(n->get_int(), 3));  // second argument 3: presumably log2 of an 8-byte access -- confirm
3994   match(ConI);
3995 
3996   op_cost(0);
3997   format %{ %}
3998   interface(CONST_INTER);
3999 %}
4000 
4001 operand immIOffset16()
4002 %{
4003   predicate(Address::offset_ok_for_immed(n->get_int(), 4));  // second argument 4: presumably log2 of a 16-byte access -- confirm
4004   match(ConI);
4005 
4006   op_cost(0);
4007   format %{ %}
4008   interface(CONST_INTER);
4009 %}
4010 
4011 operand immLoffset()
4012 %{
4013   predicate(Address::offset_ok_for_immed(n->get_long()));  // long counterpart of immIOffset (note the lower-case 'o' in the name)
4014   match(ConL);
4015 
4016   op_cost(0);
4017   format %{ %}
4018   interface(CONST_INTER);
4019 %}
4020 
4021 operand immLoffset4()
4022 %{
4023   predicate(Address::offset_ok_for_immed(n->get_long(), 2));  // long counterpart of immIOffset4 (second argument 2)
4024   match(ConL);
4025 
4026   op_cost(0);
4027   format %{ %}
4028   interface(CONST_INTER);
4029 %}
4030 
4031 operand immLoffset8()
4032 %{
4033   predicate(Address::offset_ok_for_immed(n->get_long(), 3));  // long counterpart of immIOffset8 (second argument 3)
4034   match(ConL);
4035 
4036   op_cost(0);
4037   format %{ %}
4038   interface(CONST_INTER);
4039 %}
4040 
4041 operand immLoffset16()
4042 %{
4043   predicate(Address::offset_ok_for_immed(n->get_long(), 4));  // long counterpart of immIOffset16 (second argument 4)
4044   match(ConL);
4045 
4046   op_cost(0);
4047   format %{ %}
4048   interface(CONST_INTER);
4049 %}
4050 
4051 // 32 bit integer valid for add sub immediate (the int is widened to long before the check)
4052 operand immIAddSub()
4053 %{
4054   predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
4055   match(ConI);
4056   op_cost(0);
4057   format %{ %}
4058   interface(CONST_INTER);
4059 %}
4060 
4061 // 32 bit unsigned integer valid for logical immediate (checked with is32 == true)
4062 // TODO -- check this is right when e.g. the mask is 0x80000000 (the int is cast to unsigned long before the check)
4063 operand immILog()
4064 %{
4065   predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
4066   match(ConI);
4067 
4068   op_cost(0);
4069   format %{ %}
4070   interface(CONST_INTER);
4071 %}
4072 
4073 // Integer operands 64 bit
4074 // 64 bit immediate: matches any long constant (no predicate)
4075 operand immL()
4076 %{
4077   match(ConL);
4078 
4079   op_cost(0);
4080   format %{ %}
4081   interface(CONST_INTER);
4082 %}
4083 
4084 // 64 bit zero: long constant equal to 0
4085 operand immL0()
4086 %{
4087   predicate(n->get_long() == 0);
4088   match(ConL);
4089 
4090   op_cost(0);
4091   format %{ %}
4092   interface(CONST_INTER);
4093 %}
4094 
4095 // 64 bit unit increment: long constant equal to 1
4096 operand immL_1()
4097 %{
4098   predicate(n->get_long() == 1);
4099   match(ConL);
4100 
4101   op_cost(0);
4102   format %{ %}
4103   interface(CONST_INTER);
4104 %}
4105 
4106 // 64 bit unit decrement: long constant equal to -1
4107 operand immL_M1()
4108 %{
4109   predicate(n->get_long() == -1);
4110   match(ConL);
4111 
4112   op_cost(0);
4113   format %{ %}
4114   interface(CONST_INTER);
4115 %}
4116 
4117 // 32 bit offset of pc in thread anchor: a long constant equal to
4118 // JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()
4119 operand immL_pc_off()
4120 %{
4121   predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
4122                              in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
4123   match(ConL);
4124 
4125   op_cost(0);
4126   format %{ %}
4127   interface(CONST_INTER);
4128 %}
4129 
4130 // 64 bit integer valid for add sub immediate (per Assembler::operand_valid_for_add_sub_immediate)
4131 operand immLAddSub()
4132 %{
4133   predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
4134   match(ConL);
4135   op_cost(0);
4136   format %{ %}
4137   interface(CONST_INTER);
4138 %}
4139 
4140 // 64 bit integer valid for logical immediate (checked with is32 == false)
4141 operand immLLog()
4142 %{
4143   predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
4144   match(ConL);
4145   op_cost(0);
4146   format %{ %}
4147   interface(CONST_INTER);
4148 %}
4149 
4150 // Long Immediate: low 32-bit mask (0xFFFFFFFF, the same value immL_4294967295 tests for)
4151 operand immL_32bits()
4152 %{
4153   predicate(n->get_long() == 0xFFFFFFFFL);
4154   match(ConL);
4155   op_cost(0);
4156   format %{ %}
4157   interface(CONST_INTER);
4158 %}
4159 
4160 // Pointer operands
4161 // Pointer Immediate: matches any pointer constant (no predicate)
4162 operand immP()
4163 %{
4164   match(ConP);
4165 
4166   op_cost(0);
4167   format %{ %}
4168   interface(CONST_INTER);
4169 %}
4170 
4171 // NULL Pointer Immediate: pointer constant equal to 0
4172 operand immP0()
4173 %{
4174   predicate(n->get_ptr() == 0);
4175   match(ConP);
4176 
4177   op_cost(0);
4178   format %{ %}
4179   interface(CONST_INTER);
4180 %}
4181 
4182 // Pointer Immediate One: pointer constant equal to 1
4183 // this is used in object initialization (initial object header)
4184 operand immP_1()
4185 %{
4186   predicate(n->get_ptr() == 1);
4187   match(ConP);
4188 
4189   op_cost(0);
4190   format %{ %}
4191   interface(CONST_INTER);
4192 %}
4193 
4194 // Polling Page Pointer Immediate: pointer constant equal to os::get_polling_page()
4195 operand immPollPage()
4196 %{
4197   predicate((address)n->get_ptr() == os::get_polling_page());
4198   match(ConP);
4199 
4200   op_cost(0);
4201   format %{ %}
4202   interface(CONST_INTER);
4203 %}
4204 
4205 // Card Table Byte Map Base: pointer constant equal to the barrier set's byte_map_base (never matches when UseShenandoahGC is set)
4206 operand immByteMapBase()
4207 %{
4208   // Get base of card map; the barrier set is cast to CardTableModRefBS without a type check
4209   predicate(!UseShenandoahGC && // TODO: Should really check for BS::is_a, see JDK-8193193
4210     (jbyte*)n->get_ptr() == ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
4211   match(ConP);
4212 
4213   op_cost(0);
4214   format %{ %}
4215   interface(CONST_INTER);
4216 %}
4217 
4218 // Pointer Immediate Minus One: pointer constant equal to -1
4219 // this is used when we want to write the current PC to the thread anchor
4220 operand immP_M1()
4221 %{
4222   predicate(n->get_ptr() == -1);
4223   match(ConP);
4224 
4225   op_cost(0);
4226   format %{ %}
4227   interface(CONST_INTER);
4228 %}
4229 
4230 // Pointer Immediate Minus Two: pointer constant equal to -2
4231 // NOTE(review): the usage note here was identical to immP_M1's (write the current PC to the thread anchor) -- confirm it applies to -2
4232 operand immP_M2()
4233 %{
4234   predicate(n->get_ptr() == -2);
4235   match(ConP);
4236 
4237   op_cost(0);
4238   format %{ %}
4239   interface(CONST_INTER);
4240 %}
4241 
4242 // Float and Double operands
4243 // Double Immediate: matches any double constant (no predicate)
4244 operand immD()
4245 %{
4246   match(ConD);
4247   op_cost(0);
4248   format %{ %}
4249   interface(CONST_INTER);
4250 %}
4251 
4252 // constant 'double +0.0' (the signbit test rejects -0.0).
4253 operand immD0()
4254 %{
4255   predicate((n->getd() == 0) &&
4256             (fpclassify(n->getd()) == FP_ZERO) && (signbit(n->getd()) == 0));
4257   match(ConD);
4258   op_cost(0);
4259   format %{ %}
4260   interface(CONST_INTER);
4261 %}
4262 
4263 // double constant accepted by Assembler::operand_valid_for_float_immediate.
4264 operand immDPacked()
4265 %{
4266   predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
4267   match(ConD);
4268   op_cost(0);
4269   format %{ %}
4270   interface(CONST_INTER);
4271 %}
4272 
4273 // Float Immediate: matches any float constant (no predicate)
4274 operand immF()
4275 %{
4276   match(ConF);
4277   op_cost(0);
4278   format %{ %}
4279   interface(CONST_INTER);
4280 %}
4281 
4282 // constant 'float +0.0' (the signbit test rejects -0.0).
4283 operand immF0()
4284 %{
4285   predicate((n->getf() == 0) &&
4286             (fpclassify(n->getf()) == FP_ZERO) && (signbit(n->getf()) == 0));
4287   match(ConF);
4288   op_cost(0);
4289   format %{ %}
4290   interface(CONST_INTER);
4291 %}
4292 
4293 // float constant accepted (after widening to double) by Assembler::operand_valid_for_float_immediate.
4294 operand immFPacked()
4295 %{
4296   predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
4297   match(ConF);
4298   op_cost(0);
4299   format %{ %}
4300   interface(CONST_INTER);
4301 %}
4302 
4303 // Narrow pointer operands
4304 // Narrow Pointer Immediate: matches any ConN constant (no predicate)
4305 operand immN()
4306 %{
4307   match(ConN);
4308 
4309   op_cost(0);
4310   format %{ %}
4311   interface(CONST_INTER);
4312 %}
4313 
4314 // Narrow NULL Pointer Immediate: ConN constant whose get_narrowcon() is 0
4315 operand immN0()
4316 %{
4317   predicate(n->get_narrowcon() == 0);
4318   match(ConN);
4319 
4320   op_cost(0);
4321   format %{ %}
4322   interface(CONST_INTER);
4323 %}
4324 
4325 operand immNKlass()
4326 %{
4327   match(ConNKlass);  // matches any ConNKlass constant (no predicate)
4328 
4329   op_cost(0);
4330   format %{ %}
4331   interface(CONST_INTER);
4332 %}
4333 
4334 // Integer 32 bit Register Operands
4335 // Integer 32 bit Register (excludes SP); allocated from class any_reg32, also accepts iRegINoSp
4336 operand iRegI()
4337 %{
4338   constraint(ALLOC_IN_RC(any_reg32));
4339   match(RegI);
4340   match(iRegINoSp);
4341   op_cost(0);
4342   format %{ %}
4343   interface(REG_INTER);
4344 %}
4345 
4346 // Integer 32 bit Register not Special; allocated from class no_special_reg32
4347 operand iRegINoSp()
4348 %{
4349   constraint(ALLOC_IN_RC(no_special_reg32));
4350   match(RegI);
4351   op_cost(0);
4352   format %{ %}
4353   interface(REG_INTER);
4354 %}
4355 
4356 // Integer 64 bit Register Operands
4357 // Integer 64 bit Register (includes SP); allocated from class any_reg, also accepts iRegLNoSp
4358 operand iRegL()
4359 %{
4360   constraint(ALLOC_IN_RC(any_reg));
4361   match(RegL);
4362   match(iRegLNoSp);
4363   op_cost(0);
4364   format %{ %}
4365   interface(REG_INTER);
4366 %}
4367 
4368 // Integer 64 bit Register not Special; allocated from class no_special_reg
4369 operand iRegLNoSp()
4370 %{
4371   constraint(ALLOC_IN_RC(no_special_reg));
4372   match(RegL);  // NOTE(review): unlike the sibling register operands there is no op_cost(0) here -- confirm this is intentional
4373   format %{ %}
4374   interface(REG_INTER);
4375 %}
4376 
4377 // Pointer Register Operands
4378 // Pointer Register; allocated from class ptr_reg, also accepts iRegPNoSp, iRegP_R0 and thread_RegP
4379 operand iRegP()
4380 %{
4381   constraint(ALLOC_IN_RC(ptr_reg));
4382   match(RegP);
4383   match(iRegPNoSp);
4384   match(iRegP_R0);
4385   //match(iRegP_R2);
4386   //match(iRegP_R4);
4387   //match(iRegP_R5);
4388   match(thread_RegP);
4389   op_cost(0);
4390   format %{ %}
4391   interface(REG_INTER);
4392 %}
4393 
4394 // Pointer 64 bit Register not Special: RegP allocated from no_special_ptr_reg
4395 operand iRegPNoSp()
4396 %{
4397   constraint(ALLOC_IN_RC(no_special_ptr_reg));
4398   match(RegP);
4399   // The match rules below are disabled; only RegP is matched.
4400   // match(iRegP_R0);
4401   // match(iRegP_R2);
4402   // match(iRegP_R4);
4403   // match(iRegP_R5);
4404   // match(thread_RegP);
4405   op_cost(0);
4406   format %{ %}
4407   interface(REG_INTER);
4408 %}
4409 
4410 // Pointer 64 bit Register R0 only: RegP allocated from r0_reg
4411 operand iRegP_R0()
4412 %{
4413   constraint(ALLOC_IN_RC(r0_reg));
4414   match(RegP);
4415   // match(iRegP);   (disabled)
4416   match(iRegPNoSp);
4417   op_cost(0);
4418   format %{ %}
4419   interface(REG_INTER);
4420 %}
4421 
4422 // Pointer 64 bit Register R1 only: RegP allocated from r1_reg
4423 operand iRegP_R1()
4424 %{
4425   constraint(ALLOC_IN_RC(r1_reg));
4426   match(RegP);
4427   // match(iRegP);   (disabled)
4428   match(iRegPNoSp);
4429   op_cost(0);
4430   format %{ %}
4431   interface(REG_INTER);
4432 %}
4433 
4434 // Pointer 64 bit Register R2 only: RegP allocated from r2_reg
4435 operand iRegP_R2()
4436 %{
4437   constraint(ALLOC_IN_RC(r2_reg));
4438   match(RegP);
4439   // match(iRegP);   (disabled)
4440   match(iRegPNoSp);
4441   op_cost(0);
4442   format %{ %}
4443   interface(REG_INTER);
4444 %}
4445 
4446 // Pointer 64 bit Register R3 only: RegP allocated from r3_reg
4447 operand iRegP_R3()
4448 %{
4449   constraint(ALLOC_IN_RC(r3_reg));
4450   match(RegP);
4451   // match(iRegP);   (disabled)
4452   match(iRegPNoSp);
4453   op_cost(0);
4454   format %{ %}
4455   interface(REG_INTER);
4456 %}
4457 
4458 // Pointer 64 bit Register R4 only: RegP allocated from r4_reg
4459 operand iRegP_R4()
4460 %{
4461   constraint(ALLOC_IN_RC(r4_reg));
4462   match(RegP);
4463   // match(iRegP);   (disabled)
4464   match(iRegPNoSp);
4465   op_cost(0);
4466   format %{ %}
4467   interface(REG_INTER);
4468 %}
4469 
4470 // Pointer 64 bit Register R5 only: RegP allocated from r5_reg
4471 operand iRegP_R5()
4472 %{
4473   constraint(ALLOC_IN_RC(r5_reg));
4474   match(RegP);
4475   // match(iRegP);   (disabled)
4476   match(iRegPNoSp);
4477   op_cost(0);
4478   format %{ %}
4479   interface(REG_INTER);
4480 %}
4481 
4482 // Pointer 64 bit Register R10 only: RegP allocated from r10_reg
4483 operand iRegP_R10()
4484 %{
4485   constraint(ALLOC_IN_RC(r10_reg));
4486   match(RegP);
4487   // match(iRegP);   (disabled)
4488   match(iRegPNoSp);
4489   op_cost(0);
4490   format %{ %}
4491   interface(REG_INTER);
4492 %}
4493 
4494 // Long 64 bit Register R11 only: RegL allocated from r11_reg
4495 operand iRegL_R11()
4496 %{
4497   constraint(ALLOC_IN_RC(r11_reg));
4498   match(RegL);
4499   match(iRegLNoSp);
4500   op_cost(0);
4501   format %{ %}
4502   interface(REG_INTER);
4503 %}
4504 
4505 // Pointer 64 bit Register FP only: RegP allocated from fp_reg
4506 operand iRegP_FP()
4507 %{
4508   constraint(ALLOC_IN_RC(fp_reg));
4509   match(RegP);
4510   // match(iRegP);   (disabled; unlike the iRegP_Rn operands, iRegPNoSp is not named here either)
4511   op_cost(0);
4512   format %{ %}
4513   interface(REG_INTER);
4514 %}
4515 
4516 // Integer 32 bit Register R0 only: RegI allocated from int_r0_reg
4517 operand iRegI_R0()
4518 %{
4519   constraint(ALLOC_IN_RC(int_r0_reg));
4520   match(RegI);
4521   match(iRegINoSp);
4522   op_cost(0);
4523   format %{ %}
4524   interface(REG_INTER);
4525 %}
4526 
4527 // Integer 32 bit Register R2 only: RegI allocated from int_r2_reg
4528 operand iRegI_R2()
4529 %{
4530   constraint(ALLOC_IN_RC(int_r2_reg));
4531   match(RegI);
4532   match(iRegINoSp);
4533   op_cost(0);
4534   format %{ %}
4535   interface(REG_INTER);
4536 %}
4537 
4538 // Integer 32 bit Register R3 only: RegI allocated from int_r3_reg
4539 operand iRegI_R3()
4540 %{
4541   constraint(ALLOC_IN_RC(int_r3_reg));
4542   match(RegI);
4543   match(iRegINoSp);
4544   op_cost(0);
4545   format %{ %}
4546   interface(REG_INTER);
4547 %}
4548 
4549 
4550 // Integer 32 bit Register R4 only: RegI allocated from int_r4_reg
4551 operand iRegI_R4()
4552 %{
4553   constraint(ALLOC_IN_RC(int_r4_reg));
4554   match(RegI);
4555   match(iRegINoSp);
4556   op_cost(0);
4557   format %{ %}
4558   interface(REG_INTER);
4559 %}
4560 
4561 
4562 // Narrow Pointer Register Operands
4563 // Narrow Pointer Register: RegN allocated from any_reg32
4564 operand iRegN()
4565 %{
4566   constraint(ALLOC_IN_RC(any_reg32));
4567   match(RegN);
4568   match(iRegNNoSp);   // second match rule names the iRegNNoSp operand
4569   op_cost(0);
4570   format %{ %}
4571   interface(REG_INTER);
4572 %}
4573 
4574 // Narrow Pointer Register not Special: RegN allocated from no_special_reg32
4575 operand iRegNNoSp()
4576 %{
4577   constraint(ALLOC_IN_RC(no_special_reg32));
4578   match(RegN);
4579   op_cost(0);
4580   format %{ %}
4581   interface(REG_INTER);
4582 %}
4583 
4584 // heap base register -- used for encoding immN0
4585 
4586 operand iRegIHeapbase()
4587 %{
4588   constraint(ALLOC_IN_RC(heapbase_reg));
4589   match(RegI);   // typed as a 32 bit integer register although allocated from heapbase_reg
4590   op_cost(0);
4591   format %{ %}
4592   interface(REG_INTER);
4593 %}
4594 
4595 // Float Register
4596 // Float register operand: RegF allocated from float_reg
4597 operand vRegF()
4598 %{
4599   constraint(ALLOC_IN_RC(float_reg));
4600   match(RegF);
4601 
4602   op_cost(0);
4603   format %{ %}
4604   interface(REG_INTER);
4605 %}
4606 
4607 // Double Register
4608 // Double register operand: RegD allocated from double_reg
4609 operand vRegD()
4610 %{
4611   constraint(ALLOC_IN_RC(double_reg));
4612   match(RegD);
4613 
4614   op_cost(0);
4615   format %{ %}
4616   interface(REG_INTER);
4617 %}
4618 
4619 operand vecD()
4620 %{
4621   constraint(ALLOC_IN_RC(vectord_reg));
4622   match(VecD);   // vector operand; it is the parameter type of the *64 vector pipe classes (e.g. vmul64)
4623 
4624   op_cost(0);
4625   format %{ %}
4626   interface(REG_INTER);
4627 %}
4628 
4629 operand vecX()
4630 %{
4631   constraint(ALLOC_IN_RC(vectorx_reg));
4632   match(VecX);   // vector operand; it is the parameter type of the *128 vector pipe classes (e.g. vmul128)
4633 
4634   op_cost(0);
4635   format %{ %}
4636   interface(REG_INTER);
4637 %}
4638 
4639 operand vRegD_V0()
4640 %{
4641   constraint(ALLOC_IN_RC(v0_reg));   // double operand restricted to the v0_reg class
4642   match(RegD);
4643   op_cost(0);
4644   format %{ %}
4645   interface(REG_INTER);
4646 %}
4647 
4648 operand vRegD_V1()
4649 %{
4650   constraint(ALLOC_IN_RC(v1_reg));   // double operand restricted to the v1_reg class
4651   match(RegD);
4652   op_cost(0);
4653   format %{ %}
4654   interface(REG_INTER);
4655 %}
4656 
4657 operand vRegD_V2()
4658 %{
4659   constraint(ALLOC_IN_RC(v2_reg));   // double operand restricted to the v2_reg class
4660   match(RegD);
4661   op_cost(0);
4662   format %{ %}
4663   interface(REG_INTER);
4664 %}
4665 
4666 operand vRegD_V3()
4667 %{
4668   constraint(ALLOC_IN_RC(v3_reg));   // double operand restricted to the v3_reg class
4669   match(RegD);
4670   op_cost(0);
4671   format %{ %}
4672   interface(REG_INTER);
4673 %}
4674 
4675 // Flags register, used as output of signed compare instructions
4676 
4677 // note that on AArch64 we also use this register as the output
4678 // for floating point compare instructions (CmpF CmpD). this ensures
4679 // that ordered inequality tests use GT, GE, LT or LE none of which
4680 // pass through cases where the result is unordered i.e. one or both
4681 // inputs to the compare is a NaN. this means that the ideal code can
4682 // replace e.g. a GT with an LE and not end up capturing the NaN case
4683 // (where the comparison should always fail). EQ and NE tests are
4684 // always generated in ideal code so that unordered folds into the NE
4685 // case, matching the behaviour of AArch64 NE.
4686 //
4687 // This differs from x86 where the outputs of FP compares use a
4688 // special FP flags registers and where compares based on this
4689 // register are distinguished into ordered inequalities (cmpOpUCF) and
4690 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
4691 // to explicitly handle the unordered case in branches. x86 also has
4692 // to include extra CMoveX rules to accept a cmpOpUCF input.
4693 
4694 operand rFlagsReg()
4695 %{
4696   constraint(ALLOC_IN_RC(int_flags));   // same register class as rFlagsRegU
4697   match(RegFlags);
4698 
4699   op_cost(0);
4700   format %{ "RFLAGS" %}
4701   interface(REG_INTER);
4702 %}
4703 
4704 // Flags register, used as output of unsigned compare instructions
4705 operand rFlagsRegU()
4706 %{
4707   constraint(ALLOC_IN_RC(int_flags));   // same register class as rFlagsReg; only the operand type and format string differ
4708   match(RegFlags);
4709 
4710   op_cost(0);
4711   format %{ "RFLAGSU" %}
4712   interface(REG_INTER);
4713 %}
4714 
4715 // Special Registers
4716 
4717 // Method Register, used as the inline cache register (allocated from method_reg)
4718 operand inline_cache_RegP(iRegP reg)
4719 %{
4720   constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
4721   match(reg);
4722   match(iRegPNoSp);
4723   op_cost(0);
4724   format %{ %}
4725   interface(REG_INTER);
4726 %}
4727 
4728 operand interpreter_method_oop_RegP(iRegP reg)
4729 %{
4730   constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg; same method_reg class as inline_cache_RegP
4731   match(reg);
4732   match(iRegPNoSp);
4733   op_cost(0);
4734   format %{ %}
4735   interface(REG_INTER);
4736 %}
4737 
4738 // Thread Register
4739 operand thread_RegP(iRegP reg)
4740 %{
4741   constraint(ALLOC_IN_RC(thread_reg)); // thread_reg
4742   match(reg);
4743   op_cost(0);
4744   format %{ %}
4745   interface(REG_INTER);
4746 %}
4747 
4748 operand lr_RegP(iRegP reg)
4749 %{
4750   constraint(ALLOC_IN_RC(lr_reg)); // link_reg: pointer operand allocated from lr_reg
4751   match(reg);
4752   op_cost(0);
4753   format %{ %}
4754   interface(REG_INTER);
4755 %}
4756 
4757 //----------Memory Operands----------------------------------------------------
4758 
4759 operand indirect(iRegP reg)
4760 %{
4761   constraint(ALLOC_IN_RC(ptr_reg));
4762   match(reg);                 // the address is the pointer register itself
4763   op_cost(0);
4764   format %{ "[$reg]" %}
4765   interface(MEMORY_INTER) %{
4766     base($reg);
4767     index(0xffffffff);        // value used by every operand here that has no index register
4768     scale(0x0);
4769     disp(0x0);
4770   %}
4771 %}
4772 
4773 operand indIndexScaledOffsetI(iRegP reg, iRegL lreg, immIScale scale, immIU12 off)
4774 %{
4775   constraint(ALLOC_IN_RC(ptr_reg));
4776   match(AddP (AddP reg (LShiftL lreg scale)) off);   // reg + (lreg << scale) + off, with off typed immIU12
4777   op_cost(INSN_COST);
4778   format %{ "$reg, $lreg lsl($scale), $off" %}
4779   interface(MEMORY_INTER) %{
4780     base($reg);
4781     index($lreg);
4782     scale($scale);
4783     disp($off);
4784   %}
4785 %}
4786 
4787 operand indIndexScaledOffsetL(iRegP reg, iRegL lreg, immIScale scale, immLU12 off)
4788 %{
4789   constraint(ALLOC_IN_RC(ptr_reg));
4790   match(AddP (AddP reg (LShiftL lreg scale)) off);   // reg + (lreg << scale) + off, with off typed immLU12
4791   op_cost(INSN_COST);
4792   format %{ "$reg, $lreg lsl($scale), $off" %}
4793   interface(MEMORY_INTER) %{
4794     base($reg);
4795     index($lreg);
4796     scale($scale);
4797     disp($off);
4798   %}
4799 %}
4800 
4801 operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off)
4802 %{
4803   constraint(ALLOC_IN_RC(ptr_reg));
4804   match(AddP (AddP reg (ConvI2L ireg)) off);   // reg + ConvI2L(ireg) + off; the ConvI2L is absorbed into the operand
4805   op_cost(INSN_COST);
4806   format %{ "$reg, $ireg, $off I2L" %}
4807   interface(MEMORY_INTER) %{
4808     base($reg);
4809     index($ireg);
4810     scale(0x0);
4811     disp($off);
4812   %}
4813 %}
4814 
4815 operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off)
4816 %{
4817   constraint(ALLOC_IN_RC(ptr_reg));
4818   match(AddP (AddP reg (LShiftL (ConvI2L ireg) scale)) off);   // reg + (ConvI2L(ireg) << scale) + off
4819   op_cost(INSN_COST);
4820   format %{ "$reg, $ireg sxtw($scale), $off I2L" %}
4821   interface(MEMORY_INTER) %{
4822     base($reg);
4823     index($ireg);
4824     scale($scale);
4825     disp($off);
4826   %}
4827 %}
4828 
4829 operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
4830 %{
4831   constraint(ALLOC_IN_RC(ptr_reg));
4832   match(AddP reg (LShiftL (ConvI2L ireg) scale));   // reg + (ConvI2L(ireg) << scale), no displacement
4833   op_cost(0);
4834   format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
4835   interface(MEMORY_INTER) %{
4836     base($reg);
4837     index($ireg);
4838     scale($scale);
4839     disp(0x0);
4840   %}
4841 %}
4842 
4843 operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
4844 %{
4845   constraint(ALLOC_IN_RC(ptr_reg));
4846   match(AddP reg (LShiftL lreg scale));   // reg + (lreg << scale), no displacement
4847   op_cost(0);
4848   format %{ "$reg, $lreg lsl($scale)" %}
4849   interface(MEMORY_INTER) %{
4850     base($reg);
4851     index($lreg);
4852     scale($scale);
4853     disp(0x0);
4854   %}
4855 %}
4856 
4857 operand indIndex(iRegP reg, iRegL lreg)
4858 %{
4859   constraint(ALLOC_IN_RC(ptr_reg));
4860   match(AddP reg lreg);   // reg + lreg, unscaled and without displacement
4861   op_cost(0);
4862   format %{ "$reg, $lreg" %}
4863   interface(MEMORY_INTER) %{
4864     base($reg);
4865     index($lreg);
4866     scale(0x0);
4867     disp(0x0);
4868   %}
4869 %}
4870 
4871 operand indOffI(iRegP reg, immIOffset off)
4872 %{
4873   constraint(ALLOC_IN_RC(ptr_reg));
4874   match(AddP reg off);        // reg + off, with off typed immIOffset
4875   op_cost(0);
4876   format %{ "[$reg, $off]" %}
4877   interface(MEMORY_INTER) %{
4878     base($reg);
4879     index(0xffffffff);        // no index register
4880     scale(0x0);
4881     disp($off);
4882   %}
4883 %}
4884 
4885 operand indOffI4(iRegP reg, immIOffset4 off)
4886 %{
4887   constraint(ALLOC_IN_RC(ptr_reg));
4888   match(AddP reg off);        // reg + off, with off typed immIOffset4; member of opclass vmem4
4889   op_cost(0);
4890   format %{ "[$reg, $off]" %}
4891   interface(MEMORY_INTER) %{
4892     base($reg);
4893     index(0xffffffff);        // no index register
4894     scale(0x0);
4895     disp($off);
4896   %}
4897 %}
4898 
4899 operand indOffI8(iRegP reg, immIOffset8 off)
4900 %{
4901   constraint(ALLOC_IN_RC(ptr_reg));
4902   match(AddP reg off);        // reg + off, with off typed immIOffset8; member of opclass vmem8
4903   op_cost(0);
4904   format %{ "[$reg, $off]" %}
4905   interface(MEMORY_INTER) %{
4906     base($reg);
4907     index(0xffffffff);        // no index register
4908     scale(0x0);
4909     disp($off);
4910   %}
4911 %}
4912 
4913 operand indOffI16(iRegP reg, immIOffset16 off)
4914 %{
4915   constraint(ALLOC_IN_RC(ptr_reg));
4916   match(AddP reg off);        // reg + off, with off typed immIOffset16; member of opclass vmem16
4917   op_cost(0);
4918   format %{ "[$reg, $off]" %}
4919   interface(MEMORY_INTER) %{
4920     base($reg);
4921     index(0xffffffff);        // no index register
4922     scale(0x0);
4923     disp($off);
4924   %}
4925 %}
4926 
4927 operand indOffL(iRegP reg, immLoffset off)
4928 %{
4929   constraint(ALLOC_IN_RC(ptr_reg));
4930   match(AddP reg off);        // reg + off, with off typed immLoffset
4931   op_cost(0);
4932   format %{ "[$reg, $off]" %}
4933   interface(MEMORY_INTER) %{
4934     base($reg);
4935     index(0xffffffff);        // no index register
4936     scale(0x0);
4937     disp($off);
4938   %}
4939 %}
4940 
4941 operand indOffL4(iRegP reg, immLoffset4 off)
4942 %{
4943   constraint(ALLOC_IN_RC(ptr_reg));
4944   match(AddP reg off);        // reg + off, with off typed immLoffset4; member of opclass vmem4
4945   op_cost(0);
4946   format %{ "[$reg, $off]" %}
4947   interface(MEMORY_INTER) %{
4948     base($reg);
4949     index(0xffffffff);        // no index register
4950     scale(0x0);
4951     disp($off);
4952   %}
4953 %}
4954 
4955 operand indOffL8(iRegP reg, immLoffset8 off)
4956 %{
4957   constraint(ALLOC_IN_RC(ptr_reg));
4958   match(AddP reg off);        // reg + off, with off typed immLoffset8; member of opclass vmem8
4959   op_cost(0);
4960   format %{ "[$reg, $off]" %}
4961   interface(MEMORY_INTER) %{
4962     base($reg);
4963     index(0xffffffff);        // no index register
4964     scale(0x0);
4965     disp($off);
4966   %}
4967 %}
4968 
4969 operand indOffL16(iRegP reg, immLoffset16 off)
4970 %{
4971   constraint(ALLOC_IN_RC(ptr_reg));
4972   match(AddP reg off);        // reg + off, with off typed immLoffset16; member of opclass vmem16
4973   op_cost(0);
4974   format %{ "[$reg, $off]" %}
4975   interface(MEMORY_INTER) %{
4976     base($reg);
4977     index(0xffffffff);        // no index register
4978     scale(0x0);
4979     disp($off);
4980   %}
4981 %}
4982 
4983 operand indirectN(iRegN reg)
4984 %{
4985   predicate(Universe::narrow_oop_shift() == 0);
4986   constraint(ALLOC_IN_RC(ptr_reg));
4987   match(DecodeN reg);         // narrow register used directly as base; enabled only when the narrow oop shift is 0
4988   op_cost(0);
4989   format %{ "[$reg]\t# narrow" %}
4990   interface(MEMORY_INTER) %{
4991     base($reg);
4992     index(0xffffffff);        // no index register
4993     scale(0x0);
4994     disp(0x0);
4995   %}
4996 %}
4997 
4998 operand indIndexScaledOffsetIN(iRegN reg, iRegL lreg, immIScale scale, immIU12 off)
4999 %{
5000   predicate(Universe::narrow_oop_shift() == 0);
5001   constraint(ALLOC_IN_RC(ptr_reg));
5002   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);   // DecodeN(reg) + (lreg << scale) + off, with off typed immIU12
5003   op_cost(0);   // NOTE(review): indIndexScaledOffsetI and indIndexScaledOffsetLN use INSN_COST here -- confirm 0 is intended
5004   format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
5005   interface(MEMORY_INTER) %{
5006     base($reg);
5007     index($lreg);
5008     scale($scale);
5009     disp($off);
5010   %}
5011 %}
5012 
5013 operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLU12 off)
5014 %{
5015   predicate(Universe::narrow_oop_shift() == 0);
5016   constraint(ALLOC_IN_RC(ptr_reg));
5017   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);   // DecodeN(reg) + (lreg << scale) + off, with off typed immLU12
5018   op_cost(INSN_COST);
5019   format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
5020   interface(MEMORY_INTER) %{
5021     base($reg);
5022     index($lreg);
5023     scale($scale);
5024     disp($off);
5025   %}
5026 %}
5027 
5028 operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off)
5029 %{
5030   predicate(Universe::narrow_oop_shift() == 0);
5031   constraint(ALLOC_IN_RC(ptr_reg));
5032   match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off);   // DecodeN(reg) + ConvI2L(ireg) + off
5033   op_cost(INSN_COST);
5034   format %{ "$reg, $ireg, $off I2L\t# narrow" %}
5035   interface(MEMORY_INTER) %{
5036     base($reg);
5037     index($ireg);
5038     scale(0x0);
5039     disp($off);
5040   %}
5041 %}
5042 
5043 operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off)
5044 %{
5045   predicate(Universe::narrow_oop_shift() == 0);
5046   constraint(ALLOC_IN_RC(ptr_reg));
5047   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)) off);   // DecodeN(reg) + (ConvI2L(ireg) << scale) + off
5048   op_cost(INSN_COST);
5049   format %{ "$reg, $ireg sxtw($scale), $off I2L\t# narrow" %}
5050   interface(MEMORY_INTER) %{
5051     base($reg);
5052     index($ireg);
5053     scale($scale);
5054     disp($off);
5055   %}
5056 %}
5057 
5058 operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
5059 %{
5060   predicate(Universe::narrow_oop_shift() == 0);
5061   constraint(ALLOC_IN_RC(ptr_reg));
5062   match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));   // DecodeN(reg) + (ConvI2L(ireg) << scale), no displacement
5063   op_cost(0);
5064   format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
5065   interface(MEMORY_INTER) %{
5066     base($reg);
5067     index($ireg);
5068     scale($scale);
5069     disp(0x0);
5070   %}
5071 %}
5072 
5073 operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
5074 %{
5075   predicate(Universe::narrow_oop_shift() == 0);
5076   constraint(ALLOC_IN_RC(ptr_reg));
5077   match(AddP (DecodeN reg) (LShiftL lreg scale));   // DecodeN(reg) + (lreg << scale), no displacement
5078   op_cost(0);
5079   format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
5080   interface(MEMORY_INTER) %{
5081     base($reg);
5082     index($lreg);
5083     scale($scale);
5084     disp(0x0);
5085   %}
5086 %}
5087 
5088 operand indIndexN(iRegN reg, iRegL lreg)
5089 %{
5090   predicate(Universe::narrow_oop_shift() == 0);
5091   constraint(ALLOC_IN_RC(ptr_reg));
5092   match(AddP (DecodeN reg) lreg);   // DecodeN(reg) + lreg, unscaled and without displacement
5093   op_cost(0);
5094   format %{ "$reg, $lreg\t# narrow" %}
5095   interface(MEMORY_INTER) %{
5096     base($reg);
5097     index($lreg);
5098     scale(0x0);
5099     disp(0x0);
5100   %}
5101 %}
5102 
5103 operand indOffIN(iRegN reg, immIOffset off)
5104 %{
5105   predicate(Universe::narrow_oop_shift() == 0);
5106   constraint(ALLOC_IN_RC(ptr_reg));
5107   match(AddP (DecodeN reg) off);   // DecodeN(reg) + off, with off typed immIOffset
5108   op_cost(0);
5109   format %{ "[$reg, $off]\t# narrow" %}
5110   interface(MEMORY_INTER) %{
5111     base($reg);
5112     index(0xffffffff);             // no index register
5113     scale(0x0);
5114     disp($off);
5115   %}
5116 %}
5117 
5118 operand indOffLN(iRegN reg, immLoffset off)
5119 %{
5120   predicate(Universe::narrow_oop_shift() == 0);
5121   constraint(ALLOC_IN_RC(ptr_reg));
5122   match(AddP (DecodeN reg) off);   // DecodeN(reg) + off, with off typed immLoffset
5123   op_cost(0);
5124   format %{ "[$reg, $off]\t# narrow" %}
5125   interface(MEMORY_INTER) %{
5126     base($reg);
5127     index(0xffffffff);             // no index register
5128     scale(0x0);
5129     disp($off);
5130   %}
5131 %}
5132 
5133 
5134 
5135 // AArch64 opto stubs need to write to the pc slot in the thread anchor: thread register + immL_pc_off offset
5136 operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
5137 %{
5138   constraint(ALLOC_IN_RC(ptr_reg));
5139   match(AddP reg off);
5140   op_cost(0);
5141   format %{ "[$reg, $off]" %}
5142   interface(MEMORY_INTER) %{
5143     base($reg);
5144     index(0xffffffff);        // no index register
5145     scale(0x0);
5146     disp($off);
5147   %}
5148 %}
5149 
5150 //----------Special Memory Operands--------------------------------------------
5151 // Stack Slot Operand - This operand is used for loading and storing temporary
5152 //                      values on the stack where a match requires a value to
5153 //                      flow through memory.
5154 operand stackSlotP(sRegP reg)
5155 %{
5156   constraint(ALLOC_IN_RC(stack_slots));
5157   op_cost(100);
5158   // No match rule because this operand is only generated in matching
5159   // match(RegP);
5160   format %{ "[$reg]" %}
5161   interface(MEMORY_INTER) %{
5162     base(0x1e);  // "RSP" in the original comment (an x86 name) -- TODO confirm which register 0x1e encodes here
5163     index(0x0);  // No Index
5164     scale(0x0);  // No Scale
5165     disp($reg);  // Stack Offset
5166   %}
5167 %}
5168 
5169 operand stackSlotI(sRegI reg)
5170 %{
5171   constraint(ALLOC_IN_RC(stack_slots));   // NOTE(review): no op_cost here, whereas stackSlotP declares op_cost(100)
5172   // No match rule because this operand is only generated in matching
5173   // match(RegI);
5174   format %{ "[$reg]" %}
5175   interface(MEMORY_INTER) %{
5176     base(0x1e);  // "RSP" in the original comment (an x86 name) -- TODO confirm which register 0x1e encodes here
5177     index(0x0);  // No Index
5178     scale(0x0);  // No Scale
5179     disp($reg);  // Stack Offset
5180   %}
5181 %}
5182 
5183 operand stackSlotF(sRegF reg)
5184 %{
5185   constraint(ALLOC_IN_RC(stack_slots));   // NOTE(review): no op_cost here, whereas stackSlotP declares op_cost(100)
5186   // No match rule because this operand is only generated in matching
5187   // match(RegF);
5188   format %{ "[$reg]" %}
5189   interface(MEMORY_INTER) %{
5190     base(0x1e);  // "RSP" in the original comment (an x86 name) -- TODO confirm which register 0x1e encodes here
5191     index(0x0);  // No Index
5192     scale(0x0);  // No Scale
5193     disp($reg);  // Stack Offset
5194   %}
5195 %}
5196 
5197 operand stackSlotD(sRegD reg)
5198 %{
5199   constraint(ALLOC_IN_RC(stack_slots));   // NOTE(review): no op_cost here, whereas stackSlotP declares op_cost(100)
5200   // No match rule because this operand is only generated in matching
5201   // match(RegD);
5202   format %{ "[$reg]" %}
5203   interface(MEMORY_INTER) %{
5204     base(0x1e);  // "RSP" in the original comment (an x86 name) -- TODO confirm which register 0x1e encodes here
5205     index(0x0);  // No Index
5206     scale(0x0);  // No Scale
5207     disp($reg);  // Stack Offset
5208   %}
5209 %}
5210 
5211 operand stackSlotL(sRegL reg)
5212 %{
5213   constraint(ALLOC_IN_RC(stack_slots));   // NOTE(review): no op_cost here, whereas stackSlotP declares op_cost(100)
5214   // No match rule because this operand is only generated in matching
5215   // match(RegL);
5216   format %{ "[$reg]" %}
5217   interface(MEMORY_INTER) %{
5218     base(0x1e);  // "RSP" in the original comment (an x86 name) -- TODO confirm which register 0x1e encodes here
5219     index(0x0);  // No Index
5220     scale(0x0);  // No Scale
5221     disp($reg);  // Stack Offset
5222   %}
5223 %}
5224 
5225 // Operands for expressing Control Flow
5226 // NOTE: Label is a predefined operand which should not be redefined in
5227 //       the AD file. It is generically handled within the ADLC.
5228 
5229 //----------Conditional Branch Operands----------------------------------------
5230 // Comparison Op  - This is the operation of the comparison, and is limited to
5231 //                  the following set of codes:
5232 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5233 //
5234 // Other attributes of the comparison, such as unsignedness, are specified
5235 // by the comparison instruction that sets a condition code flags register.
5236 // That result is represented by a flags operand whose subtype is appropriate
5237 // to the unsignedness (etc.) of the comparison.
5238 //
5239 // Later, the instruction which matches both the Comparison Op (a Bool) and
5240 // the flags (produced by the Cmp) specifies the coding of the comparison op
5241 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5242 
5243 // used for signed integral comparisons and fp comparisons
5244 
5245 operand cmpOp()
5246 %{
5247   match(Bool);
5248   // Each COND_INTER entry below pairs a numeric code with the mnemonic string for that condition.
5249   format %{ "" %}
5250   interface(COND_INTER) %{
5251     equal(0x0, "eq");
5252     not_equal(0x1, "ne");
5253     less(0xb, "lt");
5254     greater_equal(0xa, "ge");
5255     less_equal(0xd, "le");
5256     greater(0xc, "gt");
5257     overflow(0x6, "vs");
5258     no_overflow(0x7, "vc");
5259   %}
5260 %}
5261 
5262 // used for unsigned integral comparisons
5263 
5264 operand cmpOpU()
5265 %{
5266   match(Bool);
5267   // Same shape as cmpOp; only the four ordering entries (less, greater_equal, less_equal, greater) differ.
5268   format %{ "" %}
5269   interface(COND_INTER) %{
5270     equal(0x0, "eq");
5271     not_equal(0x1, "ne");
5272     less(0x3, "lo");
5273     greater_equal(0x2, "hs");
5274     less_equal(0x9, "ls");
5275     greater(0x8, "hi");
5276     overflow(0x6, "vs");
5277     no_overflow(0x7, "vc");
5278   %}
5279 %}
5280 
5281 // Special operand allowing long args to int ops to be truncated for free
5282 // (matches ConvL2I of an iRegL; it is the second member of opclass iRegIorL2I)
5283 operand iRegL2I(iRegL reg) %{
5284 
5285   op_cost(0);
5286 
5287   match(ConvL2I reg);
5288 
5289   format %{ "l2i($reg)" %}
5290   // NOTE(review): unlike every other operand here there is no ';' after interface(REG_INTER) -- confirm how ADLC treats this before normalizing
5291   interface(REG_INTER)
5292 %}
5293 
5294 opclass vmem4(indirect, indIndex, indOffI4, indOffL4);      // register, register+index, or the *4 offset operands
5295 opclass vmem8(indirect, indIndex, indOffI8, indOffL8);      // register, register+index, or the *8 offset operands
5296 opclass vmem16(indirect, indIndex, indOffI16, indOffL16);   // register, register+index, or the *16 offset operands
5297 
5298 //----------OPERAND CLASSES----------------------------------------------------
5299 // Operand Classes are groups of operands that are used to simplify
5300 // instruction definitions by not requiring the AD writer to specify
5301 // separate instructions for every form of operand when the
5302 // instruction accepts multiple operand types with the same basic
5303 // encoding and format. The classic case of this is memory operands.
5304 
5305 // memory is used to define read/write location for load/store
5306 // instruction defs. we can turn a memory op into an Address.
5307 // First line lists the pointer-based operands, second line their narrow (DecodeN) counterparts.
5308 opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
5309                indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
5310  
5311 
5312 
5313 
5314 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
5315 // operations. it allows the src to be either an iRegI or a (ConvL2I
5316 // iRegL). in the latter case the l2i normally planted for a ConvL2I
5317 // can be elided because the 32-bit instruction will just employ the
5318 // lower 32 bits anyway.
5319 //
5320 // n.b. this does not elide all L2I conversions. if the truncated
5321 // value is consumed by more than one operation then the ConvL2I
5322 // cannot be bundled into the consuming nodes so an l2i gets planted
5323 // (actually a movw $dst $src) and the downstream instructions consume
5324 // the result of the l2i as an iRegI input. That's a shame since the
5325 // movw is actually redundant but it's not too costly.
5326 
5327 opclass iRegIorL2I(iRegI, iRegL2I);
5328 
5329 //----------PIPELINE-----------------------------------------------------------
5330 // Rules which define the behavior of the target architecture's pipeline.
5331 
5332 // For specific pipelines, e.g. A53, name the stages of that pipeline as aliases of the generic stages S0..S3
5333 //pipe_desc(ISS, EX1, EX2, WR);
5334 #define ISS S0
5335 #define EX1 S1
5336 #define EX2 S2
5337 #define WR  S3
5338 
5339 // Pipeline description: attributes, resources, stages and pipe classes
5340 pipeline %{
5341 
5342 attributes %{
5343   // ARM instructions are of fixed length
5344   fixed_size_instructions;        // Fixed size instructions
5345   max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
5346   // ARM instructions come in 32-bit word units
5347   instruction_unit_size = 4;         // An instruction is 4 bytes long
5348   instruction_fetch_unit_size = 64;  // The processor fetches one line
5349   instruction_fetch_units = 1;       // of 64 bytes
5350 
5351   // List of nop instructions
5352   nops( MachNop );
5353 %}
5354 
5355 // We don't use an actual pipeline model so don't care about resources
5356 // or description. we do use pipeline classes to introduce fixed
5357 // latencies
5358 
5359 //----------RESOURCES----------------------------------------------------------
5360 // Resources are the functional units available to the machine; INS01 and ALU are unions of their numbered members
5361 
5362 resources( INS0, INS1, INS01 = INS0 | INS1,
5363            ALU0, ALU1, ALU = ALU0 | ALU1,
5364            MAC,
5365            DIV,
5366            BRANCH,
5367            LDST,
5368            NEON_FP);
5369 
5370 //----------PIPELINE DESCRIPTION-----------------------------------------------
5371 // Pipeline Description specifies the stages in the machine's pipeline
5372 
5373 // Define the pipeline as a generic 6 stage pipeline (S0..S5; ISS, EX1, EX2 and WR are #defined as S0..S3)
5374 pipe_desc(S0, S1, S2, S3, S4, S5);
5375 
5376 //----------PIPELINE CLASSES---------------------------------------------------
5377 // Pipeline Classes describe the stages in which input and output are
5378 // referenced by the hardware pipeline.
5379 
5380 pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
5381 %{
5382   single_instruction;   // two float sources, float result: src1 read at S1, src2 at S2, dst written at S5
5383   src1   : S1(read);
5384   src2   : S2(read);
5385   dst    : S5(write);
5386   INS01  : ISS;
5387   NEON_FP : S5;
5388 %}
5389 
5390 pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
5391 %{
5392   single_instruction;   // two double sources, double result: src1 read at S1, src2 at S2, dst written at S5
5393   src1   : S1(read);
5394   src2   : S2(read);
5395   dst    : S5(write);
5396   INS01  : ISS;
5397   NEON_FP : S5;
5398 %}
5399 
5400 pipe_class fp_uop_s(vRegF dst, vRegF src)
5401 %{
5402   single_instruction;   // one float source, float result: src read at S1, dst written at S5
5403   src    : S1(read);
5404   dst    : S5(write);
5405   INS01  : ISS;
5406   NEON_FP : S5;
5407 %}
5408 
5409 pipe_class fp_uop_d(vRegD dst, vRegD src)
5410 %{
5411   single_instruction;   // one double source, double result: src read at S1, dst written at S5
5412   src    : S1(read);
5413   dst    : S5(write);
5414   INS01  : ISS;
5415   NEON_FP : S5;
5416 %}
5417 
5418 pipe_class fp_d2f(vRegF dst, vRegD src)
5419 %{
5420   single_instruction;   // double source, float result: src read at S1, dst written at S5
5421   src    : S1(read);
5422   dst    : S5(write);
5423   INS01  : ISS;
5424   NEON_FP : S5;
5425 %}
5426 
5427 pipe_class fp_f2d(vRegD dst, vRegF src)
5428 %{
5429   single_instruction;   // float source, double result: src read at S1, dst written at S5
5430   src    : S1(read);
5431   dst    : S5(write);
5432   INS01  : ISS;
5433   NEON_FP : S5;
5434 %}
5435 
5436 pipe_class fp_f2i(iRegINoSp dst, vRegF src)
5437 %{
5438   single_instruction;   // float source, 32 bit integer result: src read at S1, dst written at S5
5439   src    : S1(read);
5440   dst    : S5(write);
5441   INS01  : ISS;
5442   NEON_FP : S5;
5443 %}
5444 
5445 pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
5446 %{
5447   single_instruction;   // float source, 64 bit integer result: src read at S1, dst written at S5
5448   src    : S1(read);
5449   dst    : S5(write);
5450   INS01  : ISS;
5451   NEON_FP : S5;
5452 %}
5453 
5454 pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
5455 %{
5456   single_instruction;   // iRegIorL2I source, float result: src read at S1, dst written at S5
5457   src    : S1(read);
5458   dst    : S5(write);
5459   INS01  : ISS;
5460   NEON_FP : S5;
5461 %}
5462 
5463 pipe_class fp_l2f(vRegF dst, iRegL src)
5464 %{
5465   single_instruction;   // 64 bit integer source, float result: src read at S1, dst written at S5
5466   src    : S1(read);
5467   dst    : S5(write);
5468   INS01  : ISS;
5469   NEON_FP : S5;
5470 %}
5471 
5472 pipe_class fp_d2i(iRegINoSp dst, vRegD src)
5473 %{
5474   single_instruction;   // double source, 32 bit integer result: src read at S1, dst written at S5
5475   src    : S1(read);
5476   dst    : S5(write);
5477   INS01  : ISS;
5478   NEON_FP : S5;
5479 %}
5480 
5481 pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
5482 %{
5483   single_instruction;   // double source, 64 bit integer result: src read at S1, dst written at S5
5484   src    : S1(read);
5485   dst    : S5(write);
5486   INS01  : ISS;
5487   NEON_FP : S5;
5488 %}
5489 
5490 pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
5491 %{
5492   single_instruction;   // iRegIorL2I source, double result: src read at S1, dst written at S5
5493   src    : S1(read);
5494   dst    : S5(write);
5495   INS01  : ISS;
5496   NEON_FP : S5;
5497 %}
5498 
5499 pipe_class fp_l2d(vRegD dst, iRegL src)
5500 %{
5501   single_instruction;   // 64 bit integer source (iRegL, as in fp_l2f), double result: src read at S1, dst written at S5
5502   src    : S1(read);
5503   dst    : S5(write);
5504   INS01  : ISS;
5505   NEON_FP : S5;
5506 %}
5507 
5508 pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
5509 %{
5510   single_instruction;   // two float sources, float result: src1 read at S1, src2 at S2, dst written at S5
5511   src1   : S1(read);
5512   src2   : S2(read);
5513   dst    : S5(write);
5514   INS0   : ISS;         // uses INS0 only, where fp_dop_reg_reg_s uses INS01
5515   NEON_FP : S5;
5516 %}
5517 
5518 pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
5519 %{
5520   single_instruction;   // two double sources, double result: src1 read at S1, src2 at S2, dst written at S5
5521   src1   : S1(read);
5522   src2   : S2(read);
5523   dst    : S5(write);
5524   INS0   : ISS;         // uses INS0 only, where fp_dop_reg_reg_d uses INS01
5525   NEON_FP : S5;
5526 %}
5527 
5528 pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
5529 %{
5530   single_instruction;   // flags plus two float sources, all read at S1; float result written at S3
5531   cr     : S1(read);
5532   src1   : S1(read);
5533   src2   : S1(read);
5534   dst    : S3(write);
5535   INS01  : ISS;
5536   NEON_FP : S3;
5537 %}
5538 
5539 pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
5540 %{
5541   single_instruction;   // flags plus two double sources, all read at S1; double result written at S3
5542   cr     : S1(read);
5543   src1   : S1(read);
5544   src2   : S1(read);
5545   dst    : S3(write);
5546   INS01  : ISS;
5547   NEON_FP : S3;
5548 %}
5549 
5550 pipe_class fp_imm_s(vRegF dst)
5551 %{
5552   single_instruction;   // no register sources; float result written at S3
5553   dst    : S3(write);
5554   INS01  : ISS;
5555   NEON_FP : S3;
5556 %}
5557 
5558 pipe_class fp_imm_d(vRegD dst)
5559 %{
5560   single_instruction;   // no register sources; double result written at S3
5561   dst    : S3(write);
5562   INS01  : ISS;
5563   NEON_FP : S3;
5564 %}
5565 
5566 pipe_class fp_load_constant_s(vRegF dst) // Timing for loading a float from the constant table (used by loadConF)
5567 %{
5568   single_instruction;
5569   dst    : S4(write); // result written in S4, one stage later than the fp_imm_* classes
5570   INS01  : ISS;       // may issue as instruction 0 or 1
5571   NEON_FP : S4;       // NEON_FP resource reserved at S4
5572 %}
5573 
5574 pipe_class fp_load_constant_d(vRegD dst) // Timing for loading a double from the constant table (used by loadConD)
5575 %{
5576   single_instruction;
5577   dst    : S4(write); // result written in S4, one stage later than the fp_imm_* classes
5578   INS01  : ISS;       // may issue as instruction 0 or 1
5579   NEON_FP : S4;       // NEON_FP resource reserved at S4
5580 %}
5581 
5582 pipe_class vmul64(vecD dst, vecD src1, vecD src2) // Timing for vector multiply on vecD operands
5583 %{
5584   single_instruction;
5585   dst    : S5(write); // result written in S5
5586   src1   : S1(read);
5587   src2   : S1(read);
5588   INS01  : ISS;       // vecD form may issue as instruction 0 or 1
5589   NEON_FP : S5;       // NEON_FP resource reserved at S5
5590 %}
5591 
5592 pipe_class vmul128(vecX dst, vecX src1, vecX src2) // Timing for vector multiply on vecX operands
5593 %{
5594   single_instruction;
5595   dst    : S5(write); // result written in S5
5596   src1   : S1(read);
5597   src2   : S1(read);
5598   INS0   : ISS;       // vecX form is restricted to issue slot INS0
5599   NEON_FP : S5;       // NEON_FP resource reserved at S5
5600 %}
5601 
5602 pipe_class vmla64(vecD dst, vecD src1, vecD src2) // Timing for vector multiply-accumulate on vecD operands
5603 %{
5604   single_instruction;
5605   dst    : S5(write); // result written in S5
5606   src1   : S1(read);
5607   src2   : S1(read);
5608   dst    : S1(read);  // dst is also an input (the accumulator), read in S1
5609   INS01  : ISS;       // vecD form may issue as instruction 0 or 1
5610   NEON_FP : S5;       // NEON_FP resource reserved at S5
5611 %}
5612 
5613 pipe_class vmla128(vecX dst, vecX src1, vecX src2) // Timing for vector multiply-accumulate on vecX operands
5614 %{
5615   single_instruction;
5616   dst    : S5(write); // result written in S5
5617   src1   : S1(read);
5618   src2   : S1(read);
5619   dst    : S1(read);  // dst is also an input (the accumulator), read in S1
5620   INS0   : ISS;       // vecX form is restricted to issue slot INS0
5621   NEON_FP : S5;       // NEON_FP resource reserved at S5
5622 %}
5623 
5624 pipe_class vdop64(vecD dst, vecD src1, vecD src2) // Timing for two-source vector ops on vecD operands (presumably integer data ops; FP ops use vdop_fp64)
5625 %{
5626   single_instruction;
5627   dst    : S4(write); // result written in S4
5628   src1   : S2(read);  // both sources are read in S2
5629   src2   : S2(read);
5630   INS01  : ISS;       // vecD form may issue as instruction 0 or 1
5631   NEON_FP : S4;       // NEON_FP resource reserved at S4
5632 %}
5633 
5634 pipe_class vdop128(vecX dst, vecX src1, vecX src2) // Timing for two-source vector ops on vecX operands (presumably integer data ops; FP ops use vdop_fp128)
5635 %{
5636   single_instruction;
5637   dst    : S4(write); // result written in S4
5638   src1   : S2(read);  // both sources are read in S2
5639   src2   : S2(read);
5640   INS0   : ISS;       // vecX form is restricted to issue slot INS0
5641   NEON_FP : S4;       // NEON_FP resource reserved at S4
5642 %}
5643 
5644 pipe_class vlogical64(vecD dst, vecD src1, vecD src2) // Timing for two-source vector logical ops on vecD operands
5645 %{
5646   single_instruction;
5647   dst    : S3(write); // result written in S3
5648   src1   : S2(read);  // both sources are read in S2
5649   src2   : S2(read);
5650   INS01  : ISS;       // vecD form may issue as instruction 0 or 1
5651   NEON_FP : S3;       // NEON_FP resource reserved at S3
5652 %}
5653 
5654 pipe_class vlogical128(vecX dst, vecX src1, vecX src2) // Timing for two-source vector logical ops on vecX operands
5655 %{
5656   single_instruction;
5657   dst    : S3(write); // result written in S3
5658   src1   : S2(read);  // both sources are read in S2
5659   src2   : S2(read);
5660   INS0   : ISS;       // vecX form is restricted to issue slot INS0
5661   NEON_FP : S3;       // NEON_FP resource reserved at S3
5662 %}
5663 
5664 pipe_class vshift64(vecD dst, vecD src, vecX shift) // Timing for vector shift of a vecD by a shift count held in a vector register
5665 %{
5666   single_instruction;
5667   dst    : S3(write); // result written in S3
5668   src    : S1(read);
5669   shift  : S1(read);  // the shift-count register is a pipeline input, read in S1
5670   INS01  : ISS;       // vecD form may issue as instruction 0 or 1
5671   NEON_FP : S3;       // NEON_FP resource reserved at S3
5672 %}
5673 
5674 pipe_class vshift128(vecX dst, vecX src, vecX shift) // Timing for vector shift of a vecX by a shift count held in a vector register
5675 %{
5676   single_instruction;
5677   dst    : S3(write); // result written in S3
5678   src    : S1(read);
5679   shift  : S1(read);  // the shift-count register is a pipeline input, read in S1
5680   INS0   : ISS;       // vecX form is restricted to issue slot INS0
5681   NEON_FP : S3;       // NEON_FP resource reserved at S3
5682 %}
5683 
5684 pipe_class vshift64_imm(vecD dst, vecD src, immI shift) // Timing for vector shift of a vecD by an immediate count
5685 %{
5686   single_instruction;
5687   dst    : S3(write); // result written in S3
5688   src    : S1(read);  // only src is listed as a pipeline input; the immediate shift has no stage entry
5689   INS01  : ISS;       // vecD form may issue as instruction 0 or 1
5690   NEON_FP : S3;       // NEON_FP resource reserved at S3
5691 %}
5692 
5693 pipe_class vshift128_imm(vecX dst, vecX src, immI shift) // Timing for vector shift of a vecX by an immediate count
5694 %{
5695   single_instruction;
5696   dst    : S3(write); // result written in S3
5697   src    : S1(read);  // only src is listed as a pipeline input; the immediate shift has no stage entry
5698   INS0   : ISS;       // vecX form is restricted to issue slot INS0
5699   NEON_FP : S3;       // NEON_FP resource reserved at S3
5700 %}
5701 
5702 pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2) // Timing for two-source vector FP ops on vecD operands
5703 %{
5704   single_instruction;
5705   dst    : S5(write); // result written in S5
5706   src1   : S1(read);
5707   src2   : S1(read);
5708   INS01  : ISS;       // vecD form may issue as instruction 0 or 1
5709   NEON_FP : S5;       // NEON_FP resource reserved at S5
5710 %}
5711 
5712 pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2) // Timing for two-source vector FP ops on vecX operands
5713 %{
5714   single_instruction;
5715   dst    : S5(write); // result written in S5
5716   src1   : S1(read);
5717   src2   : S1(read);
5718   INS0   : ISS;       // vecX form is restricted to issue slot INS0
5719   NEON_FP : S5;       // NEON_FP resource reserved at S5
5720 %}
5721 
5722 pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2) // Timing for vector FP multiply/divide on vecD operands
5723 %{
5724   single_instruction;
5725   dst    : S5(write); // result written in S5
5726   src1   : S1(read);
5727   src2   : S1(read);
5728   INS0   : ISS;       // slot INS0 only, even for the vecD form (other vecD classes here use INS01)
5729   NEON_FP : S5;       // NEON_FP resource reserved at S5
5730 %}
5731 
5732 pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2) // Timing for vector FP multiply/divide on vecX operands
5733 %{
5734   single_instruction;
5735   dst    : S5(write); // result written in S5
5736   src1   : S1(read);
5737   src2   : S1(read);
5738   INS0   : ISS;       // restricted to issue slot INS0
5739   NEON_FP : S5;       // NEON_FP resource reserved at S5
5740 %}
5741 
5742 pipe_class vsqrt_fp128(vecX dst, vecX src) // Timing for vector FP square root on vecX operands
5743 %{
5744   single_instruction;
5745   dst    : S5(write); // result written in S5
5746   src    : S1(read);
5747   INS0   : ISS;       // restricted to issue slot INS0
5748   NEON_FP : S5;       // NEON_FP resource reserved at S5
5749 %}
5750 
5751 pipe_class vunop_fp64(vecD dst, vecD src) // Timing for single-source vector FP ops on vecD operands
5752 %{
5753   single_instruction;
5754   dst    : S5(write); // result written in S5
5755   src    : S1(read);
5756   INS01  : ISS;       // vecD form may issue as instruction 0 or 1
5757   NEON_FP : S5;       // NEON_FP resource reserved at S5
5758 %}
5759 
5760 pipe_class vunop_fp128(vecX dst, vecX src) // Timing for single-source vector FP ops on vecX operands
5761 %{
5762   single_instruction;
5763   dst    : S5(write); // result written in S5
5764   src    : S1(read);
5765   INS0   : ISS;       // vecX form is restricted to issue slot INS0
5766   NEON_FP : S5;       // NEON_FP resource reserved at S5
5767 %}
5768 
5769 pipe_class vdup_reg_reg64(vecD dst, iRegI src) // Timing for filling a vecD from a general (integer) register
5770 %{
5771   single_instruction;
5772   dst    : S3(write); // result written in S3
5773   src    : S1(read);
5774   INS01  : ISS;       // may issue as instruction 0 or 1
5775   NEON_FP : S3;       // NEON_FP resource reserved at S3
5776 %}
5777 
5778 pipe_class vdup_reg_reg128(vecX dst, iRegI src) // Timing for filling a vecX from a general (integer) register
5779 %{
5780   single_instruction;
5781   dst    : S3(write); // result written in S3
5782   src    : S1(read);
5783   INS01  : ISS;       // may issue as instruction 0 or 1, even for the vecX form
5784   NEON_FP : S3;       // NEON_FP resource reserved at S3
5785 %}
5786 
5787 pipe_class vdup_reg_freg64(vecD dst, vRegF src) // Timing for filling a vecD from a float register
5788 %{
5789   single_instruction;
5790   dst    : S3(write); // result written in S3
5791   src    : S1(read);
5792   INS01  : ISS;       // may issue as instruction 0 or 1
5793   NEON_FP : S3;       // NEON_FP resource reserved at S3
5794 %}
5795 
5796 pipe_class vdup_reg_freg128(vecX dst, vRegF src) // Timing for filling a vecX from a float register
5797 %{
5798   single_instruction;
5799   dst    : S3(write); // result written in S3
5800   src    : S1(read);
5801   INS01  : ISS;       // may issue as instruction 0 or 1, even for the vecX form
5802   NEON_FP : S3;       // NEON_FP resource reserved at S3
5803 %}
5804 
5805 pipe_class vdup_reg_dreg128(vecX dst, vRegD src) // Timing for filling a vecX from a double register
5806 %{
5807   single_instruction;
5808   dst    : S3(write); // result written in S3
5809   src    : S1(read);
5810   INS01  : ISS;       // may issue as instruction 0 or 1, even for the vecX form
5811   NEON_FP : S3;       // NEON_FP resource reserved at S3
5812 %}
5813 
5814 pipe_class vmovi_reg_imm64(vecD dst) // Timing for setting a vecD from an immediate; no register sources
5815 %{
5816   single_instruction;
5817   dst    : S3(write); // result written in S3
5818   INS01  : ISS;       // vecD form may issue as instruction 0 or 1
5819   NEON_FP : S3;       // NEON_FP resource reserved at S3
5820 %}
5821 
5822 pipe_class vmovi_reg_imm128(vecX dst) // Timing for setting a vecX from an immediate; no register sources
5823 %{
5824   single_instruction;
5825   dst    : S3(write); // result written in S3
5826   INS0   : ISS;       // vecX form is restricted to issue slot INS0
5827   NEON_FP : S3;       // NEON_FP resource reserved at S3
5828 %}
5829 
5830 pipe_class vload_reg_mem64(vecD dst, vmem8 mem) // Timing for loading a vecD from memory (vmem8 address operand)
5831 %{
5832   single_instruction;
5833   dst    : S5(write); // loaded value is written in S5
5834   mem    : ISS(read); // address operand is needed at issue
5835   INS01  : ISS;       // may issue as instruction 0 or 1
5836   NEON_FP : S3;       // NOTE(review): resource reserved at S3 although dst is written at S5 - confirm intended
5837 %}
5838 
5839 pipe_class vload_reg_mem128(vecX dst, vmem16 mem) // Timing for loading a vecX from memory (vmem16 address operand)
5840 %{
5841   single_instruction;
5842   dst    : S5(write); // loaded value is written in S5
5843   mem    : ISS(read); // address operand is needed at issue
5844   INS01  : ISS;       // may issue as instruction 0 or 1
5845   NEON_FP : S3;       // NOTE(review): resource reserved at S3 although dst is written at S5 - confirm intended
5846 %}
5847 
5848 pipe_class vstore_reg_mem64(vecD src, vmem8 mem) // Timing for storing a vecD to memory (vmem8 address operand)
5849 %{
5850   single_instruction;
5851   mem    : ISS(read); // address operand is needed at issue
5852   src    : S2(read);  // data to store is read later, in S2
5853   INS01  : ISS;       // may issue as instruction 0 or 1
5854   NEON_FP : S3;       // NEON_FP resource reserved at S3
5855 %}
5856 
5857 pipe_class vstore_reg_mem128(vecX src, vmem16 mem) // Timing for storing a vecX to memory (vmem16 address operand); src is vecX to match vload_reg_mem128
5858 %{
5859   single_instruction;
5860   mem    : ISS(read); // address operand is needed at issue
5861   src    : S2(read);  // data to store is read later, in S2
5862   INS01  : ISS;       // may issue as instruction 0 or 1
5863   NEON_FP : S3;       // NEON_FP resource reserved at S3
5864 %}
5865 
5866 //------- Integer ALU operations --------------------------
5867 
5868 // Integer ALU reg-reg operation
5869 // Operands needed in EX1, result generated in EX2
5870 // Eg.  ADD     x0, x1, x2
5871 pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
5872 %{
5873   single_instruction;
5874   dst    : EX2(write); // result available from EX2
5875   src1   : EX1(read);  // both sources are consumed in EX1
5876   src2   : EX1(read);
5877   INS01  : ISS; // Dual issue as instruction 0 or 1
5878   ALU    : EX2; // ALU resource reserved at EX2
5879 %}
5880 
5881 // Integer ALU reg-reg operation with constant shift
5882 // Shifted register must be available in LATE_ISS instead of EX1
5883 // Eg.  ADD     x0, x1, x2, LSL #2
5884 pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
5885 %{
5886   single_instruction;
5887   dst    : EX2(write); // result available from EX2
5888   src1   : EX1(read);  // unshifted source is consumed in EX1
5889   src2   : ISS(read);  // shifted source is needed earlier; modelled here with stage ISS
5890   INS01  : ISS;        // may issue as instruction 0 or 1
5891   ALU    : EX2;        // ALU resource reserved at EX2
5892 %}
5893 
5894 // Integer ALU reg operation with constant shift
5895 // Eg.  LSL     x0, x1, #shift
5896 pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
5897 %{
5898   single_instruction;
5899   dst    : EX2(write); // result available from EX2
5900   src1   : ISS(read);  // the shifted source is needed at issue
5901   INS01  : ISS;        // may issue as instruction 0 or 1
5902   ALU    : EX2;        // ALU resource reserved at EX2
5903 %}
5904 
5905 // Integer ALU reg-reg operation with variable shift
5906 // Both operands must be available in LATE_ISS instead of EX1
5907 // Result is available in EX1 instead of EX2
5908 // Eg.  LSLV    x0, x1, x2
5909 pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
5910 %{
5911   single_instruction;
5912   dst    : EX1(write); // result one stage earlier than the plain ALU classes
5913   src1   : ISS(read);  // both sources are needed at issue; modelled here with stage ISS
5914   src2   : ISS(read);
5915   INS01  : ISS;        // may issue as instruction 0 or 1
5916   ALU    : EX1;        // ALU resource reserved at EX1
5917 %}
5918 
5919 // Integer ALU reg-reg operation with extract
5920 // As for _vshift above, but result generated in EX2
5921 // Eg.  EXTR    x0, x1, x2, #N
5922 pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
5923 %{
5924   single_instruction;
5925   dst    : EX2(write); // result available from EX2
5926   src1   : ISS(read);  // both sources are needed at issue
5927   src2   : ISS(read);
5928   INS1   : ISS; // Can only dual issue as Instruction 1
5929   ALU    : EX1; // NOTE(review): ALU reserved at EX1 although dst is written at EX2 - confirm intended
5930 %}
5931 
5932 // Integer ALU reg operation (single register source)
5933 // Eg.  NEG     x0, x1
5934 pipe_class ialu_reg(iRegI dst, iRegI src)
5935 %{
5936   single_instruction;
5937   dst    : EX2(write); // result available from EX2
5938   src    : EX1(read);  // source is consumed in EX1
5939   INS01  : ISS;        // may issue as instruction 0 or 1
5940   ALU    : EX2;        // ALU resource reserved at EX2
5941 %}
5942 
5943 // Integer ALU reg immediate operation
5944 // Eg.  ADD     x0, x1, #N
5945 pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
5946 %{
5947   single_instruction;
5948   dst    : EX2(write); // result available from EX2
5949   src1   : EX1(read);  // register source is consumed in EX1; the immediate has no stage entry
5950   INS01  : ISS;        // may issue as instruction 0 or 1
5951   ALU    : EX2;        // ALU resource reserved at EX2
5952 %}
5953 
5954 // Integer ALU immediate operation (no source operands)
5955 // Eg.  MOV     x0, #N
5956 pipe_class ialu_imm(iRegI dst)
5957 %{
5958   single_instruction;
5959   dst    : EX1(write); // result already available from EX1
5960   INS01  : ISS;        // may issue as instruction 0 or 1
5961   ALU    : EX1;        // ALU resource reserved at EX1
5962 %}
5963 
5964 //------- Compare operation -------------------------------
5965 
5966 // Compare reg-reg: reads two registers and writes the flags register
5967 // Eg.  CMP     x0, x1
5968 pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
5969 %{
5970   single_instruction;
5971 //  fixed_latency(16);
5972   cr     : EX2(write); // flags are produced in EX2
5973   op1    : EX1(read);  // both operands are consumed in EX1
5974   op2    : EX1(read);
5975   INS01  : ISS;        // may issue as instruction 0 or 1
5976   ALU    : EX2;        // ALU resource reserved at EX2
5977 %}
5978 
5979 // Compare reg-imm: reads one register and writes the flags register
5980 // Eg.  CMP     x0, #N
5981 pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
5982 %{
5983   single_instruction;
5984 //  fixed_latency(16);
5985   cr     : EX2(write); // flags are produced in EX2
5986   op1    : EX1(read);  // register operand is consumed in EX1; the immediate has no stage entry
5987   INS01  : ISS;        // may issue as instruction 0 or 1
5988   ALU    : EX2;        // ALU resource reserved at EX2
5989 %}
5990 
5991 //------- Conditional instructions ------------------------
5992 
5993 // Conditional no operands (flags are the only input)
5994 // Eg.  CSINC   x0, zr, zr, <cond>
5995 pipe_class icond_none(iRegI dst, rFlagsReg cr)
5996 %{
5997   single_instruction;
5998   cr     : EX1(read);  // flags are consumed in EX1
5999   dst    : EX2(write); // result available from EX2
6000   INS01  : ISS;        // may issue as instruction 0 or 1
6001   ALU    : EX2;        // ALU resource reserved at EX2
6002 %}
6003 
6004 // Conditional 2 operand (two register sources plus flags)
6005 // EG.  CSEL    X0, X1, X2, <cond>
6006 pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
6007 %{
6008   single_instruction;
6009   cr     : EX1(read);  // flags and both sources are consumed in EX1
6010   src1   : EX1(read);
6011   src2   : EX1(read);
6012   dst    : EX2(write); // result available from EX2
6013   INS01  : ISS;        // may issue as instruction 0 or 1
6014   ALU    : EX2;        // ALU resource reserved at EX2
6015 %}
6016 
6017 // Conditional 1 operand (one register source plus flags)
6018 // EG.  CSEL    X0, X1, X2, <cond>   (example shared with icond_reg_reg; this class declares a single src)
6019 pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
6020 %{
6021   single_instruction;
6022   cr     : EX1(read);  // flags and the source are consumed in EX1
6023   src    : EX1(read);
6024   dst    : EX2(write); // result available from EX2
6025   INS01  : ISS;        // may issue as instruction 0 or 1
6026   ALU    : EX2;        // ALU resource reserved at EX2
6027 %}
6028 
6029 //------- Multiply pipeline operations --------------------
6030 
6031 // Multiply reg-reg (32 bit; lmul_reg_reg is the 64 bit counterpart)
6032 // Eg.  MUL     w0, w1, w2
6033 pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6034 %{
6035   single_instruction;
6036   dst    : WR(write);  // result is written in stage WR
6037   src1   : ISS(read);  // both sources are needed at issue
6038   src2   : ISS(read);
6039   INS01  : ISS;        // may issue as instruction 0 or 1
6040   MAC    : WR;         // MAC (multiply) resource reserved at WR
6041 %}
6042 
6043 // Multiply accumulate (32 bit; lmac_reg_reg is the 64 bit counterpart)
6044 // Eg.  MADD    w0, w1, w2, w3
6045 pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
6046 %{
6047   single_instruction;
6048   dst    : WR(write);  // result is written in stage WR
6049   src1   : ISS(read);  // all three sources are needed at issue
6050   src2   : ISS(read);
6051   src3   : ISS(read);
6052   INS01  : ISS;        // may issue as instruction 0 or 1
6053   MAC    : WR;         // MAC (multiply) resource reserved at WR
6054 %}
6055 
6056 // Multiply reg-reg (64 bit).  Eg.  MUL     x0, x1, x2
6057 pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6058 %{
6059   single_instruction;
6060   fixed_latency(3); // Maximum latency for 64 bit mul
6061   dst    : WR(write);  // result is written in stage WR
6062   src1   : ISS(read);  // both sources are needed at issue
6063   src2   : ISS(read);
6064   INS01  : ISS;        // may issue as instruction 0 or 1
6065   MAC    : WR;         // MAC (multiply) resource reserved at WR
6066 %}
6067 
6068 // Multiply accumulate (64 bit)
6069 // Eg.  MADD    x0, x1, x2, x3
6070 pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
6071 %{
6072   single_instruction;
6073   fixed_latency(3); // Maximum latency for 64 bit mul
6074   dst    : WR(write);  // result is written in stage WR
6075   src1   : ISS(read);  // all three sources are needed at issue
6076   src2   : ISS(read);
6077   src3   : ISS(read);
6078   INS01  : ISS;        // may issue as instruction 0 or 1
6079   MAC    : WR;         // MAC (multiply) resource reserved at WR
6080 %}
6081 
6082 //------- Divide pipeline operations --------------------
6083 
6084 // Divide reg-reg (32 bit).  Eg.  SDIV    w0, w1, w2
6085 pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6086 %{
6087   single_instruction;
6088   fixed_latency(8); // Maximum latency for 32 bit divide
6089   dst    : WR(write);  // result is written in stage WR
6090   src1   : ISS(read);  // both sources are needed at issue
6091   src2   : ISS(read);
6092   INS0   : ISS; // Can only dual issue as instruction 0
6093   DIV    : WR;  // DIV (divider) resource reserved at WR
6094 %}
6095 
6096 // Divide reg-reg (64 bit).  Eg.  SDIV    x0, x1, x2
6097 pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6098 %{
6099   single_instruction;
6100   fixed_latency(16); // Maximum latency for 64 bit divide
6101   dst    : WR(write);  // result is written in stage WR
6102   src1   : ISS(read);  // both sources are needed at issue
6103   src2   : ISS(read);
6104   INS0   : ISS; // Can only dual issue as instruction 0
6105   DIV    : WR;  // DIV (divider) resource reserved at WR
6106 %}
6107 
6108 //------- Load pipeline operations ------------------------
6109 
6110 // Load - prefetch (address operand only, no destination register)
6111 // Eg.  PRFM    <mem>
6112 pipe_class iload_prefetch(memory mem)
6113 %{
6114   single_instruction;
6115   mem    : ISS(read);  // address operand is needed at issue
6116   INS01  : ISS;        // may issue as instruction 0 or 1
6117   LDST   : WR;         // load/store resource reserved at WR
6118 %}
6119 
6120 // Load - reg, mem (used by the integer/pointer load instructions such as loadB and loadP)
6121 // Eg.  LDR     x0, <mem>
6122 pipe_class iload_reg_mem(iRegI dst, memory mem)
6123 %{
6124   single_instruction;
6125   dst    : WR(write);  // loaded value is written in stage WR
6126   mem    : ISS(read);  // address operand is needed at issue
6127   INS01  : ISS;        // may issue as instruction 0 or 1
6128   LDST   : WR;         // load/store resource reserved at WR
6129 %}
6130 
6131 // Load - reg, reg (address supplied by a register operand)
6132 // Eg.  LDR     x0, [sp, x1]
6133 pipe_class iload_reg_reg(iRegI dst, iRegI src)
6134 %{
6135   single_instruction;
6136   dst    : WR(write);  // loaded value is written in stage WR
6137   src    : ISS(read);  // address register is needed at issue
6138   INS01  : ISS;        // may issue as instruction 0 or 1
6139   LDST   : WR;         // load/store resource reserved at WR
6140 %}
6141 
6142 //------- Store pipeline operations -----------------------
6143 
6144 // Store - zr, mem (no data register; used by the store-zero instructions such as storeimmB0)
6145 // Eg.  STR     zr, <mem>
6146 pipe_class istore_mem(memory mem)
6147 %{
6148   single_instruction;
6149   mem    : ISS(read);  // address operand is needed at issue
6150   INS01  : ISS;        // may issue as instruction 0 or 1
6151   LDST   : WR;         // load/store resource reserved at WR
6152 %}
6153 
6154 // Store - reg, mem (used by the register store instructions such as storeB and storeI)
6155 // Eg.  STR     x0, <mem>
6156 pipe_class istore_reg_mem(iRegI src, memory mem)
6157 %{
6158   single_instruction;
6159   mem    : ISS(read);  // address operand is needed at issue
6160   src    : EX2(read);  // data to store is not needed until EX2
6161   INS01  : ISS;        // may issue as instruction 0 or 1
6162   LDST   : WR;         // load/store resource reserved at WR
6163 %}
6164 
6165 // Store - reg, reg (both operands are read; nothing is written to a register)
6166 // Eg. STR      x0, [sp, x1]
6167 pipe_class istore_reg_reg(iRegI dst, iRegI src)
6168 %{
6169   single_instruction;
6170   dst    : ISS(read);  // despite its name, dst is only read (at issue) - presumably the address register
6171   src    : EX2(read);  // data to store is not needed until EX2
6172   INS01  : ISS;        // may issue as instruction 0 or 1
6173   LDST   : WR;         // load/store resource reserved at WR
6174 %}
6175 
6176 //------- Branch pipeline operations ----------------------
6177 
6178 // Branch (no operands)
6179 pipe_class pipe_branch()
6180 %{
6181   single_instruction;
6182   INS01  : ISS;        // may issue as instruction 0 or 1
6183   BRANCH : EX1;        // BRANCH resource reserved at EX1
6184 %}
6185 
6186 // Conditional branch (depends on the flags register)
6187 pipe_class pipe_branch_cond(rFlagsReg cr)
6188 %{
6189   single_instruction;
6190   cr     : EX1(read);  // flags are consumed in EX1
6191   INS01  : ISS;        // may issue as instruction 0 or 1
6192   BRANCH : EX1;        // BRANCH resource reserved at EX1
6193 %}
6194 
6195 // Compare & Branch (tests a register directly instead of the flags)
6196 // EG.  CBZ/CBNZ
6197 pipe_class pipe_cmp_branch(iRegI op1)
6198 %{
6199   single_instruction;
6200   op1    : EX1(read);  // tested register is consumed in EX1
6201   INS01  : ISS;        // may issue as instruction 0 or 1
6202   BRANCH : EX1;        // BRANCH resource reserved at EX1
6203 %}
6204 
6205 //------- Synchronisation operations ----------------------
6206 
6207 // Any operation requiring serialization.
6208 // EG.  DMB/Atomic Ops/Load Acquire/Str Release
6209 pipe_class pipe_serial()
6210 %{
6211   single_instruction;
6212   force_serialization; // scheduler must not overlap this with neighbouring instructions
6213   fixed_latency(16);   // flat latency estimate instead of per-operand stages
6214   INS01  : ISS(2); // Cannot dual issue with any other instruction
6215   LDST   : WR;     // load/store resource reserved at WR
6216 %}
6217 
6218 // Generic big/slow expanded idiom - also serialized
6219 pipe_class pipe_slow()
6220 %{
6221   instruction_count(10); // modelled as 10 instructions rather than single_instruction
6222   multiple_bundles;      // may span more than one issue bundle
6223   force_serialization;   // serialized like pipe_serial
6224   fixed_latency(16);     // flat latency estimate instead of per-operand stages
6225   INS01  : ISS(2); // Cannot dual issue with any other instruction
6226   LDST   : WR;     // load/store resource reserved at WR
6227 %}
6228 
6229 // Empty pipeline class: zero latency, no operands or resources (assigned to MachNop in the define block)
6230 pipe_class pipe_class_empty()
6231 %{
6232   single_instruction;
6233   fixed_latency(0);
6234 %}
6235 
6236 // Default pipeline class: flat latency of 2, no operand or resource modelling.
6237 pipe_class pipe_class_default()
6238 %{
6239   single_instruction;
6240   fixed_latency(2);
6241 %}
6242 
6243 // Pipeline class for compares: flat latency of 16, no operand or resource modelling.
6244 pipe_class pipe_class_compare()
6245 %{
6246   single_instruction;
6247   fixed_latency(16);
6248 %}
6249 
6250 // Pipeline class for memory operations: flat latency of 16 (used e.g. by loadF and loadD).
6251 pipe_class pipe_class_memory()
6252 %{
6253   single_instruction;
6254   fixed_latency(16);
6255 %}
6256 
6257 // Pipeline class for call: flat latency of 100, no operand or resource modelling.
6258 pipe_class pipe_class_call()
6259 %{
6260   single_instruction;
6261   fixed_latency(100);
6262 %}
6263 
6264 // Define the class for the Nop node: MachNop is scheduled with the zero-latency pipe_class_empty.
6265 define %{
6266    MachNop = pipe_class_empty;
6267 %}
6268 
6269 %}
6270 //----------INSTRUCTIONS-------------------------------------------------------
6271 //
6272 // match      -- States which machine-independent subtree may be replaced
6273 //               by this instruction.
6274 // ins_cost   -- The estimated cost of this instruction is used by instruction
6275 //               selection to identify a minimum cost tree of machine
6276 //               instructions that matches a tree of machine-independent
6277 //               instructions.
6278 // format     -- A string providing the disassembly for this instruction.
6279 //               The value of an instruction's operand may be inserted
6280 //               by referring to it with a '$' prefix.
6281 // opcode     -- Three instruction opcodes may be provided.  These are referred
6282 //               to within an encode class as $primary, $secondary, and $tertiary
6283 //               respectively.  The primary opcode is commonly used to
6284 //               indicate the type of machine instruction, while secondary
6285 //               and tertiary are often used for prefix options or addressing
6286 //               modes.
6287 // ins_encode -- A list of encode classes with parameters. The encode class
6288 //               name must have been defined in an 'enc_class' specification
6289 //               in the encode section of the architecture description.
6290 
6291 // ============================================================================
6292 // Memory (Load/Store) Instructions
6293 
6294 // Load Instructions
6295 
6296 // Load Byte (8 bit signed) into an int register; plain (non-acquiring) loads only
6297 instruct loadB(iRegINoSp dst, memory mem)
6298 %{
6299   match(Set dst (LoadB mem));
6300   predicate(!needs_acquiring_load(n)); // rejected when needs_acquiring_load(n) is true
6301 
6302   ins_cost(4 * INSN_COST);
6303   format %{ "ldrsbw  $dst, $mem\t# byte" %}
6304 
6305   ins_encode(aarch64_enc_ldrsbw(dst, mem));
6306 
6307   ins_pipe(iload_reg_mem); // scheduled as an integer register load
6308 %}
6309 
6310 // Load Byte (8 bit signed) into long: folds the ConvI2L into the load
6311 instruct loadB2L(iRegLNoSp dst, memory mem)
6312 %{
6313   match(Set dst (ConvI2L (LoadB mem)));
6314   predicate(!needs_acquiring_load(n->in(1))); // n is the ConvI2L; its input 1 is the LoadB being tested
6315 
6316   ins_cost(4 * INSN_COST);
6317   format %{ "ldrsb  $dst, $mem\t# byte" %}
6318 
6319   ins_encode(aarch64_enc_ldrsb(dst, mem));
6320 
6321   ins_pipe(iload_reg_mem); // scheduled as an integer register load
6322 %}
6323 
6324 // Load Byte (8 bit unsigned) into an int register; plain (non-acquiring) loads only
6325 instruct loadUB(iRegINoSp dst, memory mem)
6326 %{
6327   match(Set dst (LoadUB mem));
6328   predicate(!needs_acquiring_load(n)); // rejected when needs_acquiring_load(n) is true
6329 
6330   ins_cost(4 * INSN_COST);
6331   format %{ "ldrbw  $dst, $mem\t# byte" %}
6332 
6333   ins_encode(aarch64_enc_ldrb(dst, mem));
6334 
6335   ins_pipe(iload_reg_mem); // scheduled as an integer register load
6336 %}
6337 
6338 // Load Byte (8 bit unsigned) into long: folds the ConvI2L into the load (same encoding as loadUB)
6339 instruct loadUB2L(iRegLNoSp dst, memory mem)
6340 %{
6341   match(Set dst (ConvI2L (LoadUB mem)));
6342   predicate(!needs_acquiring_load(n->in(1))); // n is the ConvI2L; its input 1 is the LoadUB being tested
6343 
6344   ins_cost(4 * INSN_COST);
6345   format %{ "ldrb  $dst, $mem\t# byte" %}
6346 
6347   ins_encode(aarch64_enc_ldrb(dst, mem));
6348 
6349   ins_pipe(iload_reg_mem); // scheduled as an integer register load
6350 %}
6351 
6352 // Load Short (16 bit signed) into an int register; plain (non-acquiring) loads only
6353 instruct loadS(iRegINoSp dst, memory mem)
6354 %{
6355   match(Set dst (LoadS mem));
6356   predicate(!needs_acquiring_load(n)); // rejected when needs_acquiring_load(n) is true
6357 
6358   ins_cost(4 * INSN_COST);
6359   format %{ "ldrshw  $dst, $mem\t# short" %}
6360 
6361   ins_encode(aarch64_enc_ldrshw(dst, mem));
6362 
6363   ins_pipe(iload_reg_mem); // scheduled as an integer register load
6364 %}
6365 
6366 // Load Short (16 bit signed) into long: folds the ConvI2L into the load
6367 instruct loadS2L(iRegLNoSp dst, memory mem)
6368 %{
6369   match(Set dst (ConvI2L (LoadS mem)));
6370   predicate(!needs_acquiring_load(n->in(1))); // n is the ConvI2L; its input 1 is the LoadS being tested
6371 
6372   ins_cost(4 * INSN_COST);
6373   format %{ "ldrsh  $dst, $mem\t# short" %}
6374 
6375   ins_encode(aarch64_enc_ldrsh(dst, mem));
6376 
6377   ins_pipe(iload_reg_mem); // scheduled as an integer register load
6378 %}
6379 
6380 // Load Char (16 bit unsigned) into an int register; plain (non-acquiring) loads only
6381 instruct loadUS(iRegINoSp dst, memory mem)
6382 %{
6383   match(Set dst (LoadUS mem));
6384   predicate(!needs_acquiring_load(n)); // rejected when needs_acquiring_load(n) is true
6385 
6386   ins_cost(4 * INSN_COST);
6387   format %{ "ldrh  $dst, $mem\t# short" %}
6388 
6389   ins_encode(aarch64_enc_ldrh(dst, mem));
6390 
6391   ins_pipe(iload_reg_mem); // scheduled as an integer register load
6392 %}
6393 
6394 // Load Short/Char (16 bit unsigned) into long: folds the ConvI2L into the load (same encoding as loadUS)
6395 instruct loadUS2L(iRegLNoSp dst, memory mem)
6396 %{
6397   match(Set dst (ConvI2L (LoadUS mem)));
6398   predicate(!needs_acquiring_load(n->in(1))); // n is the ConvI2L; its input 1 is the LoadUS being tested
6399 
6400   ins_cost(4 * INSN_COST);
6401   format %{ "ldrh  $dst, $mem\t# short" %}
6402 
6403   ins_encode(aarch64_enc_ldrh(dst, mem));
6404 
6405   ins_pipe(iload_reg_mem); // scheduled as an integer register load
6406 %}
6407 
6408 // Load Integer (32 bit signed); plain (non-acquiring) loads only
6409 instruct loadI(iRegINoSp dst, memory mem)
6410 %{
6411   match(Set dst (LoadI mem));
6412   predicate(!needs_acquiring_load(n)); // rejected when needs_acquiring_load(n) is true
6413 
6414   ins_cost(4 * INSN_COST);
6415   format %{ "ldrw  $dst, $mem\t# int" %}
6416 
6417   ins_encode(aarch64_enc_ldrw(dst, mem));
6418 
6419   ins_pipe(iload_reg_mem); // scheduled as an integer register load
6420 %}
6421 
6422 // Load Integer (32 bit signed) into long: folds the ConvI2L into a sign-extending load (ldrsw)
6423 instruct loadI2L(iRegLNoSp dst, memory mem)
6424 %{
6425   match(Set dst (ConvI2L (LoadI mem)));
6426   predicate(!needs_acquiring_load(n->in(1))); // n is the ConvI2L; its input 1 is the LoadI being tested
6427 
6428   ins_cost(4 * INSN_COST);
6429   format %{ "ldrsw  $dst, $mem\t# int" %}
6430 
6431   ins_encode(aarch64_enc_ldrsw(dst, mem));
6432 
6433   ins_pipe(iload_reg_mem); // scheduled as an integer register load
6434 %}
6435 
6436 // Load Integer (32 bit unsigned) into long: folds ConvI2L plus the AndL with a 32-bit mask into one ldrw
6437 instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
6438 %{
6439   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
6440   predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load())); // n is the AndL; two levels down (AndL -> ConvI2L -> LoadI) is the load being tested
6441 
6442   ins_cost(4 * INSN_COST);
6443   format %{ "ldrw  $dst, $mem\t# int" %}
6444 
6445   ins_encode(aarch64_enc_ldrw(dst, mem)); // mask is matched but not passed to the encoding
6446 
6447   ins_pipe(iload_reg_mem); // scheduled as an integer register load
6448 %}
6449 
6450 // Load Long (64 bit signed); plain (non-acquiring) loads only
6451 instruct loadL(iRegLNoSp dst, memory mem)
6452 %{
6453   match(Set dst (LoadL mem));
6454   predicate(!needs_acquiring_load(n)); // rejected when needs_acquiring_load(n) is true
6455 
6456   ins_cost(4 * INSN_COST);
6457   format %{ "ldr  $dst, $mem\t# long" %}
6458 
6459   ins_encode(aarch64_enc_ldr(dst, mem));
6460 
6461   ins_pipe(iload_reg_mem); // scheduled as an integer register load
6462 %}
6463 
6464 // Load Range: 32-bit load (ldrw) matching LoadRange; unlike the other loads it has no predicate
6465 instruct loadRange(iRegINoSp dst, memory mem)
6466 %{
6467   match(Set dst (LoadRange mem));
6468 
6469   ins_cost(4 * INSN_COST);
6470   format %{ "ldrw  $dst, $mem\t# range" %}
6471 
6472   ins_encode(aarch64_enc_ldrw(dst, mem));
6473 
6474   ins_pipe(iload_reg_mem); // scheduled as an integer register load
6475 %}
6476 
6477 // Load Pointer (uncompressed, 64-bit ldr); plain (non-acquiring) loads only
6478 instruct loadP(iRegPNoSp dst, memory mem)
6479 %{
6480   match(Set dst (LoadP mem));
6481   predicate(!needs_acquiring_load(n)); // rejected when needs_acquiring_load(n) is true
6482 
6483   ins_cost(4 * INSN_COST);
6484   format %{ "ldr  $dst, $mem\t# ptr" %}
6485 
6486   ins_encode(aarch64_enc_ldr(dst, mem));
6487 
6488   ins_pipe(iload_reg_mem); // scheduled as an integer register load
6489 %}
6490 
6491 // Load Compressed Pointer (32-bit ldrw into a narrow-oop register); plain (non-acquiring) loads only
6492 instruct loadN(iRegNNoSp dst, memory mem)
6493 %{
6494   match(Set dst (LoadN mem));
6495   predicate(!needs_acquiring_load(n)); // rejected when needs_acquiring_load(n) is true
6496 
6497   ins_cost(4 * INSN_COST);
6498   format %{ "ldrw  $dst, $mem\t# compressed ptr" %}
6499 
6500   ins_encode(aarch64_enc_ldrw(dst, mem));
6501 
6502   ins_pipe(iload_reg_mem); // scheduled as an integer register load
6503 %}
6504 
6505 // Load Klass Pointer (uncompressed, 64-bit ldr); plain (non-acquiring) loads only
6506 instruct loadKlass(iRegPNoSp dst, memory mem)
6507 %{
6508   match(Set dst (LoadKlass mem));
6509   predicate(!needs_acquiring_load(n)); // rejected when needs_acquiring_load(n) is true
6510 
6511   ins_cost(4 * INSN_COST);
6512   format %{ "ldr  $dst, $mem\t# class" %}
6513 
6514   ins_encode(aarch64_enc_ldr(dst, mem));
6515 
6516   ins_pipe(iload_reg_mem); // scheduled as an integer register load
6517 %}
6518 
6519 // Load Narrow Klass Pointer (32-bit ldrw into a narrow register); plain (non-acquiring) loads only
6520 instruct loadNKlass(iRegNNoSp dst, memory mem)
6521 %{
6522   match(Set dst (LoadNKlass mem));
6523   predicate(!needs_acquiring_load(n)); // rejected when needs_acquiring_load(n) is true
6524 
6525   ins_cost(4 * INSN_COST);
6526   format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}
6527 
6528   ins_encode(aarch64_enc_ldrw(dst, mem));
6529 
6530   ins_pipe(iload_reg_mem); // scheduled as an integer register load
6531 %}
6532 
6533 // Load Float into an FP register; plain (non-acquiring) loads only
6534 instruct loadF(vRegF dst, memory mem)
6535 %{
6536   match(Set dst (LoadF mem));
6537   predicate(!needs_acquiring_load(n)); // rejected when needs_acquiring_load(n) is true
6538 
6539   ins_cost(4 * INSN_COST);
6540   format %{ "ldrs  $dst, $mem\t# float" %}
6541 
6542   ins_encode( aarch64_enc_ldrs(dst, mem) );
6543 
6544   ins_pipe(pipe_class_memory); // generic fixed-latency memory class rather than iload_reg_mem
6545 %}
6546 
6547 // Load Double into an FP register; plain (non-acquiring) loads only
6548 instruct loadD(vRegD dst, memory mem)
6549 %{
6550   match(Set dst (LoadD mem));
6551   predicate(!needs_acquiring_load(n)); // rejected when needs_acquiring_load(n) is true
6552 
6553   ins_cost(4 * INSN_COST);
6554   format %{ "ldrd  $dst, $mem\t# double" %}
6555 
6556   ins_encode( aarch64_enc_ldrd(dst, mem) );
6557 
6558   ins_pipe(pipe_class_memory); // generic fixed-latency memory class rather than iload_reg_mem
6559 %}
6560 
6561 
6562 // Load Int Constant: materialise an immI operand into an int register
6563 instruct loadConI(iRegINoSp dst, immI src)
6564 %{
6565   match(Set dst src);
6566 
6567   ins_cost(INSN_COST);
6568   format %{ "mov $dst, $src\t# int" %}
6569 
6570   ins_encode( aarch64_enc_movw_imm(dst, src) );
6571 
6572   ins_pipe(ialu_imm); // ALU op with no register sources
6573 %}
6574 
6575 // Load Long Constant: materialise an immL operand into a long register
6576 instruct loadConL(iRegLNoSp dst, immL src)
6577 %{
6578   match(Set dst src);
6579 
6580   ins_cost(INSN_COST);
6581   format %{ "mov $dst, $src\t# long" %}
6582 
6583   ins_encode( aarch64_enc_mov_imm(dst, src) );
6584 
6585   ins_pipe(ialu_imm); // ALU op with no register sources
6586 %}
6587 
6588 // Load Pointer Constant: general immP case, costed at 4 instructions (specific pointer constants have cheaper rules)
6589 
6590 instruct loadConP(iRegPNoSp dst, immP con)
6591 %{
6592   match(Set dst con);
6593 
6594   ins_cost(INSN_COST * 4);
6595   format %{
6596     "mov  $dst, $con\t# ptr\n\t"
6597   %}
6598 
6599   ins_encode(aarch64_enc_mov_p(dst, con));
6600 
6601   ins_pipe(ialu_imm); // ALU op with no register sources
6602 %}
6603 
6604 // Load Null Pointer Constant (immP0 operand), costed at a single instruction
6605 
6606 instruct loadConP0(iRegPNoSp dst, immP0 con)
6607 %{
6608   match(Set dst con);
6609 
6610   ins_cost(INSN_COST);
6611   format %{ "mov  $dst, $con\t# NULL ptr" %}
6612 
6613   ins_encode(aarch64_enc_mov_p0(dst, con));
6614 
6615   ins_pipe(ialu_imm); // ALU op with no register sources
6616 %}
6617 
6618 // Load Pointer Constant One (immP_1 operand), costed at a single instruction
6619 
6620 instruct loadConP1(iRegPNoSp dst, immP_1 con)
6621 %{
6622   match(Set dst con);
6623 
6624   ins_cost(INSN_COST);
6625   format %{ "mov  $dst, $con\t# ptr constant one" %}
6626 
6627   ins_encode(aarch64_enc_mov_p1(dst, con));
6628 
6629   ins_pipe(ialu_imm); // ALU op with no register sources
6630 %}
6631 
6632 // Load Poll Page Constant (immPollPage operand) into a pointer register
6633 
6634 instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
6635 %{
6636   match(Set dst con);
6637 
6638   ins_cost(INSN_COST);
6639   format %{ "adr  $dst, $con\t# Poll Page Ptr" %}
6640 
6641   ins_encode(aarch64_enc_mov_poll_page(dst, con));
6642 
6643   ins_pipe(ialu_imm); // ALU op with no register sources
6644 %}
6645 
6646 // Load Byte Map Base Constant (immByteMapBase operand) into a pointer register
6647 
6648 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
6649 %{
6650   match(Set dst con);
6651 
6652   ins_cost(INSN_COST);
6653   format %{ "adr  $dst, $con\t# Byte Map Base" %}
6654 
6655   ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
6656 
6657   ins_pipe(ialu_imm); // ALU op with no register sources
6658 %}
6659 
6660 // Load Narrow Pointer Constant: general immN case, costed at 4 instructions
6661 
6662 instruct loadConN(iRegNNoSp dst, immN con)
6663 %{
6664   match(Set dst con);
6665 
6666   ins_cost(INSN_COST * 4);
6667   format %{ "mov  $dst, $con\t# compressed ptr" %}
6668 
6669   ins_encode(aarch64_enc_mov_n(dst, con));
6670 
6671   ins_pipe(ialu_imm); // ALU op with no register sources
6672 %}
6673 
6674 // Load Narrow Null Pointer Constant (immN0 operand), costed at a single instruction
6675 
6676 instruct loadConN0(iRegNNoSp dst, immN0 con)
6677 %{
6678   match(Set dst con);
6679 
6680   ins_cost(INSN_COST);
6681   format %{ "mov  $dst, $con\t# compressed NULL ptr" %}
6682 
6683   ins_encode(aarch64_enc_mov_n0(dst, con));
6684 
6685   ins_pipe(ialu_imm); // ALU op with no register sources
6686 %}
6687 
6688 // Load Narrow Klass Constant (immNKlass operand) into a narrow register
6689 
6690 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
6691 %{
6692   match(Set dst con);
6693 
6694   ins_cost(INSN_COST);
6695   format %{ "mov  $dst, $con\t# compressed klass ptr" %}
6696 
6697   ins_encode(aarch64_enc_mov_nk(dst, con));
6698 
6699   ins_pipe(ialu_imm); // ALU op with no register sources
6700 %}
6701 
6702 // Load Packed Float Constant: immFPacked constants are emitted with an immediate fmovs, not a memory load
6703 
6704 instruct loadConF_packed(vRegF dst, immFPacked con) %{
6705   match(Set dst con);
6706   ins_cost(INSN_COST * 4);
6707   format %{ "fmovs  $dst, $con"%}
6708   ins_encode %{
6709     __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
6710   %}
6711 
6712   ins_pipe(fp_imm_s); // FP immediate timing: destination only
6713 %}
6714 
6715 // Load Float Constant: general immF case, loaded with ldrs from the constant table
6716 
6717 instruct loadConF(vRegF dst, immF con) %{
6718   match(Set dst con);
6719 
6720   ins_cost(INSN_COST * 4);
6721 
6722   format %{
6723     "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
6724   %}
6725 
6726   ins_encode %{
6727     __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
6728   %}
6729 
6730   ins_pipe(fp_load_constant_s); // constant-table load timing, single precision
6731 %}
6732 
6733 // Load Packed Double Constant: immDPacked constants are emitted with an immediate fmovd, not a memory load
6734 
6735 instruct loadConD_packed(vRegD dst, immDPacked con) %{
6736   match(Set dst con);
6737   ins_cost(INSN_COST); // cheaper than the constant-table rule loadConD (INSN_COST * 5)
6738   format %{ "fmovd  $dst, $con"%}
6739   ins_encode %{
6740     __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
6741   %}
6742 
6743   ins_pipe(fp_imm_d); // FP immediate timing: destination only
6744 %}
6745 
6746 // Load Double Constant: general immD case, loaded with ldrd from the constant table
6747 
6748 instruct loadConD(vRegD dst, immD con) %{
6749   match(Set dst con);
6750 
6751   ins_cost(INSN_COST * 5);
6752   format %{
6753     "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
6754   %}
6755 
6756   ins_encode %{
6757     __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
6758   %}
6759 
6760   ins_pipe(fp_load_constant_d); // constant-table load timing, double precision
6761 %}
6762 
6763 // Store Instructions
6764 
6765 // Store CMS card-mark Immediate: zero byte store, used when no preceding StoreStore barrier is required
6766 instruct storeimmCM0(immI0 zero, memory mem)
6767 %{
6768   match(Set mem (StoreCM mem zero));
6769   predicate(unnecessary_storestore(n)); // otherwise the costlier storeimmCM0_ordered rule applies
6770 
6771   ins_cost(INSN_COST);
6772   format %{ "strb zr, $mem\t# byte" %}
6773 
6774   ins_encode(aarch64_enc_strb0(mem));
6775 
6776   ins_pipe(istore_mem); // store with no data register
6777 %}
6778 
6779 // Store CMS card-mark Immediate with intervening StoreStore
6780 // needed when using CMS with no conditional card marking
6781 instruct storeimmCM0_ordered(immI0 zero, memory mem)
6782 %{
6783   match(Set mem (StoreCM mem zero)); // same pattern as storeimmCM0, but without a predicate
6784 
6785   ins_cost(INSN_COST * 2); // twice the cost of storeimmCM0: barrier plus store
6786   format %{ "dmb ishst"
6787       "\n\tstrb zr, $mem\t# byte" %}
6788 
6789   ins_encode(aarch64_enc_strb0_ordered(mem));
6790 
6791   ins_pipe(istore_mem); // store with no data register
6792 %}
6793 
6794 // Store Byte from a register; plain (non-releasing) stores only
6795 instruct storeB(iRegIorL2I src, memory mem)
6796 %{
6797   match(Set mem (StoreB mem src));
6798   predicate(!needs_releasing_store(n)); // rejected when needs_releasing_store(n) is true
6799 
6800   ins_cost(INSN_COST);
6801   format %{ "strb  $src, $mem\t# byte" %}
6802 
6803   ins_encode(aarch64_enc_strb(src, mem));
6804 
6805   ins_pipe(istore_reg_mem); // register-to-memory store timing
6806 %}
6807 
6808 
6809 instruct storeimmB0(immI0 zero, memory mem) // Store Byte zero: stores zr directly, no source register needed
6810 %{
6811   match(Set mem (StoreB mem zero));
6812   predicate(!needs_releasing_store(n)); // rejected when needs_releasing_store(n) is true
6813 
6814   ins_cost(INSN_COST);
6815   format %{ "strb zr, $mem\t# byte" %}
6816 
6817   ins_encode(aarch64_enc_strb0(mem));
6818 
6819   ins_pipe(istore_mem); // store with no data register
6820 %}
6821 
6822 // Store Char/Short: plain strh of a register for StoreC nodes that pass the predicate below.
6823 instruct storeC(iRegIorL2I src, memory mem)
6824 %{
6825   match(Set mem (StoreC mem src));
6826   predicate(!needs_releasing_store(n));  // releasing stores are left to storeC_volatile (stlrh)
6827 
6828   ins_cost(INSN_COST);
6829   format %{ "strh  $src, $mem\t# short" %}
6830 
6831   ins_encode(aarch64_enc_strh(src, mem));
6832 
6833   ins_pipe(istore_reg_mem);
6834 %}
6835 // Store Char/Short zero: StoreC of the immI0 constant, formatted as a store of zr.
6836 instruct storeimmC0(immI0 zero, memory mem)
6837 %{
6838   match(Set mem (StoreC mem zero));
6839   predicate(!needs_releasing_store(n));  // same guard as storeC
6840 
6841   ins_cost(INSN_COST);
6842   format %{ "strh  zr, $mem\t# short" %}
6843 
6844   ins_encode(aarch64_enc_strh0(mem));
6845 
6846   ins_pipe(istore_mem);
6847 %}
6848 
6849 // Store Integer: plain strw of a register for StoreI nodes that pass the predicate below.
6850 
6851 instruct storeI(iRegIorL2I src, memory mem)
6852 %{
6853   match(Set mem(StoreI mem src));
6854   predicate(!needs_releasing_store(n));  // releasing stores are left to storeI_volatile (stlrw)
6855 
6856   ins_cost(INSN_COST);
6857   format %{ "strw  $src, $mem\t# int" %}
6858 
6859   ins_encode(aarch64_enc_strw(src, mem));
6860 
6861   ins_pipe(istore_reg_mem);
6862 %}
6863 // Store Integer zero: StoreI of the immI0 constant, formatted as a store of zr.
6864 instruct storeimmI0(immI0 zero, memory mem)
6865 %{
6866   match(Set mem(StoreI mem zero));
6867   predicate(!needs_releasing_store(n));  // same guard as storeI
6868 
6869   ins_cost(INSN_COST);
6870   format %{ "strw  zr, $mem\t# int" %}
6871 
6872   ins_encode(aarch64_enc_strw0(mem));
6873 
6874   ins_pipe(istore_mem);
6875 %}
6876 
6877 // Store Long (64 bit signed): plain str of a register for StoreL nodes that pass the predicate below.
6878 instruct storeL(iRegL src, memory mem)
6879 %{
6880   match(Set mem (StoreL mem src));
6881   predicate(!needs_releasing_store(n));  // releasing stores are left to storeL_volatile (stlr)
6882 
6883   ins_cost(INSN_COST);
6884   format %{ "str  $src, $mem\t# long" %}
6885 
6886   ins_encode(aarch64_enc_str(src, mem));
6887 
6888   ins_pipe(istore_reg_mem);
6889 %}
6890 
6891 // Store Long zero (64 bit): StoreL of the immL0 constant, formatted as a store of zr.
6892 instruct storeimmL0(immL0 zero, memory mem)
6893 %{
6894   match(Set mem (StoreL mem zero));
6895   predicate(!needs_releasing_store(n));  // same guard as storeL
6896 
6897   ins_cost(INSN_COST);
6898   format %{ "str  zr, $mem\t# long" %}
6899 
6900   ins_encode(aarch64_enc_str0(mem));
6901 
6902   ins_pipe(istore_mem);
6903 %}
6904 
6905 // Store Pointer: plain str of a pointer register; hand-written encoding because the source may be sp.
6906 instruct storeP(iRegP src, memory mem)
6907 %{
6908   match(Set mem (StoreP mem src));
6909   predicate(!needs_releasing_store(n));  // releasing stores are left to storeP_volatile (stlr)
6910 
6911   ins_cost(INSN_COST);
6912   format %{ "str  $src, $mem\t# ptr" %}
6913 
6914   ins_encode %{
6915     int opcode = $mem->opcode();  // memory operand kind; selects the index extension below
6916     Register base = as_Register($mem$$base);
6917     int index = $mem$$index;  // -1 is treated below as "no index register"
6918     int size = $mem$$scale;  // shift amount applied to the index
6919     int disp = $mem$$disp;
6920     Register reg = as_Register($src$$reg);
6921 
6922     // we sometimes get asked to store the stack pointer into the
6923     // current thread -- we cannot do that directly on AArch64
6924     if (reg == r31_sp) {
6925       MacroAssembler _masm(&cbuf);  // NOTE(review): block-local assembler for `__` — confirm whether adlc already declares one
6926       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
6927       __ mov(rscratch2, sp);  // copy sp into a scratch register and store that instead
6928       reg = rscratch2;
6929     }
6930     Address::extend scale;  // assigned on every path of the switch below
6931 
6932     // Hooboy, this is fugly.  We need a way to communicate to the
6933     // encoder that the index needs to be sign extended, so we have to
6934     // enumerate all the cases.
6935     switch (opcode) {
6936     case INDINDEXSCALEDOFFSETI2L:
6937     case INDINDEXSCALEDI2L:
6938     case INDINDEXSCALEDOFFSETI2LN:
6939     case INDINDEXSCALEDI2LN:
6940     case INDINDEXOFFSETI2L:
6941     case INDINDEXOFFSETI2LN:
6942       scale = Address::sxtw(size);  // I2L operand forms: sign-extend the 32-bit index
6943       break;
6944     default:
6945       scale = Address::lsl(size);  // all other forms: plain left shift
6946     }
6947 
6948     Address adr;
6949     if (index == -1) {
6950       adr = Address(base, disp);  // base + displacement only
6951     } else {
6952       if (disp == 0) {
6953         adr = Address(base, as_Register(index), scale);
6954       } else {
6955         __ lea(rscratch1, Address(base, disp));  // index and disp both present: fold base+disp into rscratch1 first
6956         adr = Address(rscratch1, as_Register(index), scale);
6957       }
6958     }
6959 
6960     __ str(reg, adr);
6961   %}
6962 
6963   ins_pipe(istore_reg_mem);
6964 %}
6965 
6966 // Store Pointer null: StoreP of the immP0 constant, formatted as a store of zr.
6967 instruct storeimmP0(immP0 zero, memory mem)
6968 %{
6969   match(Set mem (StoreP mem zero));
6970   predicate(!needs_releasing_store(n));  // same guard as storeP
6971 
6972   ins_cost(INSN_COST);
6973   format %{ "str zr, $mem\t# ptr" %}
6974 
6975   ins_encode(aarch64_enc_str0(mem));  // same encoding class as storeimmL0
6976 
6977   ins_pipe(istore_mem);
6978 %}
6979 
6980 // Store Compressed Pointer: plain strw of a narrow-oop register for StoreN nodes that pass the predicate.
6981 instruct storeN(iRegN src, memory mem)
6982 %{
6983   match(Set mem (StoreN mem src));
6984   predicate(!needs_releasing_store(n));  // releasing stores are left to storeN_volatile (stlrw)
6985 
6986   ins_cost(INSN_COST);
6987   format %{ "strw  $src, $mem\t# compressed ptr" %}
6988 
6989   ins_encode(aarch64_enc_strw(src, mem));  // same 32-bit store encoding as storeI
6990 
6991   ins_pipe(istore_reg_mem);
6992 %}
6993 // Store Compressed Pointer null: stores the heapbase register, relying on it being zero (see format and predicate).
6994 instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
6995 %{
6996   match(Set mem (StoreN mem zero));
6997   predicate(Universe::narrow_oop_base() == NULL &&
6998             Universe::narrow_klass_base() == NULL  &&
6999             (!needs_releasing_store(n)));
7000 
7001   ins_cost(INSN_COST);
7002   format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}
7003 
7004   ins_encode(aarch64_enc_strw(heapbase, mem));  // heapbase operand is the store source, not an address
7005 
7006   ins_pipe(istore_reg_mem);
7007 %}
7008 
7009 // Store Float: plain strs of an FP register for StoreF nodes that pass the predicate below.
7010 instruct storeF(vRegF src, memory mem)
7011 %{
7012   match(Set mem (StoreF mem src));
7013   predicate(!needs_releasing_store(n));  // releasing stores are left to storeF_volatile
7014 
7015   ins_cost(INSN_COST);
7016   format %{ "strs  $src, $mem\t# float" %}
7017 
7018   ins_encode( aarch64_enc_strs(src, mem) );
7019 
7020   ins_pipe(pipe_class_memory);
7021 %}
7022 
7023 // TODO
7024 // implement storeImmF0 and storeFImmPacked
7025 
7026 // Store Double: plain strd of an FP register for StoreD nodes that pass the predicate below.
7027 instruct storeD(vRegD src, memory mem)
7028 %{
7029   match(Set mem (StoreD mem src));
7030   predicate(!needs_releasing_store(n));  // releasing stores are left to storeD_volatile
7031 
7032   ins_cost(INSN_COST);
7033   format %{ "strd  $src, $mem\t# double" %}
7034 
7035   ins_encode( aarch64_enc_strd(src, mem) );
7036 
7037   ins_pipe(pipe_class_memory);
7038 %}
7039 
7040 // Store Compressed Klass Pointer: strw of a narrow-klass register for StoreNKlass nodes.
7041 instruct storeNKlass(iRegN src, memory mem)
7042 %{
7043   predicate(!needs_releasing_store(n));  // listed before match here, unlike the sibling store rules
7044   match(Set mem (StoreNKlass mem src));
7045 
7046   ins_cost(INSN_COST);
7047   format %{ "strw  $src, $mem\t# compressed klass ptr" %}
7048 
7049   ins_encode(aarch64_enc_strw(src, mem));  // same 32-bit store encoding as storeN
7050 
7051   ins_pipe(istore_reg_mem);
7052 %}
7053 
7054 // TODO
7055 // implement storeImmD0 and storeDImmPacked
7056 
7057 // prefetch instructions
7058 // Must be safe to execute with invalid address (cannot fault).
7059 // PrefetchRead: formatted as prfm with the PLDL1KEEP hint.
7060 instruct prefetchr( memory mem ) %{
7061   match(PrefetchRead mem);  // no Set: the rule produces no value
7062 
7063   ins_cost(INSN_COST);
7064   format %{ "prfm $mem, PLDL1KEEP\t# Prefetch into level 1 cache read keep" %}
7065 
7066   ins_encode( aarch64_enc_prefetchr(mem) );
7067 
7068   ins_pipe(iload_prefetch);
7069 %}
7070 // PrefetchAllocation: formatted as prfm with the PSTL1KEEP hint.
7071 instruct prefetchw( memory mem ) %{
7072   match(PrefetchAllocation mem);  // no Set: the rule produces no value
7073 
7074   ins_cost(INSN_COST);
7075   format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}
7076 
7077   ins_encode( aarch64_enc_prefetchw(mem) );
7078 
7079   ins_pipe(iload_prefetch);
7080 %}
7081 // PrefetchWrite: formatted as prfm with the PSTL1STRM (streaming) hint.
7082 instruct prefetchnta( memory mem ) %{
7083   match(PrefetchWrite mem);  // no Set: the rule produces no value
7084 
7085   ins_cost(INSN_COST);
7086   format %{ "prfm $mem, PSTL1STRM\t# Prefetch into level 1 cache write streaming" %}
7087 
7088   ins_encode( aarch64_enc_prefetchnta(mem) );
7089 
7090   ins_pipe(iload_prefetch);
7091 %}
7092 
7093 //  ---------------- volatile loads and stores ----------------
7094 
7095 // Load Byte (8 bit signed), acquiring form: unpredicated LoadB rule restricted to `indirect` addresses.
7096 instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7097 %{
7098   match(Set dst (LoadB mem));
7099 
7100   ins_cost(VOLATILE_REF_COST);  // volatile-access cost constant, defined outside this section
7101   format %{ "ldarsb  $dst, $mem\t# byte" %}
7102 
7103   ins_encode(aarch64_enc_ldarsb(dst, mem));
7104 
7105   ins_pipe(pipe_serial);
7106 %}
7107 
7108 // Load Byte (8 bit signed) into long: folds the ConvI2L into the acquiring load.
7109 instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7110 %{
7111   match(Set dst (ConvI2L (LoadB mem)));
7112 
7113   ins_cost(VOLATILE_REF_COST);
7114   format %{ "ldarsb  $dst, $mem\t# byte" %}
7115 
7116   ins_encode(aarch64_enc_ldarsb(dst, mem));  // same encoding class as loadB_volatile
7117 
7118   ins_pipe(pipe_serial);
7119 %}
7120 
7121 // Load Byte (8 bit unsigned), acquiring form: unpredicated LoadUB rule restricted to `indirect` addresses.
7122 instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7123 %{
7124   match(Set dst (LoadUB mem));
7125 
7126   ins_cost(VOLATILE_REF_COST);
7127   format %{ "ldarb  $dst, $mem\t# byte" %}
7128 
7129   ins_encode(aarch64_enc_ldarb(dst, mem));
7130 
7131   ins_pipe(pipe_serial);
7132 %}
7133 
7134 // Load Byte (8 bit unsigned) into long: folds the ConvI2L into the acquiring load.
7135 instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7136 %{
7137   match(Set dst (ConvI2L (LoadUB mem)));
7138 
7139   ins_cost(VOLATILE_REF_COST);
7140   format %{ "ldarb  $dst, $mem\t# byte" %}
7141 
7142   ins_encode(aarch64_enc_ldarb(dst, mem));  // same encoding class as loadUB_volatile
7143 
7144   ins_pipe(pipe_serial);
7145 %}
7146 
7147 // Load Short (16 bit signed), acquiring form: unpredicated LoadS rule restricted to `indirect` addresses.
7148 instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7149 %{
7150   match(Set dst (LoadS mem));
7151 
7152   ins_cost(VOLATILE_REF_COST);
7153   format %{ "ldarshw  $dst, $mem\t# short" %}
7154 
7155   ins_encode(aarch64_enc_ldarshw(dst, mem));
7156 
7157   ins_pipe(pipe_serial);
7158 %}
7159 // Load Short/Char (16 bit unsigned), acquiring form: unpredicated LoadUS rule restricted to `indirect` addresses.
7160 instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7161 %{
7162   match(Set dst (LoadUS mem));
7163 
7164   ins_cost(VOLATILE_REF_COST);
7165   format %{ "ldarhw  $dst, $mem\t# short" %}
7166 
7167   ins_encode(aarch64_enc_ldarhw(dst, mem));
7168 
7169   ins_pipe(pipe_serial);
7170 %}
7171 
7172 // Load Short/Char (16 bit unsigned) into long: folds the ConvI2L into the acquiring load.
7173 instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7174 %{
7175   match(Set dst (ConvI2L (LoadUS mem)));
7176 
7177   ins_cost(VOLATILE_REF_COST);
7178   format %{ "ldarh  $dst, $mem\t# short" %}
7179 
7180   ins_encode(aarch64_enc_ldarh(dst, mem));
7181 
7182   ins_pipe(pipe_serial);
7183 %}
7184 
7185 // Load Short (16 bit signed) into long: folds the ConvI2L into the acquiring, sign-extending load.
7186 instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7187 %{
7188   match(Set dst (ConvI2L (LoadS mem)));
7189 
7190   ins_cost(VOLATILE_REF_COST);
7191   format %{ "ldarsh  $dst, $mem\t# short" %}
7192 
7193   ins_encode(aarch64_enc_ldarsh(dst, mem));  // format names this encoding, as the sibling rules do
7194 
7195   ins_pipe(pipe_serial);
7196 %}
7197 
7198 // Load Integer (32 bit signed), acquiring form: unpredicated LoadI rule restricted to `indirect` addresses.
7199 instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7200 %{
7201   match(Set dst (LoadI mem));
7202 
7203   ins_cost(VOLATILE_REF_COST);
7204   format %{ "ldarw  $dst, $mem\t# int" %}
7205 
7206   ins_encode(aarch64_enc_ldarw(dst, mem));
7207 
7208   ins_pipe(pipe_serial);
7209 %}
7210 
7211 // Load Integer (32 bit unsigned) into long: folds ConvI2L plus the 32-bit AndL mask into the acquiring load.
7212 instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
7213 %{
7214   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));  // mask is matched only; the encoding does not use it
7215 
7216   ins_cost(VOLATILE_REF_COST);
7217   format %{ "ldarw  $dst, $mem\t# int" %}
7218 
7219   ins_encode(aarch64_enc_ldarw(dst, mem));  // same encoding class as loadI_volatile
7220 
7221   ins_pipe(pipe_serial);
7222 %}
7223 
7224 // Load Long (64 bit signed), acquiring form: unpredicated LoadL rule restricted to `indirect` addresses.
7225 instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7226 %{
7227   match(Set dst (LoadL mem));
7228 
7229   ins_cost(VOLATILE_REF_COST);
7230   format %{ "ldar  $dst, $mem\t# long" %}
7231 
7232   ins_encode(aarch64_enc_ldar(dst, mem));
7233 
7234   ins_pipe(pipe_serial);
7235 %}
7236 
7237 // Load Pointer, acquiring form: unpredicated LoadP rule restricted to `indirect` addresses.
7238 instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
7239 %{
7240   match(Set dst (LoadP mem));
7241 
7242   ins_cost(VOLATILE_REF_COST);
7243   format %{ "ldar  $dst, $mem\t# ptr" %}
7244 
7245   ins_encode(aarch64_enc_ldar(dst, mem));  // same encoding class as loadL_volatile
7246 
7247   ins_pipe(pipe_serial);
7248 %}
7249 
7250 // Load Compressed Pointer, acquiring form: unpredicated LoadN rule restricted to `indirect` addresses.
7251 instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
7252 %{
7253   match(Set dst (LoadN mem));
7254 
7255   ins_cost(VOLATILE_REF_COST);
7256   format %{ "ldarw  $dst, $mem\t# compressed ptr" %}
7257 
7258   ins_encode(aarch64_enc_ldarw(dst, mem));  // same encoding class as loadI_volatile
7259 
7260   ins_pipe(pipe_serial);
7261 %}
7262 
7263 // Load Float, acquiring form: unpredicated LoadF rule restricted to `indirect` addresses.
7264 instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
7265 %{
7266   match(Set dst (LoadF mem));
7267 
7268   ins_cost(VOLATILE_REF_COST);
7269   format %{ "ldars  $dst, $mem\t# float" %}
7270 
7271   ins_encode( aarch64_enc_fldars(dst, mem) );
7272 
7273   ins_pipe(pipe_serial);
7274 %}
7275 
7276 // Load Double, acquiring form: unpredicated LoadD rule restricted to `indirect` addresses.
7277 instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
7278 %{
7279   match(Set dst (LoadD mem));
7280 
7281   ins_cost(VOLATILE_REF_COST);
7282   format %{ "ldard  $dst, $mem\t# double" %}
7283 
7284   ins_encode( aarch64_enc_fldard(dst, mem) );
7285 
7286   ins_pipe(pipe_serial);
7287 %}
7288 
7289 // Store Byte, releasing form: unpredicated StoreB rule; the cheaper storeB is guarded by !needs_releasing_store(n).
7290 instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
7291 %{
7292   match(Set mem (StoreB mem src));
7293 
7294   ins_cost(VOLATILE_REF_COST);
7295   format %{ "stlrb  $src, $mem\t# byte" %}
7296 
7297   ins_encode(aarch64_enc_stlrb(src, mem));
7298 
7299   ins_pipe(pipe_class_memory);
7300 %}
7301 
7302 // Store Char/Short, releasing form: unpredicated StoreC rule; the cheaper storeC is guarded by !needs_releasing_store(n).
7303 instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
7304 %{
7305   match(Set mem (StoreC mem src));
7306 
7307   ins_cost(VOLATILE_REF_COST);
7308   format %{ "stlrh  $src, $mem\t# short" %}
7309 
7310   ins_encode(aarch64_enc_stlrh(src, mem));
7311 
7312   ins_pipe(pipe_class_memory);
7313 %}
7314 
7315 // Store Integer, releasing form: unpredicated StoreI rule; the cheaper storeI is guarded by !needs_releasing_store(n).
7316 
7317 instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
7318 %{
7319   match(Set mem(StoreI mem src));
7320 
7321   ins_cost(VOLATILE_REF_COST);
7322   format %{ "stlrw  $src, $mem\t# int" %}
7323 
7324   ins_encode(aarch64_enc_stlrw(src, mem));
7325 
7326   ins_pipe(pipe_class_memory);
7327 %}
7328 
7329 // Store Long (64 bit signed), releasing form: unpredicated StoreL rule; the cheaper storeL is guarded by !needs_releasing_store(n).
7330 instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
7331 %{
7332   match(Set mem (StoreL mem src));
7333 
7334   ins_cost(VOLATILE_REF_COST);
7335   format %{ "stlr  $src, $mem\t# long" %}
7336 
7337   ins_encode(aarch64_enc_stlr(src, mem));
7338 
7339   ins_pipe(pipe_class_memory);
7340 %}
7341 
7342 // Store Pointer, releasing form: unpredicated StoreP rule; the cheaper storeP is guarded by !needs_releasing_store(n).
7343 instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
7344 %{
7345   match(Set mem (StoreP mem src));
7346 
7347   ins_cost(VOLATILE_REF_COST);
7348   format %{ "stlr  $src, $mem\t# ptr" %}
7349 
7350   ins_encode(aarch64_enc_stlr(src, mem));  // same encoding class as storeL_volatile
7351 
7352   ins_pipe(pipe_class_memory);
7353 %}
7354 
7355 // Store Compressed Pointer, releasing form: unpredicated StoreN rule; the cheaper storeN is guarded by !needs_releasing_store(n).
7356 instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
7357 %{
7358   match(Set mem (StoreN mem src));
7359 
7360   ins_cost(VOLATILE_REF_COST);
7361   format %{ "stlrw  $src, $mem\t# compressed ptr" %}
7362 
7363   ins_encode(aarch64_enc_stlrw(src, mem));  // same encoding class as storeI_volatile
7364 
7365   ins_pipe(pipe_class_memory);
7366 %}
7367 
7368 // Store Float, releasing form: unpredicated StoreF rule; the cheaper storeF is guarded by !needs_releasing_store(n).
7369 instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
7370 %{
7371   match(Set mem (StoreF mem src));
7372 
7373   ins_cost(VOLATILE_REF_COST);
7374   format %{ "stlrs  $src, $mem\t# float" %}
7375 
7376   ins_encode( aarch64_enc_fstlrs(src, mem) );
7377 
7378   ins_pipe(pipe_class_memory);
7379 %}
7380 
7381 // TODO
7382 // implement storeImmF0 and storeFImmPacked
7383 
7384 // Store Double, releasing form: unpredicated StoreD rule; the cheaper storeD is guarded by !needs_releasing_store(n).
7385 instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
7386 %{
7387   match(Set mem (StoreD mem src));
7388 
7389   ins_cost(VOLATILE_REF_COST);
7390   format %{ "stlrd  $src, $mem\t# double" %}
7391 
7392   ins_encode( aarch64_enc_fstlrd(src, mem) );
7393 
7394   ins_pipe(pipe_class_memory);
7395 %}
7396 
7397 //  ---------------- end of volatile loads and stores ----------------
7398 
7399 // ============================================================================
7400 // BSWAP Instructions
7401 // ReverseBytesI: byte-swap a 32-bit value with a single revw.
7402 instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
7403   match(Set dst (ReverseBytesI src));
7404 
7405   ins_cost(INSN_COST);
7406   format %{ "revw  $dst, $src" %}
7407 
7408   ins_encode %{
7409     __ revw($dst$$Register, $src$$Register);
7410   %}
7411 
7412   ins_pipe(ialu_reg);
7413 %}
7414 // ReverseBytesL: byte-swap a 64-bit value with a single rev.
7415 instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
7416   match(Set dst (ReverseBytesL src));
7417 
7418   ins_cost(INSN_COST);
7419   format %{ "rev  $dst, $src" %}
7420 
7421   ins_encode %{
7422     __ rev($dst$$Register, $src$$Register);
7423   %}
7424 
7425   ins_pipe(ialu_reg);
7426 %}
7427 // ReverseBytesUS: byte-swap an unsigned 16-bit value with a single rev16w.
7428 instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
7429   match(Set dst (ReverseBytesUS src));
7430 
7431   ins_cost(INSN_COST);
7432   format %{ "rev16w  $dst, $src" %}
7433 
7434   ins_encode %{
7435     __ rev16w($dst$$Register, $src$$Register);
7436   %}
7437 
7438   ins_pipe(ialu_reg);
7439 %}
7440 // ReverseBytesS: rev16w as for the unsigned case, then sbfmw #0,#15 on the result for the signed short.
7441 instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
7442   match(Set dst (ReverseBytesS src));
7443 
7444   ins_cost(INSN_COST);
7445   format %{ "rev16w  $dst, $src\n\t"
7446             "sbfmw $dst, $dst, #0, #15" %}
7447 
7448   ins_encode %{
7449     __ rev16w($dst$$Register, $src$$Register);
7450     __ sbfmw($dst$$Register, $dst$$Register, 0U, 15U);
7451   %}
7452 
7453   ins_pipe(ialu_reg);
7454 %}
7455 
7456 // ============================================================================
7457 // Zero Count Instructions
7458 // CountLeadingZerosI: a single 32-bit clzw.
7459 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
7460   match(Set dst (CountLeadingZerosI src));
7461 
7462   ins_cost(INSN_COST);
7463   format %{ "clzw  $dst, $src" %}
7464   ins_encode %{
7465     __ clzw($dst$$Register, $src$$Register);
7466   %}
7467 
7468   ins_pipe(ialu_reg);
7469 %}
7470 // CountLeadingZerosL: a single 64-bit clz; the result goes to an int register.
7471 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
7472   match(Set dst (CountLeadingZerosL src));
7473 
7474   ins_cost(INSN_COST);
7475   format %{ "clz   $dst, $src" %}
7476   ins_encode %{
7477     __ clz($dst$$Register, $src$$Register);
7478   %}
7479 
7480   ins_pipe(ialu_reg);
7481 %}
7482 // CountTrailingZerosI: bit-reverse with rbitw, then count leading zeros of the reversed value.
7483 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
7484   match(Set dst (CountTrailingZerosI src));
7485 
7486   ins_cost(INSN_COST * 2);
7487   format %{ "rbitw  $dst, $src\n\t"
7488             "clzw   $dst, $dst" %}
7489   ins_encode %{
7490     __ rbitw($dst$$Register, $src$$Register);
7491     __ clzw($dst$$Register, $dst$$Register);
7492   %}
7493 
7494   ins_pipe(ialu_reg);
7495 %}
7496 // CountTrailingZerosL: bit-reverse with rbit, then count leading zeros of the reversed value.
7497 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
7498   match(Set dst (CountTrailingZerosL src));
7499 
7500   ins_cost(INSN_COST * 2);
7501   format %{ "rbit   $dst, $src\n\t"
7502             "clz    $dst, $dst" %}
7503   ins_encode %{
7504     __ rbit($dst$$Register, $src$$Register);
7505     __ clz($dst$$Register, $dst$$Register);
7506   %}
7507 
7508   ins_pipe(ialu_reg);
7509 %}
7510 
7511 //---------- Population Count Instructions -------------------------------------
7512 //
7513 // PopCountI via the vector unit: move to an FP temp, cnt per byte, addv across bytes, move back.
7514 instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
7515   predicate(UsePopCountInstruction);
7516   match(Set dst (PopCountI src));
7517   effect(TEMP tmp);
7518   ins_cost(INSN_COST * 13);
7519 
7520   format %{ "movw   $src, $src\n\t"
7521             "mov    $tmp, $src\t# vector (1D)\n\t"
7522             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
7523             "addv   $tmp, $tmp\t# vector (8B)\n\t"
7524             "mov    $dst, $tmp\t# vector (1D)" %}
7525   ins_encode %{
7526     __ movw(as_Register($src$$reg), as_Register($src$$reg)); // make sure the upper 32 bits are zero
7527     __ mov(as_FloatRegister($tmp$$reg), __ T1D, 0, as_Register($src$$reg));
7528     __ cnt(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($tmp$$reg));
7529     __ addv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($tmp$$reg));
7530     __ mov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ T1D, 0);
7531   %}
7532 
7533   ins_pipe(pipe_class_default);
7534 %}
7535 // PopCountI of a loaded int: ldrs straight into the FP temp, then cnt/addv and move the sum to $dst.
7536 instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
7537   predicate(UsePopCountInstruction);
7538   match(Set dst (PopCountI (LoadI mem)));
7539   effect(TEMP tmp);
7540   ins_cost(INSN_COST * 13);
7541 
7542   format %{ "ldrs   $tmp, $mem\n\t"
7543             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
7544             "addv   $tmp, $tmp\t# vector (8B)\n\t"
7545             "mov    $dst, $tmp\t# vector (1D)" %}
7546   ins_encode %{
7547     FloatRegister vtmp = as_FloatRegister($tmp$$reg);
7548     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, vtmp, $mem->opcode(),
7549                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
7550     __ cnt(vtmp, __ T8B, vtmp);
7551     __ addv(vtmp, __ T8B, vtmp);
7552     __ mov(as_Register($dst$$reg), vtmp, __ T1D, 0);
7553   %}
7554 
7555   ins_pipe(pipe_class_default);
7556 %}
7557 
7558 // PopCountL via the vector unit. Note: Long.bitCount(long) returns an int, hence the iRegINoSp dst.
7559 instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
7560   predicate(UsePopCountInstruction);
7561   match(Set dst (PopCountL src));
7562   effect(TEMP tmp);
7563   ins_cost(INSN_COST * 13);
7564 
7565   format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
7566             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
7567             "addv   $tmp, $tmp\t# vector (8B)\n\t"
7568             "mov    $dst, $tmp\t# vector (1D)" %}
7569   ins_encode %{
7570     __ mov(as_FloatRegister($tmp$$reg), __ T1D, 0, as_Register($src$$reg));
7571     __ cnt(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($tmp$$reg));
7572     __ addv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($tmp$$reg));
7573     __ mov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ T1D, 0);
7574   %}
7575 
7576   ins_pipe(pipe_class_default);
7577 %}
7578 // PopCountL of a loaded long: ldrd straight into the FP temp, then cnt/addv and move the sum to $dst.
7579 instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
7580   predicate(UsePopCountInstruction);
7581   match(Set dst (PopCountL (LoadL mem)));
7582   effect(TEMP tmp);
7583   ins_cost(INSN_COST * 13);
7584 
7585   format %{ "ldrd   $tmp, $mem\n\t"
7586             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
7587             "addv   $tmp, $tmp\t# vector (8B)\n\t"
7588             "mov    $dst, $tmp\t# vector (1D)" %}
7589   ins_encode %{
7590     FloatRegister vtmp = as_FloatRegister($tmp$$reg);
7591     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, vtmp, $mem->opcode(),
7592                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
7593     __ cnt(vtmp, __ T8B, vtmp);
7594     __ addv(vtmp, __ T8B, vtmp);
7595     __ mov(as_Register($dst$$reg), vtmp, __ T1D, 0);
7596   %}
7597 
7598   ins_pipe(pipe_class_default);
7599 %}
7600 
7601 // ============================================================================
7602 // MemBar Instruction
7603 // LoadFence: always emits a LoadLoad|LoadStore barrier (no eliding variant in this section).
7604 instruct load_fence() %{
7605   match(LoadFence);
7606   ins_cost(VOLATILE_REF_COST);
7607 
7608   format %{ "load_fence" %}
7609 
7610   ins_encode %{
7611     __ membar(Assembler::LoadLoad|Assembler::LoadStore);  // same mask as membar_acquire
7612   %}
7613   ins_pipe(pipe_serial);
7614 %}
7615 // MemBarAcquire, elided: when unnecessary_acquire(n) holds, only a block comment is emitted.
7616 instruct unnecessary_membar_acquire() %{
7617   predicate(unnecessary_acquire(n));  // helper defined outside this section
7618   match(MemBarAcquire);
7619   ins_cost(0);  // zero cost, versus VOLATILE_REF_COST for membar_acquire
7620 
7621   format %{ "membar_acquire (elided)" %}
7622 
7623   ins_encode %{
7624     __ block_comment("membar_acquire (elided)");  // annotation only; no membar call
7625   %}
7626 
7627   ins_pipe(pipe_class_empty);
7628 %}
7629 // MemBarAcquire, fallback: unpredicated rule that emits a LoadLoad|LoadStore barrier.
7630 instruct membar_acquire() %{
7631   match(MemBarAcquire);
7632   ins_cost(VOLATILE_REF_COST);
7633 
7634   format %{ "membar_acquire" %}
7635 
7636   ins_encode %{
7637     __ block_comment("membar_acquire");
7638     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
7639   %}
7640 
7641   ins_pipe(pipe_serial);
7642 %}
7643 
7644 // MemBarAcquireLock: always elided; the encoding emits only a block comment.
7645 instruct membar_acquire_lock() %{
7646   match(MemBarAcquireLock);
7647   ins_cost(VOLATILE_REF_COST);  // NOTE(review): non-zero cost although nothing is emitted — confirm intended
7648 
7649   format %{ "membar_acquire_lock (elided)" %}
7650 
7651   ins_encode %{
7652     __ block_comment("membar_acquire_lock (elided)");
7653   %}
7654 
7655   ins_pipe(pipe_serial);
7656 %}
7657 // StoreFence: always emits a LoadStore|StoreStore barrier (no eliding variant in this section).
7658 instruct store_fence() %{
7659   match(StoreFence);
7660   ins_cost(VOLATILE_REF_COST);
7661 
7662   format %{ "store_fence" %}
7663 
7664   ins_encode %{
7665     __ membar(Assembler::LoadStore|Assembler::StoreStore);  // same mask as membar_release
7666   %}
7667   ins_pipe(pipe_serial);
7668 %}
7669 // MemBarRelease, elided: when unnecessary_release(n) holds, only a block comment is emitted.
7670 instruct unnecessary_membar_release() %{
7671   predicate(unnecessary_release(n));  // helper defined outside this section
7672   match(MemBarRelease);
7673   ins_cost(0);  // zero cost, versus VOLATILE_REF_COST for membar_release
7674 
7675   format %{ "membar_release (elided)" %}
7676 
7677   ins_encode %{
7678     __ block_comment("membar_release (elided)");  // annotation only; no membar call
7679   %}
7680   ins_pipe(pipe_serial);  // NOTE(review): unnecessary_membar_acquire uses pipe_class_empty — confirm the difference is intended
7681 %}
7682 // MemBarRelease, fallback: unpredicated rule that emits a LoadStore|StoreStore barrier.
7683 instruct membar_release() %{
7684   match(MemBarRelease);
7685   ins_cost(VOLATILE_REF_COST);
7686 
7687   format %{ "membar_release" %}
7688 
7689   ins_encode %{
7690     __ block_comment("membar_release");
7691     __ membar(Assembler::LoadStore|Assembler::StoreStore);
7692   %}
7693   ins_pipe(pipe_serial);
7694 %}
7695 // MemBarStoreStore: emits a StoreStore-only barrier.
7696 instruct membar_storestore() %{
7697   match(MemBarStoreStore);
7698   ins_cost(VOLATILE_REF_COST);
7699 
7700   format %{ "MEMBAR-store-store" %}
7701 
7702   ins_encode %{
7703     __ membar(Assembler::StoreStore);
7704   %}
7705   ins_pipe(pipe_serial);
7706 %}
7707 // MemBarReleaseLock: always elided; the encoding emits only a block comment.
7708 instruct membar_release_lock() %{
7709   match(MemBarReleaseLock);
7710   ins_cost(VOLATILE_REF_COST);  // NOTE(review): non-zero cost although nothing is emitted — confirm intended
7711 
7712   format %{ "membar_release_lock (elided)" %}
7713 
7714   ins_encode %{
7715     __ block_comment("membar_release_lock (elided)");
7716   %}
7717 
7718   ins_pipe(pipe_serial);
7719 %}
7720 // MemBarVolatile, elided: when unnecessary_volatile(n) holds, only a block comment is emitted.
7721 instruct unnecessary_membar_volatile() %{
7722   predicate(unnecessary_volatile(n));  // helper defined outside this section
7723   match(MemBarVolatile);
7724   ins_cost(0);  // zero cost, versus VOLATILE_REF_COST*100 for membar_volatile
7725 
7726   format %{ "membar_volatile (elided)" %}
7727 
7728   ins_encode %{
7729     __ block_comment("membar_volatile (elided)");  // annotation only; no membar call
7730   %}
7731 
7732   ins_pipe(pipe_serial);
7733 %}
7734 // MemBarVolatile, fallback: unpredicated rule that emits a StoreLoad barrier.
7735 instruct membar_volatile() %{
7736   match(MemBarVolatile);
7737   ins_cost(VOLATILE_REF_COST*100);  // 100x the cost of the other barrier rules in this section
7738 
7739   format %{ "membar_volatile" %}
7740 
7741   ins_encode %{
7742     __ block_comment("membar_volatile");
7743     __ membar(Assembler::StoreLoad);
7744     %}
7745 
7746   ins_pipe(pipe_serial);
7747 %}
7748 
7749 // ============================================================================
7750 // Cast/Convert Instructions
7751 // CastX2P (long -> ptr): a register move, skipped when both operands were given the same register.
7752 instruct castX2P(iRegPNoSp dst, iRegL src) %{
7753   match(Set dst (CastX2P src));
7754 
7755   ins_cost(INSN_COST);
7756   format %{ "mov $dst, $src\t# long -> ptr" %}
7757 
7758   ins_encode %{
7759     if ($dst$$Register != $src$$Register) {
7760       __ mov($dst$$Register, $src$$Register);
7761     }
7762   %}
7763 
7764   ins_pipe(ialu_reg);
7765 %}
7766 // CastP2X (ptr -> long): a register move, skipped when both operands were given the same register.
7767 instruct castP2X(iRegLNoSp dst, iRegP src) %{
7768   match(Set dst (CastP2X src));
7769 
7770   ins_cost(INSN_COST);
7771   format %{ "mov $dst, $src\t# ptr -> long" %}
7772 
7773   ins_encode %{
7774     if ($dst$$Register != $src$$Register) {
7775       __ mov($dst$$Register, $src$$Register);
7776     }
7777   %}
7778 
7779   ins_pipe(ialu_reg);
7780 %}
7781 
7782 // Convert oop into int for vectors alignment masking: ConvL2I(CastP2X) collapses to one movw.
7783 instruct convP2I(iRegINoSp dst, iRegP src) %{
7784   match(Set dst (ConvL2I (CastP2X src)));
7785 
7786   ins_cost(INSN_COST);
7787   format %{ "movw $dst, $src\t# ptr -> int" %}
7788   ins_encode %{
7789     __ movw(as_Register($dst$$reg), as_Register($src$$reg));
7790   %}
7791 
7792   ins_pipe(ialu_reg);
7793 %}
7794 
7795 // Convert compressed oop into int for vectors alignment masking
7796 // in case of 32bit oops (heap < 4Gb).
7797 instruct convN2I(iRegINoSp dst, iRegN src)
7798 %{
7799   predicate(Universe::narrow_oop_shift() == 0);  // only with unshifted narrow oops
7800   match(Set dst (ConvL2I (CastP2X (DecodeN src))));  // the DecodeN is folded away: the narrow value is moved directly
7801 
7802   ins_cost(INSN_COST);
7803   format %{ "movw $dst, $src\t# compressed ptr -> int" %}
7804   ins_encode %{
7805     __ movw($dst$$Register, $src$$Register);
7806   %}
7807 
7808   ins_pipe(ialu_reg);
7809 %}
7810 
7811 // Convert oop pointer into compressed form; this rule covers oops whose type is not known NotNull.
7812 instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
7813   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
7814   match(Set dst (EncodeP src));
7815   effect(KILL cr);
7816   ins_cost(INSN_COST * 3);
7817   format %{ "encode_heap_oop $dst, $src" %}
7818   ins_encode %{
7819     Register src_reg = as_Register($src$$reg);
7820     Register dst_reg = as_Register($dst$$reg);
7821     __ encode_heap_oop(dst_reg, src_reg);
7822   %}
7823   ins_pipe(ialu_reg);
7824 %}
7825 // EncodeP for oops typed NotNull: uses the not-null macro-assembler variant; declares cr but no KILL effect.
7826 instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
7827   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
7828   match(Set dst (EncodeP src));
7829   ins_cost(INSN_COST * 3);
7830   format %{ "encode_heap_oop_not_null $dst, $src" %}
7831   ins_encode %{
7832     __ encode_heap_oop_not_null(as_Register($dst$$reg), as_Register($src$$reg));
7833   %}
7834   ins_pipe(ialu_reg);
7835 %}
7836 // DecodeN for narrow oops whose result type is neither NotNull nor Constant.
7837 instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
7838   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
7839             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
7840   match(Set dst (DecodeN src));
7841   ins_cost(INSN_COST * 3);
7842   format %{ "decode_heap_oop $dst, $src" %}
7843   ins_encode %{
7844     Register src_reg = as_Register($src$$reg);
7845     Register dst_reg = as_Register($dst$$reg);
7846     __ decode_heap_oop(dst_reg, src_reg);
7847   %}
7848   ins_pipe(ialu_reg);
7849 %}
7850 // DecodeN for narrow oops whose result type is NotNull or Constant: uses the not-null variant.
7851 instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
7852   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
7853             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
7854   match(Set dst (DecodeN src));
7855   ins_cost(INSN_COST * 3);
7856   format %{ "decode_heap_oop_not_null $dst, $src" %}
7857   ins_encode %{
7858     Register src_reg = as_Register($src$$reg);
7859     Register dst_reg = as_Register($dst$$reg);
7860     __ decode_heap_oop_not_null(dst_reg, src_reg);
7861   %}
7862   ins_pipe(ialu_reg);
7863 %}
7864 
7865 // n.b. AArch64 implementations of encode_klass_not_null and
7866 // decode_klass_not_null do not modify the flags register so, unlike
7867 // Intel, we don't kill CR as a side effect here
7868 // EncodePKlass: compress a klass pointer; no flags operand is declared for this rule.
7869 instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
7870   match(Set dst (EncodePKlass src));
7871 
7872   ins_cost(INSN_COST * 3);
7873   format %{ "encode_klass_not_null $dst,$src" %}
7874 
7875   ins_encode %{
7876     Register klass = $src$$Register;
7877     Register narrow = $dst$$Register;
7878     __ encode_klass_not_null(narrow, klass);
7879   %}
7880 
7881   ins_pipe(ialu_reg);
7882 %}
7883 // DecodeNKlass: expand a narrow klass; the one-argument form is used when dst and src share a register.
7884 instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
7885   match(Set dst (DecodeNKlass src));
7886 
7887   ins_cost(INSN_COST * 3);
7888   format %{ "decode_klass_not_null $dst,$src" %}
7889 
7890   ins_encode %{
7891     Register narrow = $src$$Register;
7892     Register klass = $dst$$Register;
7893     if (klass == narrow) {
7894       __ decode_klass_not_null(klass);
7895     } else {
7896       __ decode_klass_not_null(klass, narrow);
7897     }
7898   %}
7899 
7900   ins_pipe(ialu_reg);
7901 %}
7902 // CheckCastPP: zero-size rule with an empty encoding; dst is both input and result.
7903 instruct checkCastPP(iRegPNoSp dst)
7904 %{
7905   match(Set dst (CheckCastPP dst));
7906 
7907   size(0);  // declares that no code bytes are emitted
7908   format %{ "# checkcastPP of $dst" %}
7909   ins_encode(/* empty encoding */);
7910   ins_pipe(pipe_class_empty);
7911 %}
7912 // CastPP: zero-size rule with an empty encoding; dst is both input and result.
7913 instruct castPP(iRegPNoSp dst)
7914 %{
7915   match(Set dst (CastPP dst));
7916 
7917   size(0);  // declares that no code bytes are emitted
7918   format %{ "# castPP of $dst" %}
7919   ins_encode(/* empty encoding */);
7920   ins_pipe(pipe_class_empty);
7921 %}
7922 // CastII: zero-size rule with an empty encoding; dst is both input and result.
7923 instruct castII(iRegI dst)
7924 %{
7925   match(Set dst (CastII dst));
7926 
7927   size(0);  // declares that no code bytes are emitted
7928   format %{ "# castII of $dst" %}
7929   ins_encode(/* empty encoding */);
7930   ins_cost(0);  // explicit zero cost; checkCastPP and castPP declare none
7931   ins_pipe(pipe_class_empty);
7932 %}
7933 
7934 // ============================================================================
7935 // Atomic operation instructions
7936 //
7937 // Intel and SPARC both implement Ideal Node LoadPLocked and
7938 // Store{PIL}Conditional instructions using a normal load for the
7939 // LoadPLocked and a CAS for the Store{PIL}Conditional.
7940 //
7941 // The ideal code appears only to use LoadPLocked/StorePConditional as a
7942 // pair to lock object allocations from Eden space when not using
7943 // TLABs.
7944 //
7945 // There does not appear to be a Load{IL}Locked Ideal Node and the
7946 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
7947 // and to use StoreIConditional only for 32-bit and StoreLConditional
7948 // only for 64-bit.
7949 //
7950 // We implement the LoadPLocked and StorePConditional instructions
7951 // using, respectively, the AArch64 hw load-exclusive and
7952 // store-conditional instructions, whereas we must implement each of
7953 // Store{IL}Conditional using a CAS which employs a pair of
7954 // instructions comprising a load-exclusive followed by a
7955 // store-conditional.
7956 
7957 
7958 // Locked-load (linked load) of the current heap-top
7959 // used when updating the eden heap top
7960 // implemented using ldaxr on AArch64
7961 
7962 instruct loadPLocked(iRegPNoSp dst, indirect mem)
7963 %{
7964   match(Set dst (LoadPLocked mem));
7965   // Matches the LoadPLocked ideal node; mem is restricted to the indirect operand form.
7966   ins_cost(VOLATILE_REF_COST);
7967 
7968   format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}
7969   // Code generation is delegated to the shared aarch64_enc_ldaxr encoding class.
7970   ins_encode(aarch64_enc_ldaxr(dst, mem));
7971 
7972   ins_pipe(pipe_serial);
7973 %}
7974 
7975 // Conditional-store of the updated heap-top.
7976 // Used during allocation of the shared heap.
7977 // Sets flag (EQ) on success.
7978 // implemented using stlxr on AArch64.
7979 
7980 instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr) 
7981 %{
7982   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7983   // The rule's result is the flags register cr. oldval takes part in the match but is not passed to the encoding below.
7984   ins_cost(VOLATILE_REF_COST);
7985   // NOTE(review): the two format strings below have no "\n\t" separator between them - confirm how ADLC joins adjacent strings.
7986  // TODO
7987  // do we need to do a store-conditional release or can we just use a
7988  // plain store-conditional?
7989 
7990   format %{
7991     "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
7992     "cmpw rscratch1, zr\t# EQ on successful write"
7993   %}
7994   // Only newval and the address operand are handed to the shared aarch64_enc_stlxr encoding class.
7995   ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));
7996 
7997   ins_pipe(pipe_serial);
7998 %}
7999 
8000 
8001 // storeLConditional is used by PhaseMacroExpand::expand_lock_node
8002 // when attempting to rebias a lock towards the current thread.  We
8003 // must use the acquire form of cmpxchg in order to guarantee acquire
8004 // semantics in this case.
8005 instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) 
8006 %{
8007   match(Set cr (StoreLConditional mem (Binary oldval newval)));
8008   // Long-register conditional store; the rule's result is the flags register cr.
8009   ins_cost(VOLATILE_REF_COST);
8010 
8011   format %{
8012     "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
8013     "cmpw rscratch1, zr\t# EQ on successful write"
8014   %}
8015   // Delegates to the acquiring (_acq) compare-and-exchange encoding class.
8016   ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));
8017 
8018   ins_pipe(pipe_slow);
8019 %}
8020 
8021 // storeIConditional also has acquire semantics, for no better reason
8022 // than matching storeLConditional.  At the time of writing this
8023 // comment storeIConditional was not used anywhere by AArch64.
8024 instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) 
8025 %{
8026   match(Set cr (StoreIConditional mem (Binary oldval newval)));
8027   // Int-register conditional store; the rule's result is the flags register cr.
8028   ins_cost(VOLATILE_REF_COST);
8029 
8030   format %{
8031     "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
8032     "cmpw rscratch1, zr\t# EQ on successful write"
8033   %}
8034   // Delegates to the acquiring (_acq) w-form compare-and-exchange encoding class.
8035   ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));
8036 
8037   ins_pipe(pipe_slow);
8038 %}
8039 
8040 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
8041 // can't match them
8042 
8043 // standard CompareAndSwapX when we are using barriers
8044 // these have higher priority than the rules selected by a predicate
8045 
8046 instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
8047   // Unpredicated CompareAndSwapI rule; res is an int register set from the EQ condition after the exchange.
8048   match(Set res (CompareAndSwapI mem (Binary oldval newval)));
8049   ins_cost(2 * VOLATILE_REF_COST);
8050   // cr is not part of the match tree; it is only declared as killed.
8051   effect(KILL cr);
8052 
8053  format %{
8054     "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
8055     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8056  %}
8057   // Two shared encodings are listed in order: aarch64_enc_cmpxchgw, then aarch64_enc_cset_eq to materialise res.
8058  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
8059             aarch64_enc_cset_eq(res));
8060 
8061   ins_pipe(pipe_slow);
8062 %}
8063 
8064 instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
8065   // Unpredicated CompareAndSwapL rule; operands are long registers but res is still an int register.
8066   match(Set res (CompareAndSwapL mem (Binary oldval newval)));
8067   ins_cost(2 * VOLATILE_REF_COST);
8068   // cr is not part of the match tree; it is only declared as killed.
8069   effect(KILL cr);
8070 
8071  format %{
8072     "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
8073     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8074  %}
8075   // Two shared encodings are listed in order: aarch64_enc_cmpxchg, then aarch64_enc_cset_eq to materialise res.
8076  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
8077             aarch64_enc_cset_eq(res));
8078 
8079   ins_pipe(pipe_slow);
8080 %}
8081 
8082 instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
8083   // Plain CompareAndSwapP rule: applies unless both UseShenandoahGC and ShenandoahCASBarrier are set, or when n->in(3)->in(1) (presumably oldval - confirm) has the NULL_PTR type.
8084   predicate(!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR);
8085   match(Set res (CompareAndSwapP mem (Binary oldval newval)));
8086   ins_cost(2 * VOLATILE_REF_COST);
8087   // cr is not part of the match tree; it is only declared as killed.
8088   effect(KILL cr);
8089 
8090  format %{
8091     "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
8092     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8093  %}
8094   // Two shared encodings are listed in order: aarch64_enc_cmpxchg, then aarch64_enc_cset_eq to materialise res.
8095  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
8096             aarch64_enc_cset_eq(res));
8097 
8098   ins_pipe(pipe_slow);
8099 %}
8100 
8101 instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
8102   // Shenandoah CompareAndSwapP rule; its predicate is the exact negation of the one on compareAndSwapP.
8103   predicate(UseShenandoahGC && ShenandoahCASBarrier && n->in(3)->in(1)->bottom_type() != TypePtr::NULL_PTR);
8104   match(Set res (CompareAndSwapP mem (Binary oldval newval)));
8105   ins_cost(2 * VOLATILE_REF_COST);
8106   // Besides killing cr, the rule reserves tmp as a temporary pointer register.
8107   effect(TEMP tmp, KILL cr);
8108 
8109   format %{
8110     "cmpxchg_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
8111   %}
8112   // A single shared encoding receives all five operands, including tmp and res.
8113   ins_encode(aarch64_enc_cmpxchg_oop_shenandoah(mem, oldval, newval, tmp, res));
8114 
8115   ins_pipe(pipe_slow);
8116 %}
8117 
8118 instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
8119   // Plain CompareAndSwapN rule: applies unless both UseShenandoahGC and ShenandoahCASBarrier are set, or when n->in(3)->in(1) (presumably oldval - confirm) has the narrow-oop NULL_PTR type.
8120   predicate(!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypeNarrowOop::NULL_PTR);
8121   match(Set res (CompareAndSwapN mem (Binary oldval newval)));
8122   ins_cost(2 * VOLATILE_REF_COST);
8123   // cr is not part of the match tree; it is only declared as killed.
8124   effect(KILL cr);
8125 
8126  format %{
8127     "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
8128     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8129  %}
8130   // Two shared encodings are listed in order: aarch64_enc_cmpxchgw, then aarch64_enc_cset_eq to materialise res.
8131  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
8132             aarch64_enc_cset_eq(res));
8133 
8134   ins_pipe(pipe_slow);
8135 %}
8136 
8137 instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
8138   // Shenandoah CompareAndSwapN rule; its predicate is the exact negation of the one on compareAndSwapN.
8139   predicate(UseShenandoahGC && ShenandoahCASBarrier && n->in(3)->in(1)->bottom_type() != TypeNarrowOop::NULL_PTR);
8140   match(Set res (CompareAndSwapN mem (Binary oldval newval)));
8141   effect(TEMP tmp, KILL cr);
8142   ins_cost(2 * VOLATILE_REF_COST);
8143 
8144   format %{
8145     "cmpxchgw_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
8146   %}
8147 
8148   ins_encode %{
8149     // The barrier helper is given a copy of the expected value so that oldval is not clobbered.
8150     Register expected = $tmp$$Register;
8151     __ mov(expected, $oldval$$Register);
8152     ShenandoahBarrierSetAssembler::bsasm()->cmpxchg_oop(&_masm, $mem$$Register, expected, $newval$$Register,
8153         /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false, $res$$Register);
8154   %}
8155   ins_pipe(pipe_slow);
8156 %}
8157 
8158 // alternative CompareAndSwapX when we are eliding barriers
8159 
8160 instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
8161   // Acquiring CompareAndSwapI rule: chosen only when needs_acquiring_load_exclusive(n) holds, at half the cost of compareAndSwapI.
8162   predicate(needs_acquiring_load_exclusive(n));
8163   match(Set res (CompareAndSwapI mem (Binary oldval newval)));
8164   ins_cost(VOLATILE_REF_COST);
8165   // cr is not part of the match tree; it is only declared as killed.
8166   effect(KILL cr);
8167 
8168  format %{
8169     "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
8170     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8171  %}
8172   // Two shared encodings are listed in order: aarch64_enc_cmpxchgw_acq, then aarch64_enc_cset_eq to materialise res.
8173  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
8174             aarch64_enc_cset_eq(res));
8175 
8176   ins_pipe(pipe_slow);
8177 %}
8178 
8179 instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
8180   // Acquiring CompareAndSwapL rule: chosen only when needs_acquiring_load_exclusive(n) holds, at half the cost of compareAndSwapL.
8181   predicate(needs_acquiring_load_exclusive(n));
8182   match(Set res (CompareAndSwapL mem (Binary oldval newval)));
8183   ins_cost(VOLATILE_REF_COST);
8184   // cr is not part of the match tree; it is only declared as killed.
8185   effect(KILL cr);
8186 
8187  format %{
8188     "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
8189     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8190  %}
8191   // Two shared encodings are listed in order: aarch64_enc_cmpxchg_acq, then aarch64_enc_cset_eq to materialise res.
8192  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
8193             aarch64_enc_cset_eq(res));
8194 
8195   ins_pipe(pipe_slow);
8196 %}
8197 
8198 instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
8199   // Acquiring CompareAndSwapP rule: needs_acquiring_load_exclusive(n) must hold in addition to the non-Shenandoah (or NULL_PTR) condition used by compareAndSwapP.
8200   predicate(needs_acquiring_load_exclusive(n) && (!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR));
8201   match(Set res (CompareAndSwapP mem (Binary oldval newval)));
8202   ins_cost(VOLATILE_REF_COST);
8203   // cr is not part of the match tree; it is only declared as killed.
8204   effect(KILL cr);
8205 
8206  format %{
8207     "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
8208     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8209  %}
8210   // Two shared encodings are listed in order: aarch64_enc_cmpxchg_acq, then aarch64_enc_cset_eq to materialise res.
8211  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
8212             aarch64_enc_cset_eq(res));
8213 
8214   ins_pipe(pipe_slow);
8215 %}
8216 
8217 instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
8218   // Acquiring Shenandoah CompareAndSwapP rule: needs_acquiring_load_exclusive(n) plus the same Shenandoah condition as compareAndSwapP_shenandoah.
8219   predicate(needs_acquiring_load_exclusive(n) && UseShenandoahGC && ShenandoahCASBarrier && n->in(3)->in(1)->bottom_type() != TypePtr::NULL_PTR);
8220   match(Set res (CompareAndSwapP mem (Binary oldval newval)));
8221   ins_cost(VOLATILE_REF_COST);
8222   // Besides killing cr, the rule reserves tmp as a temporary pointer register.
8223   effect(TEMP tmp, KILL cr);
8224 
8225   format %{
8226     "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
8227   %}
8228   // A single shared encoding receives all five operands, including tmp and res.
8229   ins_encode(aarch64_enc_cmpxchg_acq_oop_shenandoah(mem, oldval, newval, tmp, res));
8230 
8231   ins_pipe(pipe_slow);
8232 %}
8233 
8234 instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
8235   // Acquiring CompareAndSwapN rule: needs_acquiring_load_exclusive(n) must hold in addition to the non-Shenandoah (or narrow NULL_PTR) condition used by compareAndSwapN.
8236   predicate(needs_acquiring_load_exclusive(n) && (!UseShenandoahGC || !ShenandoahCASBarrier|| n->in(3)->in(1)->bottom_type() == TypeNarrowOop::NULL_PTR));
8237   match(Set res (CompareAndSwapN mem (Binary oldval newval)));
8238   ins_cost(VOLATILE_REF_COST);
8239   // cr is not part of the match tree; it is only declared as killed.
8240   effect(KILL cr);
8241 
8242  format %{
8243     "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
8244     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
8245  %}
8246   // Two shared encodings are listed in order: aarch64_enc_cmpxchgw_acq, then aarch64_enc_cset_eq to materialise res.
8247  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
8248             aarch64_enc_cset_eq(res));
8249 
8250   ins_pipe(pipe_slow);
8251 %}
8252 
8253 instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
8254   // Acquiring Shenandoah CompareAndSwapN rule: needs_acquiring_load_exclusive(n) plus the same Shenandoah condition as compareAndSwapN_shenandoah.
8255   predicate(needs_acquiring_load_exclusive(n) && UseShenandoahGC && ShenandoahCASBarrier && n->in(3)->in(1)->bottom_type() != TypeNarrowOop::NULL_PTR);
8256   match(Set res (CompareAndSwapN mem (Binary oldval newval)));
8257   effect(TEMP tmp, KILL cr);
8258   ins_cost(VOLATILE_REF_COST);
8259 
8260   format %{
8261     "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
8262   %}
8263 
8264   ins_encode %{
8265     // The barrier helper is given a copy of the expected value so that oldval is not clobbered.
8266     Register expected = $tmp$$Register;
8267     __ mov(expected, $oldval$$Register);
8268     ShenandoahBarrierSetAssembler::bsasm()->cmpxchg_oop(&_masm, $mem$$Register, expected, $newval$$Register,
8269         /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false, $res$$Register);
8270   %}
8271   ins_pipe(pipe_slow);
8272 %}
8273 
8274 instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
8275   match(Set prev (GetAndSetI mem newv));
8276   format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
8277   ins_cost(2 * VOLATILE_REF_COST);  // unpredicated GetAndSetI rule; get_and_setIAcq is costed at half this
8278   ins_pipe(pipe_serial);
8279   ins_encode %{
8280     __ atomic_xchgw(as_Register($prev$$reg), as_Register($newv$$reg), as_Register($mem$$base)); // mem supplies only its base register
8281   %}
8282 %}
8283 
8284 instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
8285   match(Set prev (GetAndSetL mem newv));
8286   format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
8287   ins_cost(2 * VOLATILE_REF_COST);  // unpredicated GetAndSetL rule; get_and_setLAcq is costed at half this
8288   ins_pipe(pipe_serial);
8289   ins_encode %{
8290     __ atomic_xchg(as_Register($prev$$reg), as_Register($newv$$reg), as_Register($mem$$base)); // mem supplies only its base register
8291   %}
8292 %}
8293 
8294 instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
8295   match(Set prev (GetAndSetN mem newv));
8296   format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
8297   ins_cost(2 * VOLATILE_REF_COST);  // unpredicated GetAndSetN rule; get_and_setNAcq is costed at half this
8298   ins_pipe(pipe_serial);
8299   ins_encode %{
8300     __ atomic_xchgw(as_Register($prev$$reg), as_Register($newv$$reg), as_Register($mem$$base)); // same w-form helper as get_and_setI
8301   %}
8302 %}
8303 
8304 instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
8305   match(Set prev (GetAndSetP mem newv));
8306   format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
8307   ins_cost(2 * VOLATILE_REF_COST);  // unpredicated GetAndSetP rule; get_and_setPAcq is costed at half this
8308   ins_pipe(pipe_serial);
8309   ins_encode %{
8310     __ atomic_xchg(as_Register($prev$$reg), as_Register($newv$$reg), as_Register($mem$$base)); // same helper as get_and_setL
8311   %}
8312 %}
8313 
8314 instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) %{
8315   predicate(needs_acquiring_load_exclusive(n));
8316   match(Set prev (GetAndSetI mem newv));
8317   format %{ "atomic_xchgw_acq  $prev, $newv, [$mem]" %}
8318   ins_cost(VOLATILE_REF_COST);  // acquiring variant, eligible only when the predicate holds
8319   ins_pipe(pipe_serial);
8320   ins_encode %{
8321     __ atomic_xchgalw(as_Register($prev$$reg), as_Register($newv$$reg), as_Register($mem$$base)); // al-form of the w exchange helper
8322   %}
8323 %}
8324 
8325 instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) %{
8326   predicate(needs_acquiring_load_exclusive(n));
8327   match(Set prev (GetAndSetL mem newv));
8328   format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
8329   ins_cost(VOLATILE_REF_COST);  // acquiring variant, eligible only when the predicate holds
8330   ins_pipe(pipe_serial);
8331   ins_encode %{
8332     __ atomic_xchgal(as_Register($prev$$reg), as_Register($newv$$reg), as_Register($mem$$base)); // al-form of the exchange helper
8333   %}
8334 %}
8335 
8336 instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{
8337   predicate(needs_acquiring_load_exclusive(n));
8338   match(Set prev (GetAndSetN mem newv));
8339   format %{ "atomic_xchgw_acq $prev, $newv, [$mem]" %}
8340   ins_cost(VOLATILE_REF_COST);  // acquiring variant, eligible only when the predicate holds
8341   ins_pipe(pipe_serial);
8342   ins_encode %{
8343     __ atomic_xchgalw(as_Register($prev$$reg), as_Register($newv$$reg), as_Register($mem$$base)); // same al-form helper as get_and_setIAcq
8344   %}
8345 %}
8346 
8347 instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) %{
8348   predicate(needs_acquiring_load_exclusive(n));
8349   match(Set prev (GetAndSetP mem newv));
8350   format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
8351   ins_cost(VOLATILE_REF_COST);  // acquiring variant, eligible only when the predicate holds
8352   ins_pipe(pipe_serial);
8353   ins_encode %{
8354     __ atomic_xchgal(as_Register($prev$$reg), as_Register($newv$$reg), as_Register($mem$$base)); // same al-form helper as get_and_setLAcq
8355   %}
8356 %}
8357 
8358 
8359 instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
8360   match(Set newval (GetAndAddL mem incr));
8361   format %{ "get_and_addL $newval, [$mem], $incr" %}
8362   ins_cost(2 * VOLATILE_REF_COST + 1);  // one more than get_and_addL_no_res
8363   ins_pipe(pipe_serial);
8364   ins_encode %{
8365     __ atomic_add(as_Register($newval$$reg), as_Register($incr$$reg), as_Register($mem$$base)); // register increment; result register is newval
8366   %}
8367 %}
8368 
8369 instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
8370   predicate(n->as_LoadStore()->result_not_used());
8371   match(Set dummy (GetAndAddL mem incr));
8372   format %{ "get_and_addL [$mem], $incr" %}
8373   ins_cost(2 * VOLATILE_REF_COST);  // result unused: the destination is the Universe dummy operand
8374   ins_pipe(pipe_serial);
8375   ins_encode %{
8376     __ atomic_add(noreg, as_Register($incr$$reg), as_Register($mem$$base)); // noreg is passed in the result position
8377   %}
8378 %}
8379 
8380 instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
8381   match(Set newval (GetAndAddL mem incr));
8382   format %{ "get_and_addL $newval, [$mem], $incr" %}
8383   ins_cost(2 * VOLATILE_REF_COST + 1);  // one more than get_and_addLi_no_res
8384   ins_pipe(pipe_serial);
8385   ins_encode %{
8386     __ atomic_add(as_Register($newval$$reg), $incr$$constant, as_Register($mem$$base)); // immediate increment; result register is newval
8387   %}
8388 %}
8389 
8390 instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
8391   predicate(n->as_LoadStore()->result_not_used());
8392   match(Set dummy (GetAndAddL mem incr));
8393   format %{ "get_and_addL [$mem], $incr" %}
8394   ins_cost(2 * VOLATILE_REF_COST);  // result unused: the destination is the Universe dummy operand
8395   ins_pipe(pipe_serial);
8396   ins_encode %{
8397     __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base)); // immediate increment, noreg in the result position
8398   %}
8399 %}
8400 
8401 instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
8402   match(Set newval (GetAndAddI mem incr));
8403   format %{ "get_and_addI $newval, [$mem], $incr" %}
8404   ins_cost(2 * VOLATILE_REF_COST + 1);  // one more than get_and_addI_no_res
8405   ins_pipe(pipe_serial);
8406   ins_encode %{
8407     __ atomic_addw(as_Register($newval$$reg), as_Register($incr$$reg), as_Register($mem$$base)); // register increment; result register is newval
8408   %}
8409 %}
8410 
8411 instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
8412   predicate(n->as_LoadStore()->result_not_used());
8413   match(Set dummy (GetAndAddI mem incr));
8414   format %{ "get_and_addI [$mem], $incr" %}
8415   ins_cost(2 * VOLATILE_REF_COST);  // result unused: the destination is the Universe dummy operand
8416   ins_pipe(pipe_serial);
8417   ins_encode %{
8418     __ atomic_addw(noreg, as_Register($incr$$reg), as_Register($mem$$base)); // noreg is passed in the result position
8419   %}
8420 %}
8421 
8422 instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
8423   match(Set newval (GetAndAddI mem incr));
8424   format %{ "get_and_addI $newval, [$mem], $incr" %}
8425   ins_cost(2 * VOLATILE_REF_COST + 1);  // one more than get_and_addIi_no_res
8426   ins_pipe(pipe_serial);
8427   ins_encode %{
8428     __ atomic_addw(as_Register($newval$$reg), $incr$$constant, as_Register($mem$$base)); // immediate increment; result register is newval
8429   %}
8430 %}
8431 
8432 instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
8433   predicate(n->as_LoadStore()->result_not_used());
8434   match(Set dummy (GetAndAddI mem incr));
8435   format %{ "get_and_addI [$mem], $incr" %}
8436   ins_cost(2 * VOLATILE_REF_COST);  // result unused: the destination is the Universe dummy operand
8437   ins_pipe(pipe_serial);
8438   ins_encode %{
8439     __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base)); // immediate increment, noreg in the result position
8440   %}
8441 %}
8442 
8443 instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) %{
8444   predicate(needs_acquiring_load_exclusive(n));
8445   match(Set newval (GetAndAddL mem incr));
8446   format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
8447   ins_cost(VOLATILE_REF_COST + 1);  // acquiring variant; one more than get_and_addL_no_resAcq
8448   ins_pipe(pipe_serial);
8449   ins_encode %{
8450     __ atomic_addal(as_Register($newval$$reg), as_Register($incr$$reg), as_Register($mem$$base)); // al-form add, register increment
8451   %}
8452 %}
8453 
8454 instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{
8455   predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
8456   match(Set dummy (GetAndAddL mem incr));
8457   format %{ "get_and_addL_acq [$mem], $incr" %}
8458   ins_cost(VOLATILE_REF_COST);  // acquiring variant with unused result
8459   ins_pipe(pipe_serial);
8460   ins_encode %{
8461     __ atomic_addal(noreg, as_Register($incr$$reg), as_Register($mem$$base)); // noreg is passed in the result position
8462   %}
8463 %}
8464 
8465 instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
8466   predicate(needs_acquiring_load_exclusive(n));
8467   match(Set newval (GetAndAddL mem incr));
8468   format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
8469   ins_cost(VOLATILE_REF_COST + 1);  // acquiring variant; one more than get_and_addLi_no_resAcq
8470   ins_pipe(pipe_serial);
8471   ins_encode %{
8472     __ atomic_addal(as_Register($newval$$reg), $incr$$constant, as_Register($mem$$base)); // al-form add, immediate increment
8473   %}
8474 %}
8475 
8476 instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAddSub incr) %{
8477   predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
8478   match(Set dummy (GetAndAddL mem incr));
8479   format %{ "get_and_addL_acq [$mem], $incr" %}
8480   ins_cost(VOLATILE_REF_COST);  // acquiring variant with unused result
8481   ins_pipe(pipe_serial);
8482   ins_encode %{
8483     __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base)); // immediate increment, noreg in the result position
8484   %}
8485 %}
8486 
8487 instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
8488   predicate(needs_acquiring_load_exclusive(n));
8489   match(Set newval (GetAndAddI mem incr));
8490   format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
8491   ins_cost(VOLATILE_REF_COST + 1);  // acquiring variant; one more than get_and_addI_no_resAcq
8492   ins_pipe(pipe_serial);
8493   ins_encode %{
8494     __ atomic_addalw(as_Register($newval$$reg), as_Register($incr$$reg), as_Register($mem$$base)); // al-form w add, register increment
8495   %}
8496 %}
8497 
8498 instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) %{
8499   predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
8500   match(Set dummy (GetAndAddI mem incr));
8501   format %{ "get_and_addI_acq [$mem], $incr" %}
8502   ins_cost(VOLATILE_REF_COST);  // acquiring variant with unused result
8503   ins_pipe(pipe_serial);
8504   ins_encode %{
8505     __ atomic_addalw(noreg, as_Register($incr$$reg), as_Register($mem$$base)); // noreg is passed in the result position
8506   %}
8507 %}
8508 
8509 instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAddSub incr) %{
8510   predicate(needs_acquiring_load_exclusive(n));
8511   match(Set newval (GetAndAddI mem incr));
8512   format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
8513   ins_cost(VOLATILE_REF_COST + 1);  // acquiring variant; one more than get_and_addIi_no_resAcq
8514   ins_pipe(pipe_serial);
8515   ins_encode %{
8516     __ atomic_addalw(as_Register($newval$$reg), $incr$$constant, as_Register($mem$$base)); // al-form w add, immediate increment
8517   %}
8518 %}
8519 
8520 instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAddSub incr) %{
8521   predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
8522   match(Set dummy (GetAndAddI mem incr));
8523   format %{ "get_and_addI_acq [$mem], $incr" %}
8524   ins_cost(VOLATILE_REF_COST);  // acquiring variant with unused result
8525   ins_pipe(pipe_serial);
8526   ins_encode %{
8527     __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base)); // immediate increment, noreg in the result position
8528   %}
8529 %}
8530 
8531 // ============================================================================
8532 // Conditional Move Instructions
8533 
8534 // n.b. we have identical rules for both a signed compare op (cmpOp)
8535 // and an unsigned compare op (cmpOpU). It would be nice if we could
8536 // define an op class which merged both inputs and use it to type the
8537 // argument to a single rule. Unfortunately this fails because the
8538 // opclass does not live up to the COND_INTER interface of its
8539 // component operands. When the generic code tries to negate the
8540 // operand it ends up running the generic MachOper::negate method
8541 // which throws a ShouldNotHappen. So, we have to provide two flavours
8542 // of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
8543 
8544 instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
8545   match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));
8546   // Signed-compare flavour of the int conditional move between two registers.
8547   format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}
8548   ins_cost(INSN_COST * 2);
8549 
8550   ins_encode %{
8551     // Operand order follows the format string: dst, src2, src1, condition.
8552     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
8553     Register dst_reg = $dst$$Register;
8554     __ cselw(dst_reg, $src2$$Register, $src1$$Register, cond);
8555   %}
8556 
8557   ins_pipe(icond_reg_reg);
8558 %}
8559 
8560 instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
8561   match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));
8562   // Unsigned-compare flavour of the int conditional move between two registers.
8563   format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}
8564   ins_cost(INSN_COST * 2);
8565 
8566   ins_encode %{
8567     // Operand order follows the format string: dst, src2, src1, condition.
8568     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
8569     Register dst_reg = $dst$$Register;
8570     __ cselw(dst_reg, $src2$$Register, $src1$$Register, cond);
8571   %}
8572 
8573   ins_pipe(icond_reg_reg);
8574 %}
8575 
8576 // special cases where one arg is zero
8577 
8578 // n.b. this is selected in preference to the rule above because it
8579 // avoids loading constant 0 into a source register
8580 
8581 // TODO
8582 // we ought to be able to cull one of these variants, as the ideal
8583 // transforms ought always to order the zero consistently (to left/right?)
8584 
8585 instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
8586   match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));
8587   // Signed-compare int conditional move whose first value input is the constant zero.
8588   format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}
8589   ins_cost(INSN_COST * 2);
8590 
8591   ins_encode %{
8592     // The immediate is never loaded: zr is passed in its place.
8593     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
8594     Register dst_reg = $dst$$Register;
8595     __ cselw(dst_reg, $src$$Register, zr, cond);
8596   %}
8597 
8598   ins_pipe(icond_reg);
8599 %}
8600 
8601 instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
8602   match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));
8603   // Unsigned-compare int conditional move whose first value input is the constant zero.
8604   format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}
8605   ins_cost(INSN_COST * 2);
8606 
8607   ins_encode %{
8608     // The immediate is never loaded: zr is passed in its place.
8609     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
8610     Register dst_reg = $dst$$Register;
8611     __ cselw(dst_reg, $src$$Register, zr, cond);
8612   %}
8613 
8614   ins_pipe(icond_reg);
8615 %}
8616 
8617 instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
8618   match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));
8619   // Signed-compare int conditional move whose second value input is the constant zero.
8620   format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}
8621   ins_cost(INSN_COST * 2);
8622 
8623   ins_encode %{
8624     // The immediate is never loaded: zr is passed in its place.
8625     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
8626     Register dst_reg = $dst$$Register;
8627     __ cselw(dst_reg, zr, $src$$Register, cond);
8628   %}
8629 
8630   ins_pipe(icond_reg);
8631 %}
8632 
8633 instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
8634   match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));
8635   // Unsigned-compare int conditional move whose second value input is the constant zero.
8636   format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}
8637   ins_cost(INSN_COST * 2);
8638 
8639   ins_encode %{
8640     // The immediate is never loaded: zr is passed in its place.
8641     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
8642     Register dst_reg = $dst$$Register;
8643     __ cselw(dst_reg, zr, $src$$Register, cond);
8644   %}
8645 
8646   ins_pipe(icond_reg);
8647 %}
8648 
8649 // special case for creating a boolean 0 or 1
8650 
8651 // n.b. this is selected in preference to the rule above because it
8652 // avoids loading constants 0 and 1 into a source register
8653 
8654 instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
8655   match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));
8656   // Signed-compare rule for a CMoveI between the constants one and zero (note the match order: one, then zero).
8657   format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}
8658   ins_cost(INSN_COST * 2);
8659 
8660   ins_encode %{
8661     // Neither constant is loaded into a register: both
8662     // source positions of csincw are zr.
8663     // equivalently
8664     // cset(as_Register($dst$$reg),
8665     //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
8666     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
8667     __ csincw($dst$$Register, zr, zr, cond);
8668   %}
8669 
8670   ins_pipe(icond_none);
8671 %}
8672 
8673 instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
8674   match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));
8675   // Unsigned-compare rule for a CMoveI between the constants one and zero (note the match order: one, then zero).
8676   format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}
8677   ins_cost(INSN_COST * 2);
8678 
8679   ins_encode %{
8680     // Neither constant is loaded into a register: both
8681     // source positions of csincw are zr.
8682     // equivalently
8683     // cset(as_Register($dst$$reg),
8684     //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
8685     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
8686     __ csincw($dst$$Register, zr, zr, cond);
8687   %}
8688 
8689   ins_pipe(icond_none);
8690 %}
8691 
8692 instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
8693   match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));
8694   // Signed-compare flavour of the long conditional move between two registers.
8695   format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}
8696   ins_cost(INSN_COST * 2);
8697 
8698   ins_encode %{
8699     // Operand order follows the format string: dst, src2, src1, condition.
8700     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
8701     Register dst_reg = $dst$$Register;
8702     __ csel(dst_reg, $src2$$Register, $src1$$Register, cond);
8703   %}
8704 
8705   ins_pipe(icond_reg_reg);
8706 %}
8707 
8708 instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
8709   match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));
8710   // Unsigned-compare flavour of the long conditional move between two registers.
8711   format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}
8712   ins_cost(INSN_COST * 2);
8713 
8714   ins_encode %{
8715     // Operand order follows the format string: dst, src2, src1, condition.
8716     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
8717     Register dst_reg = $dst$$Register;
8718     __ csel(dst_reg, $src2$$Register, $src1$$Register, cond);
8719   %}
8720 
8721   ins_pipe(icond_reg_reg);
8722 %}
8723 
8724 // special cases where one arg is zero
8725 
8726 instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
8727   match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));
8728   // Signed-compare long conditional move whose second value input is the constant zero.
8729   format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}
8730   ins_cost(INSN_COST * 2);
8731 
8732   ins_encode %{
8733     // The immediate is never loaded: zr is passed in its place.
8734     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
8735     Register dst_reg = $dst$$Register;
8736     __ csel(dst_reg, zr, $src$$Register, cond);
8737   %}
8738 
8739   ins_pipe(icond_reg);
8740 %}
8741 
8742 instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
8743   match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));
8744   // Unsigned-compare long conditional move whose second value input is the constant zero.
8745   format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}
8746   ins_cost(INSN_COST * 2);
8747 
8748   ins_encode %{
8749     // The immediate is never loaded: zr is passed in its place.
8750     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
8751     Register dst_reg = $dst$$Register;
8752     __ csel(dst_reg, zr, $src$$Register, cond);
8753   %}
8754 
8755   ins_pipe(icond_reg);
8756 %}
8757 
8758 instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
8759   match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));
8760   // Signed-compare long conditional move whose first value input is the constant zero.
8761   format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}
8762   ins_cost(INSN_COST * 2);
8763 
8764   ins_encode %{
8765     // The immediate is never loaded: zr is passed in its place.
8766     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
8767     Register dst_reg = $dst$$Register;
8768     __ csel(dst_reg, $src$$Register, zr, cond);
8769   %}
8770 
8771   ins_pipe(icond_reg);
8772 %}
8773 
8774 instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
8775   match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));
8776   // Unsigned-compare long conditional move whose first value input is the constant zero.
8777   format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}
8778   ins_cost(INSN_COST * 2);
8779 
8780   ins_encode %{
8781     // The immediate is never loaded: zr is passed in its place.
8782     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
8783     Register dst_reg = $dst$$Register;
8784     __ csel(dst_reg, $src$$Register, zr, cond);
8785   %}
8786 
8787   ins_pipe(icond_reg);
8788 %}
8789 
8790 instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
8791   match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));
8792 
8793   ins_cost(INSN_COST * 2);
8794   format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}
8795 
8796   ins_encode %{
8797     __ csel(as_Register($dst$$reg),
8798             as_Register($src2$$reg),
8799             as_Register($src1$$reg),
8800             (Assembler::Condition)$cmp$$cmpcode);
8801   %}
8802 
8803   ins_pipe(icond_reg_reg);
8804 %}
8805 
8806 instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
8807   match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));
8808 
8809   ins_cost(INSN_COST * 2);
8810   format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}
8811 
8812   ins_encode %{
8813     __ csel(as_Register($dst$$reg),
8814             as_Register($src2$$reg),
8815             as_Register($src1$$reg),
8816             (Assembler::Condition)$cmp$$cmpcode);
8817   %}
8818 
8819   ins_pipe(icond_reg_reg);
8820 %}
8821 
8822 // special cases where one arg is zero
8823 
8824 instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
8825   match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));
8826 
8827   ins_cost(INSN_COST * 2);
8828   format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}
8829 
8830   ins_encode %{
8831     __ csel(as_Register($dst$$reg),
8832             zr,
8833             as_Register($src$$reg),
8834             (Assembler::Condition)$cmp$$cmpcode);
8835   %}
8836 
8837   ins_pipe(icond_reg);
8838 %}
8839 
8840 instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
8841   match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));
8842 
8843   ins_cost(INSN_COST * 2);
8844   format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}
8845 
8846   ins_encode %{
8847     __ csel(as_Register($dst$$reg),
8848             zr,
8849             as_Register($src$$reg),
8850             (Assembler::Condition)$cmp$$cmpcode);
8851   %}
8852 
8853   ins_pipe(icond_reg);
8854 %}
8855 
8856 instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
8857   match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));
8858 
8859   ins_cost(INSN_COST * 2);
8860   format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}
8861 
8862   ins_encode %{
8863     __ csel(as_Register($dst$$reg),
8864             as_Register($src$$reg),
8865             zr,
8866             (Assembler::Condition)$cmp$$cmpcode);
8867   %}
8868 
8869   ins_pipe(icond_reg);
8870 %}
8871 
8872 instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
8873   match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));
8874 
8875   ins_cost(INSN_COST * 2);
8876   format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}
8877 
8878   ins_encode %{
8879     __ csel(as_Register($dst$$reg),
8880             as_Register($src$$reg),
8881             zr,
8882             (Assembler::Condition)$cmp$$cmpcode);
8883   %}
8884 
8885   ins_pipe(icond_reg);
8886 %}
8887 
8888 instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
8889   match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));
8890 
8891   ins_cost(INSN_COST * 2);
8892   format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}
8893 
8894   ins_encode %{
8895     __ cselw(as_Register($dst$$reg),
8896              as_Register($src2$$reg),
8897              as_Register($src1$$reg),
8898              (Assembler::Condition)$cmp$$cmpcode);
8899   %}
8900 
8901   ins_pipe(icond_reg_reg);
8902 %}
8903 
8904 instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
8905   match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));
8906 
8907   ins_cost(INSN_COST * 2);
8908   format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}
8909 
8910   ins_encode %{
8911     __ cselw(as_Register($dst$$reg),
8912              as_Register($src2$$reg),
8913              as_Register($src1$$reg),
8914              (Assembler::Condition)$cmp$$cmpcode);
8915   %}
8916 
8917   ins_pipe(icond_reg_reg);
8918 %}
8919 
8920 // special cases where one arg is zero
8921 
8922 instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
8923   match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));
8924 
8925   ins_cost(INSN_COST * 2);
8926   format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}
8927 
8928   ins_encode %{
8929     __ cselw(as_Register($dst$$reg),
8930              zr,
8931              as_Register($src$$reg),
8932              (Assembler::Condition)$cmp$$cmpcode);
8933   %}
8934 
8935   ins_pipe(icond_reg);
8936 %}
8937 
8938 instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
8939   match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));
8940 
8941   ins_cost(INSN_COST * 2);
8942   format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}
8943 
8944   ins_encode %{
8945     __ cselw(as_Register($dst$$reg),
8946              zr,
8947              as_Register($src$$reg),
8948              (Assembler::Condition)$cmp$$cmpcode);
8949   %}
8950 
8951   ins_pipe(icond_reg);
8952 %}
8953 
8954 instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
8955   match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));
8956 
8957   ins_cost(INSN_COST * 2);
8958   format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}
8959 
8960   ins_encode %{
8961     __ cselw(as_Register($dst$$reg),
8962              as_Register($src$$reg),
8963              zr,
8964              (Assembler::Condition)$cmp$$cmpcode);
8965   %}
8966 
8967   ins_pipe(icond_reg);
8968 %}
8969 
8970 instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
8971   match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));
8972 
8973   ins_cost(INSN_COST * 2);
8974   format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}
8975 
8976   ins_encode %{
8977     __ cselw(as_Register($dst$$reg),
8978              as_Register($src$$reg),
8979              zr,
8980              (Assembler::Condition)$cmp$$cmpcode);
8981   %}
8982 
8983   ins_pipe(icond_reg);
8984 %}
8985 
8986 instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
8987 %{
8988   match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));
8989 
8990   ins_cost(INSN_COST * 3);
8991 
8992   format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
8993   ins_encode %{
8994     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
8995     __ fcsels(as_FloatRegister($dst$$reg),
8996               as_FloatRegister($src2$$reg),
8997               as_FloatRegister($src1$$reg),
8998               cond);
8999   %}
9000 
9001   ins_pipe(fp_cond_reg_reg_s);
9002 %}
9003 
9004 instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
9005 %{
9006   match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));
9007 
9008   ins_cost(INSN_COST * 3);
9009 
9010   format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
9011   ins_encode %{
9012     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
9013     __ fcsels(as_FloatRegister($dst$$reg),
9014               as_FloatRegister($src2$$reg),
9015               as_FloatRegister($src1$$reg),
9016               cond);
9017   %}
9018 
9019   ins_pipe(fp_cond_reg_reg_s);
9020 %}
9021 
9022 instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
9023 %{
9024   match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));
9025 
9026   ins_cost(INSN_COST * 3);
9027 
9028   format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
9029   ins_encode %{
9030     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
9031     __ fcseld(as_FloatRegister($dst$$reg),
9032               as_FloatRegister($src2$$reg),
9033               as_FloatRegister($src1$$reg),
9034               cond);
9035   %}
9036 
9037   ins_pipe(fp_cond_reg_reg_d);
9038 %}
9039 
9040 instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
9041 %{
9042   match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));
9043 
9044   ins_cost(INSN_COST * 3);
9045 
9046   format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
9047   ins_encode %{
9048     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
9049     __ fcseld(as_FloatRegister($dst$$reg),
9050               as_FloatRegister($src2$$reg),
9051               as_FloatRegister($src1$$reg),
9052               cond);
9053   %}
9054 
9055   ins_pipe(fp_cond_reg_reg_d);
9056 %}
9057 
9058 // ============================================================================
9059 // Arithmetic Instructions
9060 //
9061 
9062 // Integer Addition
9063 
9064 // TODO
9065 // these currently employ operations which do not set CR and hence are
9066 // not flagged as killing CR but we would like to isolate the cases
9067 // where we want to set flags from those where we don't. need to work
9068 // out how to do that.
9069 
9070 instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
9071   match(Set dst (AddI src1 src2));
9072 
9073   ins_cost(INSN_COST);
9074   format %{ "addw  $dst, $src1, $src2" %}
9075 
9076   ins_encode %{
9077     __ addw(as_Register($dst$$reg),
9078             as_Register($src1$$reg),
9079             as_Register($src2$$reg));
9080   %}
9081 
9082   ins_pipe(ialu_reg_reg);
9083 %}
9084 
9085 instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
9086   match(Set dst (AddI src1 src2));
9087 
9088   ins_cost(INSN_COST);
9089   format %{ "addw $dst, $src1, $src2" %}
9090 
9091   // use opcode to indicate that this is an add not a sub
9092   opcode(0x0);
9093 
9094   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
9095 
9096   ins_pipe(ialu_reg_imm);
9097 %}
9098 
9099 instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
9100   match(Set dst (AddI (ConvL2I src1) src2));
9101 
9102   ins_cost(INSN_COST);
9103   format %{ "addw $dst, $src1, $src2" %}
9104 
9105   // use opcode to indicate that this is an add not a sub
9106   opcode(0x0);
9107 
9108   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
9109 
9110   ins_pipe(ialu_reg_imm);
9111 %}
9112 
9113 // Pointer Addition
9114 instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
9115   match(Set dst (AddP src1 src2));
9116 
9117   ins_cost(INSN_COST);
9118   format %{ "add $dst, $src1, $src2\t# ptr" %}
9119 
9120   ins_encode %{
9121     __ add(as_Register($dst$$reg),
9122            as_Register($src1$$reg),
9123            as_Register($src2$$reg));
9124   %}
9125 
9126   ins_pipe(ialu_reg_reg);
9127 %}
9128 
9129 instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
9130   match(Set dst (AddP src1 (ConvI2L src2)));
9131 
9132   ins_cost(1.9 * INSN_COST);
9133   format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}
9134 
9135   ins_encode %{
9136     __ add(as_Register($dst$$reg),
9137            as_Register($src1$$reg),
9138            as_Register($src2$$reg), ext::sxtw);
9139   %}
9140 
9141   ins_pipe(ialu_reg_reg);
9142 %}
9143 
9144 instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
9145   match(Set dst (AddP src1 (LShiftL src2 scale)));
9146 
9147   ins_cost(1.9 * INSN_COST);
9148   format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}
9149 
9150   ins_encode %{
9151     __ lea(as_Register($dst$$reg),
9152            Address(as_Register($src1$$reg), as_Register($src2$$reg),
9153                    Address::lsl($scale$$constant)));
9154   %}
9155 
9156   ins_pipe(ialu_reg_reg_shift);
9157 %}
9158 
9159 instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
9160   match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));
9161 
9162   ins_cost(1.9 * INSN_COST);
9163   format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}
9164 
9165   ins_encode %{
9166     __ lea(as_Register($dst$$reg),
9167            Address(as_Register($src1$$reg), as_Register($src2$$reg),
9168                    Address::sxtw($scale$$constant)));
9169   %}
9170 
9171   ins_pipe(ialu_reg_reg_shift);
9172 %}
9173 
9174 instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
9175   match(Set dst (LShiftL (ConvI2L src) scale));
9176 
9177   ins_cost(INSN_COST);
9178   format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}
9179 
9180   ins_encode %{
9181     __ sbfiz(as_Register($dst$$reg),
9182           as_Register($src$$reg),
9183           $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
9184   %}
9185 
9186   ins_pipe(ialu_reg_shift);
9187 %}
9188 
9189 // Pointer Immediate Addition
9190 // n.b. this needs to be more expensive than using an indirect memory
9191 // operand
9192 instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
9193   match(Set dst (AddP src1 src2));
9194 
9195   ins_cost(INSN_COST);
9196   format %{ "add $dst, $src1, $src2\t# ptr" %}
9197 
9198   // use opcode to indicate that this is an add not a sub
9199   opcode(0x0);
9200 
9201   ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
9202 
9203   ins_pipe(ialu_reg_imm);
9204 %}
9205 
9206 // Long Addition
9207 instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
9208 
9209   match(Set dst (AddL src1 src2));
9210 
9211   ins_cost(INSN_COST);
9212   format %{ "add  $dst, $src1, $src2" %}
9213 
9214   ins_encode %{
9215     __ add(as_Register($dst$$reg),
9216            as_Register($src1$$reg),
9217            as_Register($src2$$reg));
9218   %}
9219 
9220   ins_pipe(ialu_reg_reg);
9221 %}
9222 
9223 // No constant pool entries requiredLong Immediate Addition.
9224 instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
9225   match(Set dst (AddL src1 src2));
9226 
9227   ins_cost(INSN_COST);
9228   format %{ "add $dst, $src1, $src2" %}
9229 
9230   // use opcode to indicate that this is an add not a sub
9231   opcode(0x0);
9232 
9233   ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
9234 
9235   ins_pipe(ialu_reg_imm);
9236 %}
9237 
9238 // Integer Subtraction
9239 instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
9240   match(Set dst (SubI src1 src2));
9241 
9242   ins_cost(INSN_COST);
9243   format %{ "subw  $dst, $src1, $src2" %}
9244 
9245   ins_encode %{
9246     __ subw(as_Register($dst$$reg),
9247             as_Register($src1$$reg),
9248             as_Register($src2$$reg));
9249   %}
9250 
9251   ins_pipe(ialu_reg_reg);
9252 %}
9253 
9254 // Immediate Subtraction
9255 instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
9256   match(Set dst (SubI src1 src2));
9257 
9258   ins_cost(INSN_COST);
9259   format %{ "subw $dst, $src1, $src2" %}
9260 
9261   // use opcode to indicate that this is a sub not an add
9262   opcode(0x1);
9263 
9264   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
9265 
9266   ins_pipe(ialu_reg_imm);
9267 %}
9268 
9269 // Long Subtraction
9270 instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
9271 
9272   match(Set dst (SubL src1 src2));
9273 
9274   ins_cost(INSN_COST);
9275   format %{ "sub  $dst, $src1, $src2" %}
9276 
9277   ins_encode %{
9278     __ sub(as_Register($dst$$reg),
9279            as_Register($src1$$reg),
9280            as_Register($src2$$reg));
9281   %}
9282 
9283   ins_pipe(ialu_reg_reg);
9284 %}
9285 
9286 // No constant pool entries requiredLong Immediate Subtraction.
9287 instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
9288   match(Set dst (SubL src1 src2));
9289 
9290   ins_cost(INSN_COST);
9291   format %{ "sub$dst, $src1, $src2" %}
9292 
9293   // use opcode to indicate that this is a sub not an add
9294   opcode(0x1);
9295 
9296   ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
9297 
9298   ins_pipe(ialu_reg_imm);
9299 %}
9300 
9301 // Integer Negation (special case for sub)
9302 
9303 instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
9304   match(Set dst (SubI zero src));
9305 
9306   ins_cost(INSN_COST);
9307   format %{ "negw $dst, $src\t# int" %}
9308 
9309   ins_encode %{
9310     __ negw(as_Register($dst$$reg),
9311             as_Register($src$$reg));
9312   %}
9313 
9314   ins_pipe(ialu_reg);
9315 %}
9316 
9317 // Long Negation
9318 
9319 instruct negL_reg(iRegLNoSp dst, iRegIorL2I src, immL0 zero, rFlagsReg cr) %{
9320   match(Set dst (SubL zero src));
9321 
9322   ins_cost(INSN_COST);
9323   format %{ "neg $dst, $src\t# long" %}
9324 
9325   ins_encode %{
9326     __ neg(as_Register($dst$$reg),
9327            as_Register($src$$reg));
9328   %}
9329 
9330   ins_pipe(ialu_reg);
9331 %}
9332 
9333 // Integer Multiply
9334 
9335 instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
9336   match(Set dst (MulI src1 src2));
9337 
9338   ins_cost(INSN_COST * 3);
9339   format %{ "mulw  $dst, $src1, $src2" %}
9340 
9341   ins_encode %{
9342     __ mulw(as_Register($dst$$reg),
9343             as_Register($src1$$reg),
9344             as_Register($src2$$reg));
9345   %}
9346 
9347   ins_pipe(imul_reg_reg);
9348 %}
9349 
9350 instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
9351   match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));
9352 
9353   ins_cost(INSN_COST * 3);
9354   format %{ "smull  $dst, $src1, $src2" %}
9355 
9356   ins_encode %{
9357     __ smull(as_Register($dst$$reg),
9358              as_Register($src1$$reg),
9359              as_Register($src2$$reg));
9360   %}
9361 
9362   ins_pipe(imul_reg_reg);
9363 %}
9364 
9365 // Long Multiply
9366 
9367 instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
9368   match(Set dst (MulL src1 src2));
9369 
9370   ins_cost(INSN_COST * 5);
9371   format %{ "mul  $dst, $src1, $src2" %}
9372 
9373   ins_encode %{
9374     __ mul(as_Register($dst$$reg),
9375            as_Register($src1$$reg),
9376            as_Register($src2$$reg));
9377   %}
9378 
9379   ins_pipe(lmul_reg_reg);
9380 %}
9381 
9382 instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
9383 %{
9384   match(Set dst (MulHiL src1 src2));
9385 
9386   ins_cost(INSN_COST * 7);
9387   format %{ "smulh   $dst, $src1, $src2, \t# mulhi" %}
9388 
9389   ins_encode %{
9390     __ smulh(as_Register($dst$$reg),
9391              as_Register($src1$$reg),
9392              as_Register($src2$$reg));
9393   %}
9394 
9395   ins_pipe(lmul_reg_reg);
9396 %}
9397 
9398 // Combined Integer Multiply & Add/Sub
9399 
9400 instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
9401   match(Set dst (AddI src3 (MulI src1 src2)));
9402 
9403   ins_cost(INSN_COST * 3);
9404   format %{ "madd  $dst, $src1, $src2, $src3" %}
9405 
9406   ins_encode %{
9407     __ maddw(as_Register($dst$$reg),
9408              as_Register($src1$$reg),
9409              as_Register($src2$$reg),
9410              as_Register($src3$$reg));
9411   %}
9412 
9413   ins_pipe(imac_reg_reg);
9414 %}
9415 
9416 instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
9417   match(Set dst (SubI src3 (MulI src1 src2)));
9418 
9419   ins_cost(INSN_COST * 3);
9420   format %{ "msub  $dst, $src1, $src2, $src3" %}
9421 
9422   ins_encode %{
9423     __ msubw(as_Register($dst$$reg),
9424              as_Register($src1$$reg),
9425              as_Register($src2$$reg),
9426              as_Register($src3$$reg));
9427   %}
9428 
9429   ins_pipe(imac_reg_reg);
9430 %}
9431 
9432 // Combined Long Multiply & Add/Sub
9433 
9434 instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
9435   match(Set dst (AddL src3 (MulL src1 src2)));
9436 
9437   ins_cost(INSN_COST * 5);
9438   format %{ "madd  $dst, $src1, $src2, $src3" %}
9439 
9440   ins_encode %{
9441     __ madd(as_Register($dst$$reg),
9442             as_Register($src1$$reg),
9443             as_Register($src2$$reg),
9444             as_Register($src3$$reg));
9445   %}
9446 
9447   ins_pipe(lmac_reg_reg);
9448 %}
9449 
9450 instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
9451   match(Set dst (SubL src3 (MulL src1 src2)));
9452 
9453   ins_cost(INSN_COST * 5);
9454   format %{ "msub  $dst, $src1, $src2, $src3" %}
9455 
9456   ins_encode %{
9457     __ msub(as_Register($dst$$reg),
9458             as_Register($src1$$reg),
9459             as_Register($src2$$reg),
9460             as_Register($src3$$reg));
9461   %}
9462 
9463   ins_pipe(lmac_reg_reg);
9464 %}
9465 
9466 // Integer Divide
9467 
9468 instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
9469   match(Set dst (DivI src1 src2));
9470 
9471   ins_cost(INSN_COST * 19);
9472   format %{ "sdivw  $dst, $src1, $src2" %}
9473 
9474   ins_encode(aarch64_enc_divw(dst, src1, src2));
9475   ins_pipe(idiv_reg_reg);
9476 %}
9477 
9478 instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
9479   match(Set dst (URShiftI (RShiftI src1 div1) div2));
9480   ins_cost(INSN_COST);
9481   format %{ "lsrw $dst, $src1, $div1" %}
9482   ins_encode %{
9483     __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
9484   %}
9485   ins_pipe(ialu_reg_shift);
9486 %}
9487 
9488 instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
9489   match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
9490   ins_cost(INSN_COST);
9491   format %{ "addw $dst, $src, LSR $div1" %}
9492 
9493   ins_encode %{
9494     __ addw(as_Register($dst$$reg),
9495               as_Register($src$$reg),
9496               as_Register($src$$reg),
9497               Assembler::LSR, 31);
9498   %}
9499   ins_pipe(ialu_reg);
9500 %}
9501 
9502 // Long Divide
9503 
9504 instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
9505   match(Set dst (DivL src1 src2));
9506 
9507   ins_cost(INSN_COST * 35);
9508   format %{ "sdiv   $dst, $src1, $src2" %}
9509 
9510   ins_encode(aarch64_enc_div(dst, src1, src2));
9511   ins_pipe(ldiv_reg_reg);
9512 %}
9513 
9514 instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2) %{
9515   match(Set dst (URShiftL (RShiftL src1 div1) div2));
9516   ins_cost(INSN_COST);
9517   format %{ "lsr $dst, $src1, $div1" %}
9518   ins_encode %{
9519     __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
9520   %}
9521   ins_pipe(ialu_reg_shift);
9522 %}
9523 
9524 instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
9525   match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
9526   ins_cost(INSN_COST);
9527   format %{ "add $dst, $src, $div1" %}
9528 
9529   ins_encode %{
9530     __ add(as_Register($dst$$reg),
9531               as_Register($src$$reg),
9532               as_Register($src$$reg),
9533               Assembler::LSR, 63);
9534   %}
9535   ins_pipe(ialu_reg);
9536 %}
9537 
9538 // Integer Remainder
9539 
9540 instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
9541   match(Set dst (ModI src1 src2));
9542 
9543   ins_cost(INSN_COST * 22);
9544   format %{ "sdivw  rscratch1, $src1, $src2\n\t"
9545             "msubw($dst, rscratch1, $src2, $src1" %}
9546 
9547   ins_encode(aarch64_enc_modw(dst, src1, src2));
9548   ins_pipe(idiv_reg_reg);
9549 %}
9550 
9551 // Long Remainder
9552 
9553 instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
9554   match(Set dst (ModL src1 src2));
9555 
9556   ins_cost(INSN_COST * 38);
9557   format %{ "sdiv   rscratch1, $src1, $src2\n"
9558             "msub($dst, rscratch1, $src2, $src1" %}
9559 
9560   ins_encode(aarch64_enc_mod(dst, src1, src2));
9561   ins_pipe(ldiv_reg_reg);
9562 %}
9563 
9564 // Integer Shifts
9565 
9566 // Shift Left Register
9567 instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
9568   match(Set dst (LShiftI src1 src2));
9569 
9570   ins_cost(INSN_COST * 2);
9571   format %{ "lslvw  $dst, $src1, $src2" %}
9572 
9573   ins_encode %{
9574     __ lslvw(as_Register($dst$$reg),
9575              as_Register($src1$$reg),
9576              as_Register($src2$$reg));
9577   %}
9578 
9579   ins_pipe(ialu_reg_reg_vshift);
9580 %}
9581 
9582 // Shift Left Immediate
9583 instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
9584   match(Set dst (LShiftI src1 src2));
9585 
9586   ins_cost(INSN_COST);
9587   format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}
9588 
9589   ins_encode %{
9590     __ lslw(as_Register($dst$$reg),
9591             as_Register($src1$$reg),
9592             $src2$$constant & 0x1f);
9593   %}
9594 
9595   ins_pipe(ialu_reg_shift);
9596 %}
9597 
9598 // Shift Right Logical Register
9599 instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
9600   match(Set dst (URShiftI src1 src2));
9601 
9602   ins_cost(INSN_COST * 2);
9603   format %{ "lsrvw  $dst, $src1, $src2" %}
9604 
9605   ins_encode %{
9606     __ lsrvw(as_Register($dst$$reg),
9607              as_Register($src1$$reg),
9608              as_Register($src2$$reg));
9609   %}
9610 
9611   ins_pipe(ialu_reg_reg_vshift);
9612 %}
9613 
9614 // Shift Right Logical Immediate
9615 instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
9616   match(Set dst (URShiftI src1 src2));
9617 
9618   ins_cost(INSN_COST);
9619   format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}
9620 
9621   ins_encode %{
9622     __ lsrw(as_Register($dst$$reg),
9623             as_Register($src1$$reg),
9624             $src2$$constant & 0x1f);
9625   %}
9626 
9627   ins_pipe(ialu_reg_shift);
9628 %}
9629 
9630 // Shift Right Arithmetic Register
9631 instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
9632   match(Set dst (RShiftI src1 src2));
9633 
9634   ins_cost(INSN_COST * 2);
9635   format %{ "asrvw  $dst, $src1, $src2" %}
9636 
9637   ins_encode %{
9638     __ asrvw(as_Register($dst$$reg),
9639              as_Register($src1$$reg),
9640              as_Register($src2$$reg));
9641   %}
9642 
9643   ins_pipe(ialu_reg_reg_vshift);
9644 %}
9645 
9646 // Shift Right Arithmetic Immediate
9647 instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
9648   match(Set dst (RShiftI src1 src2));
9649 
9650   ins_cost(INSN_COST);
9651   format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}
9652 
9653   ins_encode %{
9654     __ asrw(as_Register($dst$$reg),
9655             as_Register($src1$$reg),
9656             $src2$$constant & 0x1f);
9657   %}
9658 
9659   ins_pipe(ialu_reg_shift);
9660 %}
9661 
9662 // Combined Int Mask and Right Shift (using UBFM)
9663 // TODO
9664 
9665 // Long Shifts
9666 
9667 // Shift Left Register
9668 instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
9669   match(Set dst (LShiftL src1 src2));
9670 
9671   ins_cost(INSN_COST * 2);
9672   format %{ "lslv  $dst, $src1, $src2" %}
9673 
9674   ins_encode %{
9675     __ lslv(as_Register($dst$$reg),
9676             as_Register($src1$$reg),
9677             as_Register($src2$$reg));
9678   %}
9679 
9680   ins_pipe(ialu_reg_reg_vshift);
9681 %}
9682 
9683 // Shift Left Immediate
9684 instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
9685   match(Set dst (LShiftL src1 src2));
9686 
9687   ins_cost(INSN_COST);
9688   format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}
9689 
9690   ins_encode %{
9691     __ lsl(as_Register($dst$$reg),
9692             as_Register($src1$$reg),
9693             $src2$$constant & 0x3f);
9694   %}
9695 
9696   ins_pipe(ialu_reg_shift);
9697 %}
9698 
9699 // Shift Right Logical Register
9700 instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
9701   match(Set dst (URShiftL src1 src2));
9702 
9703   ins_cost(INSN_COST * 2);
9704   format %{ "lsrv  $dst, $src1, $src2" %}
9705 
9706   ins_encode %{
9707     __ lsrv(as_Register($dst$$reg),
9708             as_Register($src1$$reg),
9709             as_Register($src2$$reg));
9710   %}
9711 
9712   ins_pipe(ialu_reg_reg_vshift);
9713 %}
9714 
9715 // Shift Right Logical Immediate
9716 instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
9717   match(Set dst (URShiftL src1 src2));
9718 
9719   ins_cost(INSN_COST);
9720   format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}
9721 
9722   ins_encode %{
9723     __ lsr(as_Register($dst$$reg),
9724            as_Register($src1$$reg),
9725            $src2$$constant & 0x3f);
9726   %}
9727 
9728   ins_pipe(ialu_reg_shift);
9729 %}
9730 
9731 // A special-case pattern for card table stores.
9732 instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
9733   match(Set dst (URShiftL (CastP2X src1) src2));
9734 
9735   ins_cost(INSN_COST);
9736   format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}
9737 
9738   ins_encode %{
9739     __ lsr(as_Register($dst$$reg),
9740            as_Register($src1$$reg),
9741            $src2$$constant & 0x3f);
9742   %}
9743 
9744   ins_pipe(ialu_reg_shift);
9745 %}
9746 
9747 // Shift Right Arithmetic Register
9748 instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
9749   match(Set dst (RShiftL src1 src2));
9750 
9751   ins_cost(INSN_COST * 2);
9752   format %{ "asrv  $dst, $src1, $src2" %}
9753 
9754   ins_encode %{
9755     __ asrv(as_Register($dst$$reg),
9756             as_Register($src1$$reg),
9757             as_Register($src2$$reg));
9758   %}
9759 
9760   ins_pipe(ialu_reg_reg_vshift);
9761 %}
9762 
9763 // Shift Right Arithmetic Immediate
9764 instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
9765   match(Set dst (RShiftL src1 src2));
9766 
9767   ins_cost(INSN_COST);
9768   format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}
9769 
9770   ins_encode %{
9771     __ asr(as_Register($dst$$reg),
9772            as_Register($src1$$reg),
9773            $src2$$constant & 0x3f);
9774   %}
9775 
9776   ins_pipe(ialu_reg_shift);
9777 %}
9778 
9779 // BEGIN This section of the file is automatically generated. Do not edit --------------
9780 
9781 instruct regL_not_reg(iRegLNoSp dst,
9782                          iRegL src1, immL_M1 m1,
9783                          rFlagsReg cr) %{
9784   match(Set dst (XorL src1 m1));
9785   ins_cost(INSN_COST);
9786   format %{ "eon  $dst, $src1, zr" %}
9787 
9788   ins_encode %{
9789     __ eon(as_Register($dst$$reg),
9790               as_Register($src1$$reg),
9791               zr,
9792               Assembler::LSL, 0);
9793   %}
9794 
9795   ins_pipe(ialu_reg);
9796 %}
9797 instruct regI_not_reg(iRegINoSp dst,
9798                          iRegIorL2I src1, immI_M1 m1,
9799                          rFlagsReg cr) %{
9800   match(Set dst (XorI src1 m1));
9801   ins_cost(INSN_COST);
9802   format %{ "eonw  $dst, $src1, zr" %}
9803 
9804   ins_encode %{
9805     __ eonw(as_Register($dst$$reg),
9806               as_Register($src1$$reg),
9807               zr,
9808               Assembler::LSL, 0);
9809   %}
9810 
9811   ins_pipe(ialu_reg);
9812 %}
9813 
9814 instruct AndI_reg_not_reg(iRegINoSp dst,
9815                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
9816                          rFlagsReg cr) %{
9817   match(Set dst (AndI src1 (XorI src2 m1)));
9818   ins_cost(INSN_COST);
9819   format %{ "bicw  $dst, $src1, $src2" %}
9820 
9821   ins_encode %{
9822     __ bicw(as_Register($dst$$reg),
9823               as_Register($src1$$reg),
9824               as_Register($src2$$reg),
9825               Assembler::LSL, 0);
9826   %}
9827 
9828   ins_pipe(ialu_reg_reg);
9829 %}
9830 
// Long and-not. Matches (AndL src1 (XorL src2 m1)), where m1 is an immL_M1
// operand (all-ones, judging by its type name), and emits a single bic
// with an unshifted second source.
instruct AndL_reg_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                          immL_M1 m1, rFlagsReg cr)
%{
  match(Set dst (AndL src1 (XorL src2 m1)));

  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    __ bic(dst_reg, lhs_reg, rhs_reg, Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9847 
// Int or-not. Matches (OrI src1 (XorI src2 m1)), where m1 is an immI_M1
// operand (all-ones, judging by its type name), and emits a single ornw
// with an unshifted second source.
instruct OrI_reg_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                         immI_M1 m1, rFlagsReg cr)
%{
  match(Set dst (OrI src1 (XorI src2 m1)));

  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    __ ornw(dst_reg, lhs_reg, rhs_reg, Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9864 
// Long or-not. Matches (OrL src1 (XorL src2 m1)), where m1 is an immL_M1
// operand (all-ones, judging by its type name), and emits a single orn
// with an unshifted second source.
instruct OrL_reg_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                         immL_M1 m1, rFlagsReg cr)
%{
  match(Set dst (OrL src1 (XorL src2 m1)));

  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    __ orn(dst_reg, lhs_reg, rhs_reg, Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9881 
// Int xor-not. Matches (XorI m1 (XorI src2 src1)), where m1 is an immI_M1
// operand (all-ones, judging by its type name), and emits a single eonw
// with an unshifted second source.
instruct XorI_reg_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                          immI_M1 m1, rFlagsReg cr)
%{
  match(Set dst (XorI m1 (XorI src2 src1)));

  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    __ eonw(dst_reg, lhs_reg, rhs_reg, Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9898 
// Long xor-not. Matches (XorL m1 (XorL src2 src1)), where m1 is an immL_M1
// operand (all-ones, judging by its type name), and emits a single eon
// with an unshifted second source.
instruct XorL_reg_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                          immL_M1 m1, rFlagsReg cr)
%{
  match(Set dst (XorL m1 (XorL src2 src1)));

  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    __ eon(dst_reg, lhs_reg, rhs_reg, Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9915 
// Int and-not with an unsigned-right-shifted operand. Matches
// (AndI src1 (XorI (URShiftI src2 src3) src4)), where src4 is an immI_M1
// operand (all-ones, judging by its type name), and emits a single bicw
// using LSR. The constant shift count is masked to its low 5 bits.
instruct AndI_reg_URShift_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                                  immI src3, immI_M1 src4, rFlagsReg cr)
%{
  match(Set dst (AndI src1 (XorI (URShiftI src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ bicw(dst_reg, lhs_reg, rhs_reg, Assembler::LSR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9933 
// Long and-not with an unsigned-right-shifted operand. Matches
// (AndL src1 (XorL (URShiftL src2 src3) src4)), where src4 is an immL_M1
// operand (all-ones, judging by its type name), and emits a single bic
// using LSR. The constant shift count is masked to its low 6 bits.
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                  immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (AndL src1 (XorL (URShiftL src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ bic(dst_reg, lhs_reg, rhs_reg, Assembler::LSR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9951 
// Int and-not with a signed-right-shifted operand. Matches
// (AndI src1 (XorI (RShiftI src2 src3) src4)), where src4 is an immI_M1
// operand (all-ones, judging by its type name), and emits a single bicw
// using ASR. The constant shift count is masked to its low 5 bits.
instruct AndI_reg_RShift_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                                 immI src3, immI_M1 src4, rFlagsReg cr)
%{
  match(Set dst (AndI src1 (XorI (RShiftI src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ bicw(dst_reg, lhs_reg, rhs_reg, Assembler::ASR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9969 
// Long and-not with a signed-right-shifted operand. Matches
// (AndL src1 (XorL (RShiftL src2 src3) src4)), where src4 is an immL_M1
// operand (all-ones, judging by its type name), and emits a single bic
// using ASR. The constant shift count is masked to its low 6 bits.
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                 immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (AndL src1 (XorL (RShiftL src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ bic(dst_reg, lhs_reg, rhs_reg, Assembler::ASR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9987 
// Int and-not with a left-shifted operand. Matches
// (AndI src1 (XorI (LShiftI src2 src3) src4)), where src4 is an immI_M1
// operand (all-ones, judging by its type name), and emits a single bicw
// using LSL. The constant shift count is masked to its low 5 bits.
instruct AndI_reg_LShift_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                                 immI src3, immI_M1 src4, rFlagsReg cr)
%{
  match(Set dst (AndI src1 (XorI (LShiftI src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ bicw(dst_reg, lhs_reg, rhs_reg, Assembler::LSL, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10005 
// Long and-not with a left-shifted operand. Matches
// (AndL src1 (XorL (LShiftL src2 src3) src4)), where src4 is an immL_M1
// operand (all-ones, judging by its type name), and emits a single bic
// using LSL. The constant shift count is masked to its low 6 bits.
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                 immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (AndL src1 (XorL (LShiftL src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ bic(dst_reg, lhs_reg, rhs_reg, Assembler::LSL, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10023 
// Int xor-not with an unsigned-right-shifted operand. Matches
// (XorI src4 (XorI (URShiftI src2 src3) src1)), where src4 is an immI_M1
// operand (all-ones, judging by its type name), and emits a single eonw
// using LSR. The constant shift count is masked to its low 5 bits.
instruct XorI_reg_URShift_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                                  immI src3, immI_M1 src4, rFlagsReg cr)
%{
  match(Set dst (XorI src4 (XorI (URShiftI src2 src3) src1)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ eonw(dst_reg, lhs_reg, rhs_reg, Assembler::LSR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10041 
// Long xor-not with an unsigned-right-shifted operand. Matches
// (XorL src4 (XorL (URShiftL src2 src3) src1)), where src4 is an immL_M1
// operand (all-ones, judging by its type name), and emits a single eon
// using LSR. The constant shift count is masked to its low 6 bits.
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                  immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (XorL src4 (XorL (URShiftL src2 src3) src1)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ eon(dst_reg, lhs_reg, rhs_reg, Assembler::LSR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10059 
// Int xor-not with a signed-right-shifted operand. Matches
// (XorI src4 (XorI (RShiftI src2 src3) src1)), where src4 is an immI_M1
// operand (all-ones, judging by its type name), and emits a single eonw
// using ASR. The constant shift count is masked to its low 5 bits.
instruct XorI_reg_RShift_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                                 immI src3, immI_M1 src4, rFlagsReg cr)
%{
  match(Set dst (XorI src4 (XorI (RShiftI src2 src3) src1)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ eonw(dst_reg, lhs_reg, rhs_reg, Assembler::ASR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10077 
// Long xor-not with a signed-right-shifted operand. Matches
// (XorL src4 (XorL (RShiftL src2 src3) src1)), where src4 is an immL_M1
// operand (all-ones, judging by its type name), and emits a single eon
// using ASR. The constant shift count is masked to its low 6 bits.
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                 immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (XorL src4 (XorL (RShiftL src2 src3) src1)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ eon(dst_reg, lhs_reg, rhs_reg, Assembler::ASR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10095 
// Int xor-not with a left-shifted operand. Matches
// (XorI src4 (XorI (LShiftI src2 src3) src1)), where src4 is an immI_M1
// operand (all-ones, judging by its type name), and emits a single eonw
// using LSL. The constant shift count is masked to its low 5 bits.
instruct XorI_reg_LShift_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                                 immI src3, immI_M1 src4, rFlagsReg cr)
%{
  match(Set dst (XorI src4 (XorI (LShiftI src2 src3) src1)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ eonw(dst_reg, lhs_reg, rhs_reg, Assembler::LSL, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10113 
// Long xor-not with a left-shifted operand. Matches
// (XorL src4 (XorL (LShiftL src2 src3) src1)), where src4 is an immL_M1
// operand (all-ones, judging by its type name), and emits a single eon
// using LSL. The constant shift count is masked to its low 6 bits.
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                 immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (XorL src4 (XorL (LShiftL src2 src3) src1)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ eon(dst_reg, lhs_reg, rhs_reg, Assembler::LSL, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10131 
// Int or-not with an unsigned-right-shifted operand. Matches
// (OrI src1 (XorI (URShiftI src2 src3) src4)), where src4 is an immI_M1
// operand (all-ones, judging by its type name), and emits a single ornw
// using LSR. The constant shift count is masked to its low 5 bits.
instruct OrI_reg_URShift_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                                 immI src3, immI_M1 src4, rFlagsReg cr)
%{
  match(Set dst (OrI src1 (XorI (URShiftI src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ ornw(dst_reg, lhs_reg, rhs_reg, Assembler::LSR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10149 
// Long or-not with an unsigned-right-shifted operand. Matches
// (OrL src1 (XorL (URShiftL src2 src3) src4)), where src4 is an immL_M1
// operand (all-ones, judging by its type name), and emits a single orn
// using LSR. The constant shift count is masked to its low 6 bits.
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                 immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (OrL src1 (XorL (URShiftL src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ orn(dst_reg, lhs_reg, rhs_reg, Assembler::LSR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10167 
// Int or-not with a signed-right-shifted operand. Matches
// (OrI src1 (XorI (RShiftI src2 src3) src4)), where src4 is an immI_M1
// operand (all-ones, judging by its type name), and emits a single ornw
// using ASR. The constant shift count is masked to its low 5 bits.
instruct OrI_reg_RShift_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                                immI src3, immI_M1 src4, rFlagsReg cr)
%{
  match(Set dst (OrI src1 (XorI (RShiftI src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ ornw(dst_reg, lhs_reg, rhs_reg, Assembler::ASR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10185 
// Long or-not with a signed-right-shifted operand. Matches
// (OrL src1 (XorL (RShiftL src2 src3) src4)), where src4 is an immL_M1
// operand (all-ones, judging by its type name), and emits a single orn
// using ASR. The constant shift count is masked to its low 6 bits.
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (OrL src1 (XorL (RShiftL src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ orn(dst_reg, lhs_reg, rhs_reg, Assembler::ASR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10203 
// Int or-not with a left-shifted operand. Matches
// (OrI src1 (XorI (LShiftI src2 src3) src4)), where src4 is an immI_M1
// operand (all-ones, judging by its type name), and emits a single ornw
// using LSL. The constant shift count is masked to its low 5 bits.
instruct OrI_reg_LShift_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                                immI src3, immI_M1 src4, rFlagsReg cr)
%{
  match(Set dst (OrI src1 (XorI (LShiftI src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ ornw(dst_reg, lhs_reg, rhs_reg, Assembler::LSL, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10221 
// Long or-not with a left-shifted operand. Matches
// (OrL src1 (XorL (LShiftL src2 src3) src4)), where src4 is an immL_M1
// operand (all-ones, judging by its type name), and emits a single orn
// using LSL. The constant shift count is masked to its low 6 bits.
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (OrL src1 (XorL (LShiftL src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ orn(dst_reg, lhs_reg, rhs_reg, Assembler::LSL, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10239 
// Int AND with an unsigned-right-shifted operand. Matches
// (AndI src1 (URShiftI src2 src3)) and emits a single andw using LSR.
// The constant shift count is masked to its low 5 bits.
instruct AndI_reg_URShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                              immI src3, rFlagsReg cr)
%{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ andw(dst_reg, lhs_reg, rhs_reg, Assembler::LSR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10258 
// Long AND with an unsigned-right-shifted operand. Matches
// (AndL src1 (URShiftL src2 src3)) and emits a single andr using LSR.
// The constant shift count is masked to its low 6 bits.
instruct AndL_reg_URShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                              immI src3, rFlagsReg cr)
%{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ andr(dst_reg, lhs_reg, rhs_reg, Assembler::LSR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10277 
// Int AND with a signed-right-shifted operand. Matches
// (AndI src1 (RShiftI src2 src3)) and emits a single andw using ASR.
// The constant shift count is masked to its low 5 bits.
instruct AndI_reg_RShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ andw(dst_reg, lhs_reg, rhs_reg, Assembler::ASR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10296 
// Long AND with a signed-right-shifted operand. Matches
// (AndL src1 (RShiftL src2 src3)) and emits a single andr using ASR.
// The constant shift count is masked to its low 6 bits.
instruct AndL_reg_RShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ andr(dst_reg, lhs_reg, rhs_reg, Assembler::ASR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10315 
// Int AND with a left-shifted operand. Matches
// (AndI src1 (LShiftI src2 src3)) and emits a single andw using LSL.
// The constant shift count is masked to its low 5 bits.
instruct AndI_reg_LShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ andw(dst_reg, lhs_reg, rhs_reg, Assembler::LSL, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10334 
// Long AND with a left-shifted operand. Matches
// (AndL src1 (LShiftL src2 src3)) and emits a single andr using LSL.
// The constant shift count is masked to its low 6 bits.
instruct AndL_reg_LShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ andr(dst_reg, lhs_reg, rhs_reg, Assembler::LSL, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10353 
// Int XOR with an unsigned-right-shifted operand. Matches
// (XorI src1 (URShiftI src2 src3)) and emits a single eorw using LSR.
// The constant shift count is masked to its low 5 bits.
instruct XorI_reg_URShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                              immI src3, rFlagsReg cr)
%{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ eorw(dst_reg, lhs_reg, rhs_reg, Assembler::LSR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10372 
// Long XOR with an unsigned-right-shifted operand. Matches
// (XorL src1 (URShiftL src2 src3)) and emits a single eor using LSR.
// The constant shift count is masked to its low 6 bits.
instruct XorL_reg_URShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                              immI src3, rFlagsReg cr)
%{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ eor(dst_reg, lhs_reg, rhs_reg, Assembler::LSR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10391 
// Int XOR with a signed-right-shifted operand. Matches
// (XorI src1 (RShiftI src2 src3)) and emits a single eorw using ASR.
// The constant shift count is masked to its low 5 bits.
instruct XorI_reg_RShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ eorw(dst_reg, lhs_reg, rhs_reg, Assembler::ASR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10410 
// Long XOR with a signed-right-shifted operand. Matches
// (XorL src1 (RShiftL src2 src3)) and emits a single eor using ASR.
// The constant shift count is masked to its low 6 bits.
instruct XorL_reg_RShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ eor(dst_reg, lhs_reg, rhs_reg, Assembler::ASR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10429 
// Int XOR with a left-shifted operand. Matches
// (XorI src1 (LShiftI src2 src3)) and emits a single eorw using LSL.
// The constant shift count is masked to its low 5 bits.
instruct XorI_reg_LShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ eorw(dst_reg, lhs_reg, rhs_reg, Assembler::LSL, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10448 
// Long XOR with a left-shifted operand. Matches
// (XorL src1 (LShiftL src2 src3)) and emits a single eor using LSL.
// The constant shift count is masked to its low 6 bits.
instruct XorL_reg_LShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ eor(dst_reg, lhs_reg, rhs_reg, Assembler::LSL, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10467 
// Int OR with an unsigned-right-shifted operand. Matches
// (OrI src1 (URShiftI src2 src3)) and emits a single orrw using LSR.
// The constant shift count is masked to its low 5 bits.
instruct OrI_reg_URShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ orrw(dst_reg, lhs_reg, rhs_reg, Assembler::LSR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10486 
// Long OR with an unsigned-right-shifted operand. Matches
// (OrL src1 (URShiftL src2 src3)) and emits a single orr using LSR.
// The constant shift count is masked to its low 6 bits.
instruct OrL_reg_URShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ orr(dst_reg, lhs_reg, rhs_reg, Assembler::LSR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10505 
// Int OR with a signed-right-shifted operand. Matches
// (OrI src1 (RShiftI src2 src3)) and emits a single orrw using ASR.
// The constant shift count is masked to its low 5 bits.
instruct OrI_reg_RShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                            immI src3, rFlagsReg cr)
%{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ orrw(dst_reg, lhs_reg, rhs_reg, Assembler::ASR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10524 
// Long OR with a signed-right-shifted operand. Matches
// (OrL src1 (RShiftL src2 src3)) and emits a single orr using ASR.
// The constant shift count is masked to its low 6 bits.
instruct OrL_reg_RShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                            immI src3, rFlagsReg cr)
%{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ orr(dst_reg, lhs_reg, rhs_reg, Assembler::ASR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10543 
// Int OR with a left-shifted operand. Matches
// (OrI src1 (LShiftI src2 src3)) and emits a single orrw using LSL.
// The constant shift count is masked to its low 5 bits.
instruct OrI_reg_LShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                            immI src3, rFlagsReg cr)
%{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ orrw(dst_reg, lhs_reg, rhs_reg, Assembler::LSL, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10562 
// Long OR with a left-shifted operand. Matches
// (OrL src1 (LShiftL src2 src3)) and emits a single orr using LSL.
// The constant shift count is masked to its low 6 bits.
instruct OrL_reg_LShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                            immI src3, rFlagsReg cr)
%{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ orr(dst_reg, lhs_reg, rhs_reg, Assembler::LSL, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10581 
// Int add with an unsigned-right-shifted operand. Matches
// (AddI src1 (URShiftI src2 src3)) and emits a single addw using LSR.
// The constant shift count is masked to its low 5 bits.
instruct AddI_reg_URShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                              immI src3, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ addw(dst_reg, lhs_reg, rhs_reg, Assembler::LSR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10600 
// Long add with an unsigned-right-shifted operand. Matches
// (AddL src1 (URShiftL src2 src3)) and emits a single add using LSR.
// The constant shift count is masked to its low 6 bits.
instruct AddL_reg_URShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                              immI src3, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ add(dst_reg, lhs_reg, rhs_reg, Assembler::LSR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10619 
// Int add with a signed-right-shifted operand. Matches
// (AddI src1 (RShiftI src2 src3)) and emits a single addw using ASR.
// The constant shift count is masked to its low 5 bits.
instruct AddI_reg_RShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ addw(dst_reg, lhs_reg, rhs_reg, Assembler::ASR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10638 
// Long add with a signed-right-shifted operand. Matches
// (AddL src1 (RShiftL src2 src3)) and emits a single add using ASR.
// The constant shift count is masked to its low 6 bits.
instruct AddL_reg_RShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ add(dst_reg, lhs_reg, rhs_reg, Assembler::ASR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10657 
// Int add with a left-shifted operand. Matches
// (AddI src1 (LShiftI src2 src3)) and emits a single addw using LSL.
// The constant shift count is masked to its low 5 bits.
instruct AddI_reg_LShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ addw(dst_reg, lhs_reg, rhs_reg, Assembler::LSL, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10676 
// Long add with a left-shifted operand. Matches
// (AddL src1 (LShiftL src2 src3)) and emits a single add using LSL.
// The constant shift count is masked to its low 6 bits.
instruct AddL_reg_LShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ add(dst_reg, lhs_reg, rhs_reg, Assembler::LSL, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10695 
// Int subtract of an unsigned-right-shifted operand. Matches
// (SubI src1 (URShiftI src2 src3)) and emits a single subw using LSR.
// The constant shift count is masked to its low 5 bits.
instruct SubI_reg_URShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                              immI src3, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ subw(dst_reg, lhs_reg, rhs_reg, Assembler::LSR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10714 
// Long subtract of an unsigned-right-shifted operand. Matches
// (SubL src1 (URShiftL src2 src3)) and emits a single sub using LSR.
// The constant shift count is masked to its low 6 bits.
instruct SubL_reg_URShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                              immI src3, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ sub(dst_reg, lhs_reg, rhs_reg, Assembler::LSR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10733 
// Int subtract of a signed-right-shifted operand. Matches
// (SubI src1 (RShiftI src2 src3)) and emits a single subw using ASR.
// The constant shift count is masked to its low 5 bits.
instruct SubI_reg_RShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x1f;
    __ subw(dst_reg, lhs_reg, rhs_reg, Assembler::ASR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10752 
// Long subtract of a signed-right-shifted operand. Matches
// (SubL src1 (RShiftL src2 src3)) and emits a single sub using ASR.
// The constant shift count is masked to its low 6 bits.
instruct SubL_reg_RShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    int shamt = $src3$$constant & 0x3f;
    __ sub(dst_reg, lhs_reg, rhs_reg, Assembler::ASR, shamt);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10771 
10772 instruct SubI_reg_LShift_reg(iRegINoSp dst, iRegIorL2I src1,
10773                              iRegIorL2I src2, immI src3,
10774                              rFlagsReg cr) %{
10775   match(Set dst (SubI src1 (LShiftI src2 src3)));
10776   // dst = src1 - (src2 << src3), issued as one subw with an LSL-shifted operand.
10777
10778   format %{ "subw  $dst, $src1, $src2, LSL $src3" %}
10779   ins_cost(1.9 * INSN_COST);
10780
10781   ins_encode %{
10782     // The shift amount is the constant reduced to its low five bits.
10783     const int shift_amount = $src3$$constant & 0x1f;
10784     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
10785             as_Register($src2$$reg), Assembler::LSL, shift_amount);
10786   %}
10787
10788   ins_pipe(ialu_reg_reg_shift);
10789 %}
10790 
10791 instruct SubL_reg_LShift_reg(iRegLNoSp dst, iRegL src1,
10792                              iRegL src2, immI src3,
10793                              rFlagsReg cr) %{
10794   match(Set dst (SubL src1 (LShiftL src2 src3)));
10795   // dst = src1 - (src2 << src3), issued as one sub with an LSL-shifted operand.
10796
10797   format %{ "sub  $dst, $src1, $src2, LSL $src3" %}
10798   ins_cost(1.9 * INSN_COST);
10799
10800   ins_encode %{
10801     // The shift amount is the constant reduced to its low six bits.
10802     const int shift_amount = $src3$$constant & 0x3f;
10803     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
10804            as_Register($src2$$reg), Assembler::LSL, shift_amount);
10805   %}
10806
10807   ins_pipe(ialu_reg_reg_shift);
10808 %}
10809 
10810 
10811 
10812 // Shift Left followed by Shift Right.
10813 // This idiom is used by the compiler for the i2b bytecode etc.
10814 instruct sbfmL(iRegLNoSp dst, iRegL src,
10815                immI lshift_count, immI rshift_count) %{
10816   match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
10817   // (src << lshift_count) >> rshift_count, emitted as a single sbfm.
10818   // Accept the rule only while both shift counts lie in 0..63.
10819   predicate((unsigned int)n->in(2)->get_int() <= 63
10820             && (unsigned int)n->in(1)->in(2)->get_int() <= 63);
10821
10822   format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
10823   ins_cost(INSN_COST * 2);
10824   ins_encode %{
10825     const int left = $lshift_count$$constant;
10826     const int right = $rshift_count$$constant;
10827     const int immr = (right - left) & 63;  // difference of the counts, modulo 64
10828     const int imms = 63 - left;
10829     __ sbfm(as_Register($dst$$reg), as_Register($src$$reg), immr, imms);
10830   %}
10831
10832   ins_pipe(ialu_reg_shift);
10833 %}
10834 
10835 // Shift Left followed by Shift Right.
10836 // This idiom is used by the compiler for the i2b bytecode etc.
10837 instruct sbfmwI(iRegINoSp dst, iRegIorL2I src,
10838                 immI lshift_count, immI rshift_count) %{
10839   match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
10840   // (src << lshift_count) >> rshift_count on ints, emitted as a single sbfmw.
10841   // Accept the rule only while both shift counts lie in 0..31.
10842   predicate((unsigned int)n->in(2)->get_int() <= 31
10843             && (unsigned int)n->in(1)->in(2)->get_int() <= 31);
10844
10845   format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
10846   ins_cost(INSN_COST * 2);
10847   ins_encode %{
10848     const int left = $lshift_count$$constant;
10849     const int right = $rshift_count$$constant;
10850     const int immr = (right - left) & 31;  // difference of the counts, modulo 32
10851     const int imms = 31 - left;
10852     __ sbfmw(as_Register($dst$$reg), as_Register($src$$reg), immr, imms);
10853   %}
10854
10855   ins_pipe(ialu_reg_shift);
10856 %}
10857 
10858 // Shift Left followed by Logical (unsigned) Shift Right.
10859 // Zero-filling counterpart of the shift pair matched by sbfmL; emitted as a single ubfm.
10860 instruct ubfmL(iRegLNoSp dst, iRegL src,
10861                immI lshift_count, immI rshift_count) %{
10862   match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
10863   // (src << lshift_count) >>> rshift_count, emitted as a single ubfm.
10864   // Accept the rule only while both shift counts lie in 0..63.
10865   predicate((unsigned int)n->in(2)->get_int() <= 63
10866             && (unsigned int)n->in(1)->in(2)->get_int() <= 63);
10867
10868   format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
10869   ins_cost(INSN_COST * 2);
10870   ins_encode %{
10871     const int left = $lshift_count$$constant;
10872     const int right = $rshift_count$$constant;
10873     const int immr = (right - left) & 63;  // difference of the counts, modulo 64
10874     const int imms = 63 - left;
10875     __ ubfm(as_Register($dst$$reg), as_Register($src$$reg), immr, imms);
10876   %}
10877
10878   ins_pipe(ialu_reg_shift);
10879 %}
10880 
10881 // Shift Left followed by Logical (unsigned) Shift Right.
10882 // Zero-filling counterpart of the shift pair matched by sbfmwI; emitted as a single ubfmw.
10883 instruct ubfmwI(iRegINoSp dst, iRegIorL2I src,
10884                 immI lshift_count, immI rshift_count) %{
10885   match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
10886   // (src << lshift_count) >>> rshift_count on ints, emitted as a single ubfmw.
10887   // Accept the rule only while both shift counts lie in 0..31.
10888   predicate((unsigned int)n->in(2)->get_int() <= 31
10889             && (unsigned int)n->in(1)->in(2)->get_int() <= 31);
10890
10891   format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
10892   ins_cost(INSN_COST * 2);
10893   ins_encode %{
10894     const int left = $lshift_count$$constant;
10895     const int right = $rshift_count$$constant;
10896     const int immr = (right - left) & 31;  // difference of the counts, modulo 32
10897     const int imms = 31 - left;
10898     __ ubfmw(as_Register($dst$$reg), as_Register($src$$reg), immr, imms);
10899   %}
10900
10901   ins_pipe(ialu_reg_shift);
10902 %}
10903 // Bitfield extract with shift & mask
10904 
10905 instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
10906 %{
10907   match(Set dst (AndI (URShiftI src rshift) mask));
10908   // Unsigned field extract for (src >>> rshift) & mask; ubfxw cannot encode lsb + width > 32, so reject those.
10909   predicate((exact_log2(n->in(2)->get_int() + 1) + (n->in(1)->in(2)->get_int() & 31)) <= (31 + 1));
10910   ins_cost(INSN_COST);
10911   format %{ "ubfxw $dst, $src, #$rshift, #$mask" %}
10912   ins_encode %{
10913     int rshift = $rshift$$constant & 31;    // same 5-bit reduction as used in the predicate
10914     long mask = $mask$$constant;
10915     int width = exact_log2(mask+1);         // mask is 2^width - 1
10916     __ ubfxw(as_Register($dst$$reg), as_Register($src$$reg), rshift, width);
10917   %}
10918   ins_pipe(ialu_reg_shift);
10919 %}
10920 instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
10921 %{
10922   match(Set dst (AndL (URShiftL src rshift) mask));
10923   // Unsigned field extract for (src >>> rshift) & mask; ubfx cannot encode lsb + width > 64, so reject those.
10924   predicate((exact_log2(n->in(2)->get_long() + 1) + (n->in(1)->in(2)->get_int() & 63)) <= (63 + 1));
10925   ins_cost(INSN_COST);
10926   format %{ "ubfx $dst, $src, #$rshift, #$mask" %}
10927   ins_encode %{
10928     int rshift = $rshift$$constant & 63;    // same 6-bit reduction as used in the predicate
10929     long mask = $mask$$constant;
10930     int width = exact_log2(mask+1);         // mask is 2^width - 1
10931     __ ubfx(as_Register($dst$$reg), as_Register($src$$reg), rshift, width);
10932   %}
10933   ins_pipe(ialu_reg_shift);
10934 %}
10935 
10936 // We can use ubfx when extending an And with a mask when we know mask
10937 // is positive.  We know that because immI_bitmask guarantees it.
10938 instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
10939 %{
10940   match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));
10941   // The field must lie inside the 32-bit int (lsb + width <= 32); the 64-bit ubfx would otherwise read bits 32 and up.
10942   predicate((exact_log2(n->in(1)->in(2)->get_int() + 1) + (n->in(1)->in(1)->in(2)->get_int() & 31)) <= (31 + 1));
10943   ins_cost(INSN_COST * 2);
10944   format %{ "ubfx $dst, $src, #$rshift, #$mask" %}
10945   ins_encode %{
10946     int rshift = $rshift$$constant & 31;    // same 5-bit reduction as used in the predicate
10947     long mask = $mask$$constant;
10948     int width = exact_log2(mask+1);         // mask is 2^width - 1
10949     __ ubfx(as_Register($dst$$reg), as_Register($src$$reg), rshift, width);
10950   %}
10951   ins_pipe(ialu_reg_shift);
10952 %}
10953 
10954 // Rotations
10955 
10956 instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2,
10957                  immI lshift, immI rshift, rFlagsReg cr) %{
10958   match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
10959   // (src1 << lshift) | (src2 >>> rshift), emitted as one extr.
10960   // Restricted to shift pairs whose sum is a multiple of 64.
10961   predicate(((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63) == 0);
10962   format %{ "extr $dst, $src1, $src2, #$rshift" %}
10963   ins_cost(INSN_COST);
10964   ins_encode %{
10965     const int lsb = $rshift$$constant & 63;
10966     __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), lsb);
10967   %}
10968   ins_pipe(ialu_reg_reg_extr);
10969 %}
10970 
10971 instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
10972                  immI lshift, immI rshift, rFlagsReg cr) %{
10973   match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
10974   // (src1 << lshift) | (src2 >>> rshift) on ints, emitted as one extrw.
10975   // Restricted to shift pairs whose sum is a multiple of 32.
10976   predicate(((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31) == 0);
10977   format %{ "extr $dst, $src1, $src2, #$rshift" %}
10978   ins_cost(INSN_COST);
10979   ins_encode %{
10980     const int lsb = $rshift$$constant & 31;
10981     __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), lsb);
10982   %}
10983   ins_pipe(ialu_reg_reg_extr);
10984 %}
10985 
10986 instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2,
10987                   immI lshift, immI rshift, rFlagsReg cr) %{
10988   match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
10989   // (src1 << lshift) + (src2 >>> rshift), emitted as one extr.
10990   // Restricted to shift pairs whose sum is a multiple of 64.
10991   predicate(((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63) == 0);
10992   format %{ "extr $dst, $src1, $src2, #$rshift" %}
10993   ins_cost(INSN_COST);
10994   ins_encode %{
10995     const int lsb = $rshift$$constant & 63;
10996     __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), lsb);
10997   %}
10998   ins_pipe(ialu_reg_reg_extr);
10999 %}
11000 
11001 instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
11002                   immI lshift, immI rshift, rFlagsReg cr) %{
11003   match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
11004   // (src1 << lshift) + (src2 >>> rshift) on ints, emitted as one extrw.
11005   // Restricted to shift pairs whose sum is a multiple of 32.
11006   predicate(((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31) == 0);
11007   format %{ "extr $dst, $src1, $src2, #$rshift" %}
11008   ins_cost(INSN_COST);
11009   ins_encode %{
11010     const int lsb = $rshift$$constant & 31;
11011     __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), lsb);
11012   %}
11013   ins_pipe(ialu_reg_reg_extr);
11014 %}
11015 
11016 
11017 // rol expander
11018 
11019 instruct rolL_rReg(iRegLNoSp dst, iRegL src,
11020                    iRegI shift, rFlagsReg cr) %{
11021   effect(DEF dst, USE src, USE shift);
11022   // No match rule of its own; the rolL_rReg_Var_* rules expand to this.
11023   ins_cost(INSN_COST * 3);
11024   format %{ "rol    $dst, $src, $shift" %}
11025   ins_encode %{
11026     // rscratch1 = 0 - shift, then rotate right by rscratch1.
11027     __ subw(rscratch1, zr, as_Register($shift$$reg));
11028     __ rorv(as_Register($dst$$reg), as_Register($src$$reg), rscratch1);
11029   %}
11030   ins_pipe(ialu_reg_reg_vshift);
11031 %}
11032 
11033 // rol expander
11034 
11035 instruct rolI_rReg(iRegINoSp dst, iRegI src,
11036                    iRegI shift, rFlagsReg cr) %{
11037   effect(DEF dst, USE src, USE shift);
11038   // No match rule of its own; the rolI_rReg_Var_* rules expand to this.
11039   ins_cost(INSN_COST * 3);
11040   format %{ "rol    $dst, $src, $shift" %}
11041   ins_encode %{
11042     // rscratch1 = 0 - shift, then rotate right by rscratch1.
11043     __ subw(rscratch1, zr, as_Register($shift$$reg));
11044     __ rorvw(as_Register($dst$$reg), as_Register($src$$reg), rscratch1);
11045   %}
11046   ins_pipe(ialu_reg_reg_vshift);
11047 %}
11048 
11049 instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift,
11050                             immI_64 c_64, rFlagsReg cr) %{
11051   match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));
11052   // (src << shift) | (src >>> (c_64 - shift)); code generation is delegated to rolL_rReg.
11053   expand %{
11054     rolL_rReg(dst, src, shift, cr);
11055   %}
11056 %}
11057 
11058 instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift,
11059                           immI0 c0, rFlagsReg cr) %{
11060   match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));
11061   // (src << shift) | (src >>> (c0 - shift)); code generation is delegated to rolL_rReg.
11062   expand %{
11063     rolL_rReg(dst, src, shift, cr);
11064   %}
11065 %}
11066 
11067 instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift,
11068                             immI_32 c_32, rFlagsReg cr) %{
11069   match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));
11070   // (src << shift) | (src >>> (c_32 - shift)); code generation is delegated to rolI_rReg.
11071   expand %{
11072     rolI_rReg(dst, src, shift, cr);
11073   %}
11074 %}
11075 
11076 instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift,
11077                           immI0 c0, rFlagsReg cr) %{
11078   match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));
11079   // (src << shift) | (src >>> (c0 - shift)); code generation is delegated to rolI_rReg.
11080   expand %{
11081     rolI_rReg(dst, src, shift, cr);
11082   %}
11083 %}
11084 
11085 // ror expander
11086 
11087 instruct rorL_rReg(iRegLNoSp dst, iRegL src,
11088                    iRegI shift, rFlagsReg cr) %{
11089   effect(DEF dst, USE src, USE shift);
11090   // No match rule of its own; the rorL_rReg_Var_* rules expand to this.
11091   ins_cost(INSN_COST);
11092   format %{ "ror    $dst, $src, $shift" %}
11093   ins_encode %{
11094     // Rotate right by the count held in the shift register.
11095     __ rorv(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg));
11096   %}
11097   ins_pipe(ialu_reg_reg_vshift);
11098 %}
11099 
11100 // ror expander
11101 
11102 instruct rorI_rReg(iRegINoSp dst, iRegI src,
11103                    iRegI shift, rFlagsReg cr) %{
11104   effect(DEF dst, USE src, USE shift);
11105   // No match rule of its own; the rorI_rReg_Var_* rules expand to this.
11106   ins_cost(INSN_COST);
11107   format %{ "ror    $dst, $src, $shift" %}
11108   ins_encode %{
11109     // Rotate right by the count held in the shift register.
11110     __ rorvw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg));
11111   %}
11112   ins_pipe(ialu_reg_reg_vshift);
11113 %}
11114 
11115 instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift,
11116                             immI_64 c_64, rFlagsReg cr) %{
11117   match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));
11118   // (src >>> shift) | (src << (c_64 - shift)); code generation is delegated to rorL_rReg.
11119   expand %{
11120     rorL_rReg(dst, src, shift, cr);
11121   %}
11122 %}
11123 
11124 instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift,
11125                           immI0 c0, rFlagsReg cr) %{
11126   match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));
11127   // (src >>> shift) | (src << (c0 - shift)); code generation is delegated to rorL_rReg.
11128   expand %{
11129     rorL_rReg(dst, src, shift, cr);
11130   %}
11131 %}
11132 
11133 instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift,
11134                             immI_32 c_32, rFlagsReg cr) %{
11135   match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));
11136   // (src >>> shift) | (src << (c_32 - shift)); code generation is delegated to rorI_rReg.
11137   expand %{
11138     rorI_rReg(dst, src, shift, cr);
11139   %}
11140 %}
11141 
11142 instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift,
11143                           immI0 c0, rFlagsReg cr) %{
11144   match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));
11145   // (src >>> shift) | (src << (c0 - shift)); code generation is delegated to rorI_rReg.
11146   expand %{
11147     rorI_rReg(dst, src, shift, cr);
11148   %}
11149 %}
11150 
11151 // Add/subtract (extended)
11152 
11153 instruct AddExtI(iRegLNoSp dst, iRegL src1,
11154                  iRegIorL2I src2, rFlagsReg cr) %{
11155   match(Set dst (AddL src1 (ConvI2L src2)));
11156   // Long add of a converted int: the ConvI2L is covered by the sxtw extension of src2.
11157   format %{ "add  $dst, $src1, sxtw $src2" %}
11158   ins_cost(INSN_COST);
11159   ins_encode %{
11160     __ add(as_Register($dst$$reg),
11161            as_Register($src1$$reg), as_Register($src2$$reg), ext::sxtw);
11162   %}
11163   ins_pipe(ialu_reg_reg);
11164 %};
11165 
11166 instruct SubExtI(iRegLNoSp dst, iRegL src1,
11167                  iRegIorL2I src2, rFlagsReg cr) %{
11168   match(Set dst (SubL src1 (ConvI2L src2)));
11169   // Long subtract of a converted int: the ConvI2L is covered by the sxtw extension of src2.
11170   format %{ "sub  $dst, $src1, sxtw $src2" %}
11171   ins_cost(INSN_COST);
11172   ins_encode %{
11173     __ sub(as_Register($dst$$reg),
11174            as_Register($src1$$reg), as_Register($src2$$reg), ext::sxtw);
11175   %}
11176   ins_pipe(ialu_reg_reg);
11177 %};
11178 
11179 
11180 instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
11181                       immI_16 lshift, immI_16 rshift, rFlagsReg cr) %{
11182   match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
11183   // src1 + ((src2 << lshift) >> rshift) with immI_16 counts: one add using the sxth extension.
11184   format %{ "add  $dst, $src1, sxth $src2" %}
11185   ins_cost(INSN_COST);
11186   ins_encode %{
11187     __ add(as_Register($dst$$reg),
11188            as_Register($src1$$reg), as_Register($src2$$reg), ext::sxth);
11189   %}
11190   ins_pipe(ialu_reg_reg);
11191 %}
11192 
11193 instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
11194                       immI_24 lshift, immI_24 rshift, rFlagsReg cr) %{
11195   match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
11196   // src1 + ((src2 << lshift) >> rshift) with immI_24 counts: one add using the sxtb extension.
11197   format %{ "add  $dst, $src1, sxtb $src2" %}
11198   ins_cost(INSN_COST);
11199   ins_encode %{
11200     __ add(as_Register($dst$$reg),
11201            as_Register($src1$$reg), as_Register($src2$$reg), ext::sxtb);
11202   %}
11203   ins_pipe(ialu_reg_reg);
11204 %}
11205 
11206 instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
11207                       immI_24 lshift, immI_24 rshift, rFlagsReg cr) %{
11208   match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
11209   // src1 + ((src2 << lshift) >>> rshift) with immI_24 counts: one add using the uxtb extension.
11210   format %{ "add  $dst, $src1, uxtb $src2" %}
11211   ins_cost(INSN_COST);
11212   ins_encode %{
11213     __ add(as_Register($dst$$reg),
11214            as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtb);
11215   %}
11216   ins_pipe(ialu_reg_reg);
11217 %}
11218 
11219 instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2,
11220                       immI_48 lshift, immI_48 rshift, rFlagsReg cr) %{
11221   match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
11222   // src1 + ((src2 << lshift) >> rshift) with immI_48 counts: one add using the sxth extension.
11223   format %{ "add  $dst, $src1, sxth $src2" %}
11224   ins_cost(INSN_COST);
11225   ins_encode %{
11226     __ add(as_Register($dst$$reg),
11227            as_Register($src1$$reg), as_Register($src2$$reg), ext::sxth);
11228   %}
11229   ins_pipe(ialu_reg_reg);
11230 %}
11231 
11232 instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2,
11233                       immI_32 lshift, immI_32 rshift, rFlagsReg cr) %{
11234   match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
11235   // src1 + ((src2 << lshift) >> rshift) with immI_32 counts: one add using the sxtw extension.
11236   format %{ "add  $dst, $src1, sxtw $src2" %}
11237   ins_cost(INSN_COST);
11238   ins_encode %{
11239     __ add(as_Register($dst$$reg),
11240            as_Register($src1$$reg), as_Register($src2$$reg), ext::sxtw);
11241   %}
11242   ins_pipe(ialu_reg_reg);
11243 %}
11244 
11245 instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2,
11246                       immI_56 lshift, immI_56 rshift, rFlagsReg cr) %{
11247   match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
11248   // src1 + ((src2 << lshift) >> rshift) with immI_56 counts: one add using the sxtb extension.
11249   format %{ "add  $dst, $src1, sxtb $src2" %}
11250   ins_cost(INSN_COST);
11251   ins_encode %{
11252     __ add(as_Register($dst$$reg),
11253            as_Register($src1$$reg), as_Register($src2$$reg), ext::sxtb);
11254   %}
11255   ins_pipe(ialu_reg_reg);
11256 %}
11257 
11258 instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2,
11259                       immI_56 lshift, immI_56 rshift, rFlagsReg cr) %{
11260   match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
11261   // src1 + ((src2 << lshift) >>> rshift) with immI_56 counts: one add using the uxtb extension.
11262   format %{ "add  $dst, $src1, uxtb $src2" %}
11263   ins_cost(INSN_COST);
11264   ins_encode %{
11265     __ add(as_Register($dst$$reg),
11266            as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtb);
11267   %}
11268   ins_pipe(ialu_reg_reg);
11269 %}
11270 
11271 
11272 instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
11273                           immI_255 mask, rFlagsReg cr) %{
11274   match(Set dst (AddI src1 (AndI src2 mask)));
11275   // src1 + (src2 & mask) with an immI_255 mask: one addw using the uxtb extension of src2.
11276   format %{ "addw  $dst, $src1, $src2, uxtb" %}
11277   ins_cost(INSN_COST);
11278   ins_encode %{
11279     __ addw(as_Register($dst$$reg),
11280             as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtb);
11281   %}
11282   ins_pipe(ialu_reg_reg);
11283 %}
11284 
11285 instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
11286                           immI_65535 mask, rFlagsReg cr) %{
11287   match(Set dst (AddI src1 (AndI src2 mask)));
11288   // src1 + (src2 & mask) with an immI_65535 mask: one addw using the uxth extension of src2.
11289   format %{ "addw  $dst, $src1, $src2, uxth" %}
11290   ins_cost(INSN_COST);
11291   ins_encode %{
11292     __ addw(as_Register($dst$$reg),
11293             as_Register($src1$$reg), as_Register($src2$$reg), ext::uxth);
11294   %}
11295   ins_pipe(ialu_reg_reg);
11296 %}
11297 
11298 instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2,
11299                           immL_255 mask, rFlagsReg cr) %{
11300   match(Set dst (AddL src1 (AndL src2 mask)));
11301   // src1 + (src2 & mask) with an immL_255 mask: one add using the uxtb extension of src2.
11302   format %{ "add  $dst, $src1, $src2, uxtb" %}
11303   ins_cost(INSN_COST);
11304   ins_encode %{
11305     __ add(as_Register($dst$$reg),
11306            as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtb);
11307   %}
11308   ins_pipe(ialu_reg_reg);
11309 %}
11310 
11311 instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2,
11312                           immL_65535 mask, rFlagsReg cr) %{
11313   match(Set dst (AddL src1 (AndL src2 mask)));
11314   // src1 + (src2 & mask) with an immL_65535 mask: one add using the uxth extension of src2.
11315   format %{ "add  $dst, $src1, $src2, uxth" %}
11316   ins_cost(INSN_COST);
11317   ins_encode %{
11318     __ add(as_Register($dst$$reg),
11319            as_Register($src1$$reg), as_Register($src2$$reg), ext::uxth);
11320   %}
11321   ins_pipe(ialu_reg_reg);
11322 %}
11323 
11324 instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2,
11325                           immL_4294967295 mask, rFlagsReg cr) %{
11326   match(Set dst (AddL src1 (AndL src2 mask)));
11327   // src1 + (src2 & mask) with an immL_4294967295 mask: one add using the uxtw extension of src2.
11328   format %{ "add  $dst, $src1, $src2, uxtw" %}
11329   ins_cost(INSN_COST);
11330   ins_encode %{
11331     __ add(as_Register($dst$$reg),
11332            as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtw);
11333   %}
11334   ins_pipe(ialu_reg_reg);
11335 %}
11336 
11337 instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
11338                           immI_255 mask, rFlagsReg cr) %{
11339   match(Set dst (SubI src1 (AndI src2 mask)));
11340   // src1 - (src2 & mask) with an immI_255 mask: one subw using the uxtb extension of src2.
11341   format %{ "subw  $dst, $src1, $src2, uxtb" %}
11342   ins_cost(INSN_COST);
11343   ins_encode %{
11344     __ subw(as_Register($dst$$reg),
11345             as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtb);
11346   %}
11347   ins_pipe(ialu_reg_reg);
11348 %}
11349 
11350 instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
11351                           immI_65535 mask, rFlagsReg cr) %{
11352   match(Set dst (SubI src1 (AndI src2 mask)));
11353   // src1 - (src2 & mask) with an immI_65535 mask: one subw using the uxth extension of src2.
11354   format %{ "subw  $dst, $src1, $src2, uxth" %}
11355   ins_cost(INSN_COST);
11356   ins_encode %{
11357     __ subw(as_Register($dst$$reg),
11358             as_Register($src1$$reg), as_Register($src2$$reg), ext::uxth);
11359   %}
11360   ins_pipe(ialu_reg_reg);
11361 %}
11362 
11363 instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2,
11364                           immL_255 mask, rFlagsReg cr) %{
11365   match(Set dst (SubL src1 (AndL src2 mask)));
11366   // src1 - (src2 & mask) with an immL_255 mask: one sub using the uxtb extension of src2.
11367   format %{ "sub  $dst, $src1, $src2, uxtb" %}
11368   ins_cost(INSN_COST);
11369   ins_encode %{
11370     __ sub(as_Register($dst$$reg),
11371            as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtb);
11372   %}
11373   ins_pipe(ialu_reg_reg);
11374 %}
11375 
11376 instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2,
11377                           immL_65535 mask, rFlagsReg cr) %{
11378   match(Set dst (SubL src1 (AndL src2 mask)));
11379   // src1 - (src2 & mask) with an immL_65535 mask: one sub using the uxth extension of src2.
11380   format %{ "sub  $dst, $src1, $src2, uxth" %}
11381   ins_cost(INSN_COST);
11382   ins_encode %{
11383     __ sub(as_Register($dst$$reg),
11384            as_Register($src1$$reg), as_Register($src2$$reg), ext::uxth);
11385   %}
11386   ins_pipe(ialu_reg_reg);
11387 %}
11388 
11389 instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2,
11390                           immL_4294967295 mask, rFlagsReg cr) %{
11391   match(Set dst (SubL src1 (AndL src2 mask)));
11392   // src1 - (src2 & mask) with an immL_4294967295 mask: one sub using the uxtw extension of src2.
11393   format %{ "sub  $dst, $src1, $src2, uxtw" %}
11394   ins_cost(INSN_COST);
11395   ins_encode %{
11396     __ sub(as_Register($dst$$reg),
11397            as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtw);
11398   %}
11399   ins_pipe(ialu_reg_reg);
11400 %}
11401 
11402 // END This section of the file is automatically generated. Do not edit --------------
11403 
11404 // ============================================================================
11405 // Floating Point Arithmetic Instructions
11406 
11407 instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2)
11408 %{
11409   match(Set dst (AddF src1 src2));
11410   // Float addition, dst = src1 + src2, emitted as fadds.
11411   format %{ "fadds   $dst, $src1, $src2" %}
11412   ins_cost(INSN_COST * 5);
11413
11414   ins_encode %{
11415     __ fadds(as_FloatRegister($dst$$reg),
11416              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
11417   %}
11418
11419   ins_pipe(fp_dop_reg_reg_s);
11420 %}
11421 
11422 instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2)
11423 %{
11424   match(Set dst (AddD src1 src2));
11425   // Double addition, dst = src1 + src2, emitted as faddd.
11426   format %{ "faddd   $dst, $src1, $src2" %}
11427   ins_cost(INSN_COST * 5);
11428
11429   ins_encode %{
11430     __ faddd(as_FloatRegister($dst$$reg),
11431              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
11432   %}
11433
11434   ins_pipe(fp_dop_reg_reg_d);
11435 %}
11436 
11437 instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2)
11438 %{
11439   match(Set dst (SubF src1 src2));
11440   // Float subtraction, dst = src1 - src2, emitted as fsubs.
11441   format %{ "fsubs   $dst, $src1, $src2" %}
11442   ins_cost(INSN_COST * 5);
11443
11444   ins_encode %{
11445     __ fsubs(as_FloatRegister($dst$$reg),
11446              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
11447   %}
11448
11449   ins_pipe(fp_dop_reg_reg_s);
11450 %}
11451 
11452 instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2)
11453 %{
11454   match(Set dst (SubD src1 src2));
11455   // Double subtraction, dst = src1 - src2, emitted as fsubd.
11456   format %{ "fsubd   $dst, $src1, $src2" %}
11457   ins_cost(INSN_COST * 5);
11458
11459   ins_encode %{
11460     __ fsubd(as_FloatRegister($dst$$reg),
11461              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
11462   %}
11463
11464   ins_pipe(fp_dop_reg_reg_d);
11465 %}
11466 
11467 instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2)
11468 %{
11469   match(Set dst (MulF src1 src2));
11470   // Float multiplication, dst = src1 * src2, emitted as fmuls.
11471   format %{ "fmuls   $dst, $src1, $src2" %}
11472   ins_cost(INSN_COST * 6);
11473
11474   ins_encode %{
11475     __ fmuls(as_FloatRegister($dst$$reg),
11476              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
11477   %}
11478
11479   ins_pipe(fp_dop_reg_reg_s);
11480 %}
11481 
11482 instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2)
11483 %{
11484   match(Set dst (MulD src1 src2));
11485   // Double multiplication, dst = src1 * src2, emitted as fmuld.
11486   format %{ "fmuld   $dst, $src1, $src2" %}
11487   ins_cost(INSN_COST * 6);
11488
11489   ins_encode %{
11490     __ fmuld(as_FloatRegister($dst$$reg),
11491              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
11492   %}
11493
11494   ins_pipe(fp_dop_reg_reg_d);
11495 %}
11496 
11497 // We cannot use these fused mul w add/sub ops because they don't
11498 // produce the same result as the equivalent separated ops
11499 // (essentially they don't round the intermediate result). That's a
11500 // shame. Leaving them here in case we can identify cases where it is
11501 // legitimate to use them.
11502 
11503 
11504 // instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11505 //   match(Set dst (AddF (MulF src1 src2) src3));
11506 
11507 //   format %{ "fmadds   $dst, $src1, $src2, $src3" %}
11508 
11509 //   ins_encode %{
11510 //     __ fmadds(as_FloatRegister($dst$$reg),
11511 //              as_FloatRegister($src1$$reg),
11512 //              as_FloatRegister($src2$$reg),
11513 //              as_FloatRegister($src3$$reg));
11514 //   %}
11515 
11516 //   ins_pipe(pipe_class_default);
11517 // %}
11518 
11519 // instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11520 //   match(Set dst (AddD (MulD src1 src2) src3));
11521 
11522 //   format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
11523 
11524 //   ins_encode %{
11525 //     __ fmaddd(as_FloatRegister($dst$$reg),
11526 //              as_FloatRegister($src1$$reg),
11527 //              as_FloatRegister($src2$$reg),
11528 //              as_FloatRegister($src3$$reg));
11529 //   %}
11530 
11531 //   ins_pipe(pipe_class_default);
11532 // %}
11533 
11534 // instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11535 //   match(Set dst (AddF (MulF (NegF src1) src2) src3));
11536 //   match(Set dst (AddF (NegF (MulF src1 src2)) src3));
11537 
11538 //   format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
11539 
11540 //   ins_encode %{
11541 //     __ fmsubs(as_FloatRegister($dst$$reg),
11542 //               as_FloatRegister($src1$$reg),
11543 //               as_FloatRegister($src2$$reg),
11544 //              as_FloatRegister($src3$$reg));
11545 //   %}
11546 
11547 //   ins_pipe(pipe_class_default);
11548 // %}
11549 
11550 // instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11551 //   match(Set dst (AddD (MulD (NegD src1) src2) src3));
11552 //   match(Set dst (AddD (NegD (MulD src1 src2)) src3));
11553 
11554 //   format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
11555 
11556 //   ins_encode %{
11557 //     __ fmsubd(as_FloatRegister($dst$$reg),
11558 //               as_FloatRegister($src1$$reg),
11559 //               as_FloatRegister($src2$$reg),
11560 //               as_FloatRegister($src3$$reg));
11561 //   %}
11562 
11563 //   ins_pipe(pipe_class_default);
11564 // %}
11565 
11566 // instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11567 //   match(Set dst (SubF (MulF (NegF src1) src2) src3));
11568 //   match(Set dst (SubF (NegF (MulF src1 src2)) src3));
11569 
11570 //   format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
11571 
11572 //   ins_encode %{
11573 //     __ fnmadds(as_FloatRegister($dst$$reg),
11574 //                as_FloatRegister($src1$$reg),
11575 //                as_FloatRegister($src2$$reg),
11576 //                as_FloatRegister($src3$$reg));
11577 //   %}
11578 
11579 //   ins_pipe(pipe_class_default);
11580 // %}
11581 
11582 // instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11583 //   match(Set dst (SubD (MulD (NegD src1) src2) src3));
11584 //   match(Set dst (SubD (NegD (MulD src1 src2)) src3));
11585 
11586 //   format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
11587 
11588 //   ins_encode %{
11589 //     __ fnmaddd(as_FloatRegister($dst$$reg),
11590 //                as_FloatRegister($src1$$reg),
11591 //                as_FloatRegister($src2$$reg),
11592 //                as_FloatRegister($src3$$reg));
11593 //   %}
11594 
11595 //   ins_pipe(pipe_class_default);
11596 // %}
11597 
11598 // instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
11599 //   match(Set dst (SubF (MulF src1 src2) src3));
11600 
11601 //   format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
11602 
11603 //   ins_encode %{
11604 //     __ fnmsubs(as_FloatRegister($dst$$reg),
11605 //                as_FloatRegister($src1$$reg),
11606 //                as_FloatRegister($src2$$reg),
11607 //                as_FloatRegister($src3$$reg));
11608 //   %}
11609 
11610 //   ins_pipe(pipe_class_default);
11611 // %}
11612 
11613 // instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
11614 //   match(Set dst (SubD (MulD src1 src2) src3));
11615 
11616 //   format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
11617 
11618 //   ins_encode %{
11619 //   // n.b. insn name should be fnmsubd
11620 //     __ fnmsub(as_FloatRegister($dst$$reg),
11621 //                as_FloatRegister($src1$$reg),
11622 //                as_FloatRegister($src2$$reg),
11623 //                as_FloatRegister($src3$$reg));
11624 //   %}
11625 
11626 //   ins_pipe(pipe_class_default);
11627 // %}
11628 
11629 
11630 instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2)
11631 %{
11632   match(Set dst (DivF src1  src2));
11633   // Float division, dst = src1 / src2, emitted as fdivs.
11634   format %{ "fdivs   $dst, $src1, $src2" %}
11635   ins_cost(INSN_COST * 18);
11636
11637   ins_encode %{
11638     __ fdivs(as_FloatRegister($dst$$reg),
11639              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
11640   %}
11641
11642   ins_pipe(fp_div_s);
11643 %}
11644 
11645 instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2)
11646 %{
11647   match(Set dst (DivD src1  src2));
11648   // Double division, dst = src1 / src2, emitted as fdivd.
11649   format %{ "fdivd   $dst, $src1, $src2" %}
11650   ins_cost(INSN_COST * 32);
11651
11652   ins_encode %{
11653     __ fdivd(as_FloatRegister($dst$$reg),
11654              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
11655   %}
11656
11657   ins_pipe(fp_div_d);
11658 %}
11659 
11660 instruct negF_reg_reg(vRegF dst, vRegF src)
11661 %{
11662   match(Set dst (NegF src));
11663   // Float negation of src into dst, emitted as fnegs.
11664   format %{ "fneg   $dst, $src" %}
11665   ins_cost(INSN_COST * 3);
11666
11667   ins_encode %{
11668     __ fnegs(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
11669   %}
11670
11671   ins_pipe(fp_uop_s);
11672 %}
11673 
11674 instruct negD_reg_reg(vRegD dst, vRegD src)
11675 %{
11676   match(Set dst (NegD src));
11677   // Double negation of src into dst, emitted as fnegd.
11678   format %{ "fnegd   $dst, $src" %}
11679   ins_cost(INSN_COST * 3);
11680
11681   ins_encode %{
11682     __ fnegd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
11683   %}
11684
11685   ins_pipe(fp_uop_d);
11686 %}
11687 
// Single-precision absolute value: dst = |src|.
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  format %{ "fabss   $dst, $src" %}
  ins_cost(INSN_COST * 3);

  ins_encode %{
    FloatRegister result = $dst$$FloatRegister;
    FloatRegister input  = $src$$FloatRegister;
    __ fabss(result, input);
  %}

  ins_pipe(fp_uop_s);
%}
11700 
// Double-precision absolute value: dst = |src|.
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  format %{ "fabsd   $dst, $src" %}
  ins_cost(INSN_COST * 3);

  ins_encode %{
    FloatRegister result = $dst$$FloatRegister;
    FloatRegister input  = $src$$FloatRegister;
    __ fabsd(result, input);
  %}

  ins_pipe(fp_uop_d);
%}
11713 
// Double-precision square root: dst = sqrt(src).
// Scheduled with fp_div_d, the pipe class divD_reg_reg uses for its vRegD
// operands.
// NOTE(review): fp_div_d is assumed to model the double-precision
// divide/sqrt unit -- confirm against the pipeline section of this file.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_d);
%}
11726 
// Single-precision square root.  The ideal graph expresses it as
// float -> double, SqrtD, double -> float; the whole tree is matched and
// replaced by one fsqrts on the float operand.
// Scheduled with fp_div_s, the pipe class divF_reg_reg uses for its vRegF
// operands.
// NOTE(review): fp_div_s is assumed to model the single-precision
// divide/sqrt unit -- confirm against the pipeline section of this file.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_s);
%}
11739 
11740 // ============================================================================
11741 // Logical Instructions
11742 
11743 // Integer Logical Instructions
11744 
11745 // And Instructions
11746 
11747 
// 32-bit bitwise AND of two registers: dst = src1 & src2.
// cr is declared as an operand but appears in neither the match rule nor
// an effect clause of this instruct.
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  ins_cost(INSN_COST);
  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_encode %{
    Register result = $dst$$Register;
    Register lhs    = $src1$$Register;
    Register rhs    = $src2$$Register;
    __ andw(result, lhs, rhs);
  %}

  ins_pipe(ialu_reg_reg);
%}
11762 
// 32-bit bitwise AND of a register with a logical immediate:
// dst = src1 & src2.
// The encoding emits the non-flag-setting andw, so the format text prints
// "andw" (not "andsw").  cr is declared as an operand but appears in
// neither the match rule nor an effect clause of this instruct.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11777 
11778 // Or Instructions
11779 
// 32-bit bitwise OR of two registers: dst = src1 | src2.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  ins_cost(INSN_COST);
  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_encode %{
    Register result = $dst$$Register;
    Register lhs    = $src1$$Register;
    Register rhs    = $src2$$Register;
    __ orrw(result, lhs, rhs);
  %}

  ins_pipe(ialu_reg_reg);
%}
11794 
// 32-bit bitwise OR of a register with a logical immediate:
// dst = src1 | src2.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  ins_cost(INSN_COST);
  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_encode %{
    Register result = $dst$$Register;
    Register lhs    = $src1$$Register;
    __ orrw(result, lhs, (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11809 
11810 // Xor Instructions
11811 
// 32-bit bitwise XOR of two registers: dst = src1 ^ src2.
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  ins_cost(INSN_COST);
  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_encode %{
    Register result = $dst$$Register;
    Register lhs    = $src1$$Register;
    Register rhs    = $src2$$Register;
    __ eorw(result, lhs, rhs);
  %}

  ins_pipe(ialu_reg_reg);
%}
11826 
// 32-bit bitwise XOR of a register with a logical immediate:
// dst = src1 ^ src2.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  ins_cost(INSN_COST);
  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_encode %{
    Register result = $dst$$Register;
    Register lhs    = $src1$$Register;
    __ eorw(result, lhs, (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11841 
11842 // Long Logical Instructions
11843 // TODO
11844 
// 64-bit bitwise AND of two registers: dst = src1 & src2.
// The format annotation says "long" because this rule matches AndL.
// cr is declared as an operand but appears in neither the match rule nor
// an effect clause of this instruct.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11859 
// 64-bit bitwise AND of a register with a logical immediate:
// dst = src1 & src2.
// The format annotation says "long" because this rule matches AndL.
// cr is declared as an operand but appears in neither the match rule nor
// an effect clause of this instruct.
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11874 
11875 // Or Instructions
11876 
// 64-bit bitwise OR of two registers: dst = src1 | src2.
// The format annotation says "long" because this rule matches OrL.
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11891 
// 64-bit bitwise OR of a register with a logical immediate:
// dst = src1 | src2.
// The format annotation says "long" because this rule matches OrL.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11906 
11907 // Xor Instructions
11908 
// 64-bit bitwise XOR of two registers: dst = src1 ^ src2.
// The format annotation says "long" because this rule matches XorL.
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11923 
// 64-bit bitwise XOR of a register with a logical immediate:
// dst = src1 ^ src2.
// The format annotation says "long" because this rule matches XorL.
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11938 
// int -> long conversion.  Emitted as a signed bit-field move over bits
// 0..31 of src, which the format prints as sxtw.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register wide   = as_Register($dst$$reg);
    Register narrow = as_Register($src$$reg);
    __ sbfm(wide, narrow, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
11950 
11951 // this pattern occurs in bigmath arithmetic
// Matches (ConvI2L src) & mask, where mask is an immL_32bits constant, and
// emits a single unsigned bit-field move over bits 0..31 of src.
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register wide   = as_Register($dst$$reg);
    Register narrow = as_Register($src$$reg);
    __ ubfm(wide, narrow, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
11964 
// long -> int conversion, emitted as a 32-bit register move (movw).
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  format %{ "movw  $dst, $src \t// l2i" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register narrow = $dst$$Register;
    Register wide   = $src$$Register;
    __ movw(narrow, wide);
  %}

  ins_pipe(ialu_reg);
%}
11977 
// Conv2B on an int: 32-bit compare of src against zr, then cset dst on NE.
// The compare writes the flags, hence KILL cr.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    Register value = $src$$Register;
    Register flag  = $dst$$Register;
    __ cmpw(value, zr);
    __ cset(flag, Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
11995 
// Conv2B on a pointer: 64-bit compare of src against zr, then cset dst on
// NE.  The compare writes the flags, hence KILL cr.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    Register value = $src$$Register;
    Register flag  = $dst$$Register;
    __ cmp(value, zr);
    __ cset(flag, Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
12013 
// double -> float conversion via the assembler's fcvtd.
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  format %{ "fcvtd  $dst, $src \t// d2f" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    FloatRegister narrowed = $dst$$FloatRegister;
    FloatRegister wide     = $src$$FloatRegister;
    __ fcvtd(narrowed, wide);
  %}

  ins_pipe(fp_d2f);
%}
12026 
// float -> double conversion via the assembler's fcvts.
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  format %{ "fcvts  $dst, $src \t// f2d" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    FloatRegister widened = $dst$$FloatRegister;
    FloatRegister narrow  = $src$$FloatRegister;
    __ fcvts(widened, narrow);
  %}

  ins_pipe(fp_f2d);
%}
12039 
// float -> int conversion via fcvtzsw (FP source, general-register result).
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  format %{ "fcvtzsw  $dst, $src \t// f2i" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    Register      result = $dst$$Register;
    FloatRegister input  = $src$$FloatRegister;
    __ fcvtzsw(result, input);
  %}

  ins_pipe(fp_f2i);
%}
12052 
// float -> long conversion via fcvtzs (FP source, general-register result).
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  format %{ "fcvtzs  $dst, $src \t// f2l" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    Register      result = $dst$$Register;
    FloatRegister input  = $src$$FloatRegister;
    __ fcvtzs(result, input);
  %}

  ins_pipe(fp_f2l);
%}
12065 
// int -> float conversion via scvtfws (general-register source, FP result).
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  format %{ "scvtfws  $dst, $src \t// i2f" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    FloatRegister result = $dst$$FloatRegister;
    Register      input  = $src$$Register;
    __ scvtfws(result, input);
  %}

  ins_pipe(fp_i2f);
%}
12078 
// long -> float conversion via scvtfs (general-register source, FP result).
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  format %{ "scvtfs  $dst, $src \t// l2f" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    FloatRegister result = $dst$$FloatRegister;
    Register      input  = $src$$Register;
    __ scvtfs(result, input);
  %}

  ins_pipe(fp_l2f);
%}
12091 
// double -> int conversion via fcvtzdw (FP source, general-register result).
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  format %{ "fcvtzdw  $dst, $src \t// d2i" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    Register      result = $dst$$Register;
    FloatRegister input  = $src$$FloatRegister;
    __ fcvtzdw(result, input);
  %}

  ins_pipe(fp_d2i);
%}
12104 
// double -> long conversion via fcvtzd (FP source, general-register result).
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  format %{ "fcvtzd  $dst, $src \t// d2l" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    Register      result = $dst$$Register;
    FloatRegister input  = $src$$FloatRegister;
    __ fcvtzd(result, input);
  %}

  ins_pipe(fp_d2l);
%}
12117 
// int -> double conversion via scvtfwd (general-register source, FP result).
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  format %{ "scvtfwd  $dst, $src \t// i2d" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    FloatRegister result = $dst$$FloatRegister;
    Register      input  = $src$$Register;
    __ scvtfwd(result, input);
  %}

  ins_pipe(fp_i2d);
%}
12130 
// long -> double conversion via scvtfd (general-register source, FP result).
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  format %{ "scvtfd  $dst, $src \t// l2d" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    FloatRegister result = $dst$$FloatRegister;
    Register      input  = $src$$Register;
    __ scvtfd(result, input);
  %}

  ins_pipe(fp_l2d);
%}
12143 
12144 // stack <-> reg and reg <-> reg shuffles with no conversion
12145 
// MoveF2I, stack -> register: 32-bit load of the float stack slot at
// sp + disp into a general register (no value conversion).
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}
  ins_cost(4 * INSN_COST);

  ins_encode %{
    Address slot(sp, $src$$disp);
    __ ldrw(as_Register($dst$$reg), slot);
  %}

  ins_pipe(iload_reg_reg);
%}
12163 
// MoveI2F, stack -> register: 32-bit load of the int stack slot at
// sp + disp into an FP register (no value conversion).
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}
  ins_cost(4 * INSN_COST);

  ins_encode %{
    Address slot(sp, $src$$disp);
    __ ldrs($dst$$FloatRegister, slot);
  %}

  ins_pipe(pipe_class_memory);
%}
12181 
// MoveD2L, stack -> register: 64-bit load of the double stack slot at
// sp + disp into a general register (no value conversion).
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}
  ins_cost(4 * INSN_COST);

  ins_encode %{
    Address slot(sp, $src$$disp);
    __ ldr(as_Register($dst$$reg), slot);
  %}

  ins_pipe(iload_reg_reg);
%}
12199 
// MoveL2D, stack -> register: 64-bit load of the long stack slot at
// sp + disp into an FP register (no value conversion).
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}
  ins_cost(4 * INSN_COST);

  ins_encode %{
    Address slot(sp, $src$$disp);
    __ ldrd($dst$$FloatRegister, slot);
  %}

  ins_pipe(pipe_class_memory);
%}
12217 
// MoveF2I, register -> stack: 32-bit store of the FP register into the int
// stack slot at sp + disp (no value conversion).
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Address slot(sp, $dst$$disp);
    __ strs($src$$FloatRegister, slot);
  %}

  ins_pipe(pipe_class_memory);
%}
12235 
// MoveI2F, register -> stack: 32-bit store of the general register into
// the float stack slot at sp + disp (no value conversion).
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Address slot(sp, $dst$$disp);
    __ strw(as_Register($src$$reg), slot);
  %}

  ins_pipe(istore_reg_reg);
%}
12253 
// MoveD2L, register -> stack: 64-bit store of the FP register into the
// long stack slot at sp + disp (no value conversion).
// The format prints the operands in the order the store is emitted
// (source register first, then the stack slot), as the other *_reg_stack
// rules do.
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
12271 
// MoveL2D, register -> stack: 64-bit store of the general register into
// the double stack slot at sp + disp (no value conversion).
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Address slot(sp, $dst$$disp);
    __ str(as_Register($src$$reg), slot);
  %}

  ins_pipe(istore_reg_reg);
%}
12289 
// MoveF2I, register -> register: fmovs from an FP register to a general
// register (no value conversion).
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register      bits  = as_Register($dst$$reg);
    FloatRegister value = $src$$FloatRegister;
    __ fmovs(bits, value);
  %}

  ins_pipe(fp_f2i);
%}
12307 
// MoveI2F, register -> register: fmovs from a general register to an FP
// register (no value conversion).
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}
  ins_cost(INSN_COST);

  ins_encode %{
    FloatRegister value = $dst$$FloatRegister;
    Register      bits  = as_Register($src$$reg);
    __ fmovs(value, bits);
  %}

  ins_pipe(fp_i2f);
%}
12325 
// MoveD2L, register -> register: fmovd from an FP register to a general
// register (no value conversion).
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register      bits  = as_Register($dst$$reg);
    FloatRegister value = $src$$FloatRegister;
    __ fmovd(bits, value);
  %}

  ins_pipe(fp_d2l);
%}
12343 
// MoveL2D, register -> register: fmovd from a general register to an FP
// register (no value conversion).
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
  ins_cost(INSN_COST);

  ins_encode %{
    FloatRegister value = $dst$$FloatRegister;
    Register      bits  = as_Register($src$$reg);
    __ fmovd(value, bits);
  %}

  ins_pipe(fp_l2d);
%}
12361 
12362 // ============================================================================
12363 // clearing of an array
12364 
// ClearArray with the word count in a register.
// cnt and base use the operand classes iRegL_R11 and iRegP_R10 and are both
// consumed and clobbered (USE_KILL).  The work is delegated to
// zero_words(base, cnt), which is defined outside this section.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base);

  format %{ "ClearArray $cnt, $base" %}
  ins_cost(4 * INSN_COST);

  ins_encode %{
    Register start = $base$$Register;
    Register words = $cnt$$Register;
    __ zero_words(start, words);
  %}

  ins_pipe(pipe_class_memory);
%}
12379 
// ClearArray with a constant word count.
// base (operand class iRegP_R10) is consumed and clobbered; tmp (operand
// class iRegL_R11) is reserved as a TEMP but not named in the encoding.
// NOTE(review): presumably zero_words(base, constant) uses R11 internally --
// confirm in the macro assembler.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 tmp, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL base, TEMP tmp);

  format %{ "ClearArray $cnt, $base" %}
  ins_cost(4 * INSN_COST);

  ins_encode %{
    Register start = $base$$Register;
    __ zero_words(start, (u_int64_t)$cnt$$constant);
  %}

  ins_pipe(pipe_class_memory);
%}
12394 
12395 // ============================================================================
12396 // Overflow Math Instructions
12397 
// Int add overflow check, register/register: sets cr with a 32-bit cmnw
// of op1 and op2.
instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  ins_cost(INSN_COST);
  format %{ "cmnw  $op1, $op2\t# overflow check int" %}

  ins_encode %{
    Register lhs = as_Register($op1$$reg);
    Register rhs = as_Register($op2$$reg);
    __ cmnw(lhs, rhs);
  %}

  ins_pipe(icmp_reg_reg);
%}
12410 
// Int add overflow check, register/add-sub immediate: sets cr with a
// 32-bit cmnw of op1 and the constant.
instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  ins_cost(INSN_COST);
  format %{ "cmnw  $op1, $op2\t# overflow check int" %}

  ins_encode %{
    Register lhs = as_Register($op1$$reg);
    __ cmnw(lhs, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
12423 
// Long add overflow check, register/register: sets cr with a 64-bit cmn
// of op1 and op2.
instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  ins_cost(INSN_COST);
  format %{ "cmn   $op1, $op2\t# overflow check long" %}

  ins_encode %{
    Register lhs = as_Register($op1$$reg);
    Register rhs = as_Register($op2$$reg);
    __ cmn(lhs, rhs);
  %}

  ins_pipe(icmp_reg_reg);
%}
12436 
// Long add overflow check, register/add-sub immediate: sets cr with a
// 64-bit cmn of op1 and the constant.
instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  ins_cost(INSN_COST);
  format %{ "cmn   $op1, $op2\t# overflow check long" %}

  ins_encode %{
    Register lhs = as_Register($op1$$reg);
    __ cmn(lhs, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
12449 
// Int subtract overflow check, register/register: sets cr with a 32-bit
// cmpw of op1 against op2.
instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# overflow check int" %}

  ins_encode %{
    Register lhs = as_Register($op1$$reg);
    Register rhs = as_Register($op2$$reg);
    __ cmpw(lhs, rhs);
  %}

  ins_pipe(icmp_reg_reg);
%}
12462 
// Int subtract overflow check, register/add-sub immediate: sets cr with a
// 32-bit cmpw of op1 against the constant.
instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# overflow check int" %}

  ins_encode %{
    Register lhs = as_Register($op1$$reg);
    __ cmpw(lhs, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
12475 
// Long subtract overflow check, register/register: sets cr with a 64-bit
// cmp of op1 against op2.
instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  ins_cost(INSN_COST);
  format %{ "cmp   $op1, $op2\t# overflow check long" %}

  ins_encode %{
    Register lhs = as_Register($op1$$reg);
    Register rhs = as_Register($op2$$reg);
    __ cmp(lhs, rhs);
  %}

  ins_pipe(icmp_reg_reg);
%}
12488 
// Long subtract overflow check, register/add-sub immediate: sets cr with a
// 64-bit cmp of op1 against the constant.
instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  ins_cost(INSN_COST);
  format %{ "cmp   $op1, $op2\t# overflow check long" %}

  ins_encode %{
    Register lhs = as_Register($op1$$reg);
    __ cmp(lhs, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
12501 
// Int negate overflow check: OverflowSubI with a constant-zero left
// operand, emitted as a 32-bit cmpw of zr against op1.
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  ins_cost(INSN_COST);
  format %{ "cmpw  zr, $op1\t# overflow check int" %}

  ins_encode %{
    Register negated = as_Register($op1$$reg);
    __ cmpw(zr, negated);
  %}

  ins_pipe(icmp_reg_imm);
%}
12514 
// Long negate overflow check: OverflowSubL with a constant-zero left
// operand, emitted as a 64-bit cmp of zr against op1.
// The zero feeding OverflowSubL is a long constant, so it is matched with
// immL0 (the operand class compL_reg_immL0 uses for a long zero); an int
// zero operand class would not match it and this rule would never apply.
instruct overflowNegL_reg(rFlagsReg cr, immL0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
12527 
// Int multiply overflow check producing the V flag in cr.
// The 64-bit product (smull) is compared with the sign extension of its own
// low 32 bits; NE means the product does not fit in an int.  That NE/EQ
// outcome is then turned into VS/VC: 0x80000000 is selected on NE (0 on
// EQ), and comparing the selected value with 1 overflows only for
// 0x80000000.  Uses rscratch1 as scratch.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  ins_cost(5 * INSN_COST);
  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}

  ins_encode %{
    Register lhs = as_Register($op1$$reg);
    Register rhs = as_Register($op2$$reg);
    // Full 64-bit product of the two 32-bit inputs.
    __ smull(rscratch1, lhs, rhs);
    // NE when the product differs from its sign-extended low word.
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);
    // Select 0x80000000 on NE, 0 on EQ.
    __ movw(rscratch1, 0x80000000);
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE);
    // 0x80000000 - 1 sets V; 0 - 1 does not.
    __ cmpw(rscratch1, 1);
  %}

  ins_pipe(pipe_slow);
%}
12548 
// Fused int multiply overflow check and branch.
// Applies only when the If tests overflow / no_overflow (see predicate).
// The product is compared with its sign-extended low word (NE => overflow),
// so a requested VS is branched on as NE and anything else as EQ.
// Uses rscratch1 and clobbers the flags (KILL cr).
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}

  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition requested = (Assembler::Condition)$cmp$$cmpcode;
    Assembler::Condition taken;
    if (requested == Assembler::VS) {
      taken = Assembler::NE;
    } else {
      taken = Assembler::EQ;
    }
    __ smull(rscratch1, as_Register($op1$$reg), as_Register($op2$$reg));
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ br(taken, *target);
  %}

  ins_pipe(pipe_serial);
%}
12570 
// Long multiply overflow check producing the V flag in cr.
// mul yields bits 0..63 of the 128-bit signed product and smulh bits
// 64..127.  The product fits in a long exactly when the high half equals
// the sign bit of the low half replicated across the register, i.e. the
// low half shifted arithmetically right by 63.  (A shift of 31 would leave
// low-half bits 31..62 in the comparison and flag in-range products as
// overflow.)  NE/EQ is then converted to VS/VC the same way as in
// overflowMulI_reg.  Uses rscratch1 and rscratch2 as scratch.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}
12593 
// Fused long multiply overflow check and branch.
// Applies only when the If tests overflow / no_overflow (see predicate).
// mul yields bits 0..63 of the 128-bit signed product and smulh bits
// 64..127; the product fits in a long exactly when the high half equals
// the low half shifted arithmetically right by 63 (its replicated sign
// bit).  NE therefore means overflow, so a requested VS is branched on as
// NE and anything else as EQ.
// Uses rscratch1 and rscratch2 and clobbers the flags (KILL cr).
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
12617 
12618 // ============================================================================
12619 // Compare Instructions
12620 
// Signed int compare, register/register; sets cr.
// Encoding is delegated to the aarch64_enc_cmpw encoding class.
instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpw  $op1, $op2" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_cmpw(op1, op2));
  ins_pipe(icmp_reg_reg);
%}
12634 
// Signed int compare of a register against constant zero; sets cr.
// Encoding is delegated to aarch64_enc_cmpw_imm_addsub.
instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));
  effect(DEF cr, USE op1);

  format %{ "cmpw $op1, 0" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
  ins_pipe(icmp_reg_imm);
%}
12648 
// Signed int compare of a register against an immIAddSub constant; sets cr.
// Encoding is delegated to aarch64_enc_cmpw_imm_addsub.
instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1);

  format %{ "cmpw  $op1, $op2" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
  ins_pipe(icmp_reg_imm);
%}
12662 
// Signed int compare of a register against an arbitrary int constant;
// sets cr.  Costed at twice INSN_COST, unlike the immIAddSub form.
// Encoding is delegated to aarch64_enc_cmpw_imm.
instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1);

  format %{ "cmpw  $op1, $op2" %}
  ins_cost(INSN_COST * 2);

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));
  ins_pipe(icmp_reg_imm);
%}
12676 
12677 // Unsigned compare Instructions; really, same as signed compare
12678 // except it should only be used to feed an If or a CMovI which takes a
12679 // cmpOpU.
12680 
// Unsigned int compare, register/register; the result lands in an
// rFlagsRegU.  Uses the same aarch64_enc_cmpw encoding as the signed form.
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpw  $op1, $op2\t# unsigned" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_cmpw(op1, op2));
  ins_pipe(icmp_reg_reg);
%}
12694 
// Unsigned int compare of a register against constant zero; the result
// lands in an rFlagsRegU.  Encoding: aarch64_enc_cmpw_imm_addsub.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));
  effect(DEF cr, USE op1);

  format %{ "cmpw $op1, #0\t# unsigned" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
  ins_pipe(icmp_reg_imm);
%}
12708 
// Unsigned int compare of a register against an immIAddSub constant; the
// result lands in an rFlagsRegU.  Encoding: aarch64_enc_cmpw_imm_addsub.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));
  effect(DEF cr, USE op1);

  format %{ "cmpw  $op1, $op2\t# unsigned" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
  ins_pipe(icmp_reg_imm);
%}
12722 
// Unsigned int compare of a register against an arbitrary int constant;
// the result lands in an rFlagsRegU.  Costed at twice INSN_COST.
// Encoding: aarch64_enc_cmpw_imm.
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));
  effect(DEF cr, USE op1);

  format %{ "cmpw  $op1, $op2\t# unsigned" %}
  ins_cost(INSN_COST * 2);

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));
  ins_pipe(icmp_reg_imm);
%}
12736 
// Signed long compare, register/register; sets cr.
// Encoding is delegated to the aarch64_enc_cmp encoding class.
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmp  $op1, $op2" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_cmp(op1, op2));
  ins_pipe(icmp_reg_reg);
%}
12750 
// Signed long compare of a register against constant zero; sets cr.
// Encoding is delegated to aarch64_enc_cmp_imm_addsub.
// NOTE(review): the format prints "tst" while the encoding class is named
// for cmp -- confirm which instruction the encoding actually emits.
instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpL op1 zero));
  effect(DEF cr, USE op1);

  format %{ "tst  $op1" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));
  ins_pipe(icmp_reg_imm);
%}
12764 
// Signed long compare of a register against an immLAddSub constant;
// sets cr.  Encoding is delegated to aarch64_enc_cmp_imm_addsub.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));
  effect(DEF cr, USE op1);

  format %{ "cmp  $op1, $op2" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));
  ins_pipe(icmp_reg_imm);
%}
12778 
// Signed long compare of a register against an arbitrary long constant;
// sets cr.  Costed at twice INSN_COST, unlike the immLAddSub form.
// Encoding is delegated to aarch64_enc_cmp_imm.
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));
  effect(DEF cr, USE op1);

  format %{ "cmp  $op1, $op2" %}
  ins_cost(INSN_COST * 2);

  ins_encode(aarch64_enc_cmp_imm(op1, op2));
  ins_pipe(icmp_reg_imm);
%}
12792 
// Unsigned long compare, register/register; the result lands in an
// rFlagsRegU.  Uses the same aarch64_enc_cmp encoding as the signed form.
instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpUL op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmp  $op1, $op2" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_cmp(op1, op2));
  ins_pipe(icmp_reg_reg);
%}
12806 
// Unsigned long compare of a register against constant zero; the result
// lands in an rFlagsRegU.  Encoding: aarch64_enc_cmp_imm_addsub.
// NOTE(review): the format prints "tst" while the encoding class is named
// for cmp -- confirm which instruction the encoding actually emits.
instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpUL op1 zero));
  effect(DEF cr, USE op1);

  format %{ "tst  $op1" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));
  ins_pipe(icmp_reg_imm);
%}
12820 
12821 instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
12822 %{
        // Unsigned long compare (CmpUL) of register op1 against an immLAddSub
        // constant; the result is defined in the unsigned flags operand cr.
12823   match(Set cr (CmpUL op1 op2));
12824 
12825   effect(DEF cr, USE op1);
12826 
12827   ins_cost(INSN_COST);
12828   format %{ "cmp  $op1, $op2" %}
12829 
12830   ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));
12831 
12832   ins_pipe(icmp_reg_imm);
12833 %}
12834 
12835 instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
12836 %{
        // Unsigned long compare (CmpUL) of register op1 against an arbitrary
        // immL constant.
12837   match(Set cr (CmpUL op1 op2));
12838 
12839   effect(DEF cr, USE op1);
12840 
        // Costed at twice the immLAddSub form, so the matcher prefers that
        // rule when the constant qualifies for it.
12841   ins_cost(INSN_COST * 2);
12842   format %{ "cmp  $op1, $op2" %}
12843 
12844   ins_encode(aarch64_enc_cmp_imm(op1, op2));
12845 
12846   ins_pipe(icmp_reg_imm);
12847 %}
12848 
12849 instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
12850 %{
        // Pointer compare (CmpP) of two pointer registers; the result is
        // defined in the unsigned flags operand cr.
12851   match(Set cr (CmpP op1 op2));
12852 
12853   effect(DEF cr, USE op1, USE op2);
12854 
12855   ins_cost(INSN_COST);
12856   format %{ "cmp  $op1, $op2\t // ptr" %}
12857 
12858   ins_encode(aarch64_enc_cmpp(op1, op2));
12859 
12860   ins_pipe(icmp_reg_reg);
12861 %}
12862 
12863 instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
12864 %{
        // Compressed-pointer compare (CmpN) of two narrow-oop registers; the
        // result is defined in the unsigned flags operand cr.
12865   match(Set cr (CmpN op1 op2));
12866 
12867   effect(DEF cr, USE op1, USE op2);
12868 
12869   ins_cost(INSN_COST);
12870   format %{ "cmp  $op1, $op2\t // compressed ptr" %}
12871 
12872   ins_encode(aarch64_enc_cmpn(op1, op2));
12873 
12874   ins_pipe(icmp_reg_reg);
12875 %}
12876 
12877 instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
12878 %{
        // Pointer compare (CmpP) of register op1 against the immP0 operand.
12879   match(Set cr (CmpP op1 zero));
12880 
12881   effect(DEF cr, USE op1, USE zero);
12882 
12883   ins_cost(INSN_COST);
12884   format %{ "cmp  $op1, 0\t // ptr" %}
12885 
        // Only op1 is handed to the encoding class; the zero operand is implied.
12886   ins_encode(aarch64_enc_testp(op1));
12887 
12888   ins_pipe(icmp_reg_imm);
12889 %}
12890 
12891 instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
12892 %{
        // Compressed-pointer compare (CmpN) of register op1 against the immN0
        // operand.
12893   match(Set cr (CmpN op1 zero));
12894 
12895   effect(DEF cr, USE op1, USE zero);
12896 
12897   ins_cost(INSN_COST);
12898   format %{ "cmp  $op1, 0\t // compressed ptr" %}
12899 
        // Only op1 is handed to the encoding class; the zero operand is implied.
12900   ins_encode(aarch64_enc_testn(op1));
12901 
12902   ins_pipe(icmp_reg_imm);
12903 %}
12904 
12905 // FP comparisons
12906 //
12907 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
12908 // using normal cmpOp. See declaration of rFlagsReg for details.
12909 
12910 instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
12911 %{
        // Float compare (CmpF) of two float registers into the normal flags
        // register cr.
12912   match(Set cr (CmpF src1 src2));
12913 
12914   ins_cost(3 * INSN_COST);
12915   format %{ "fcmps $src1, $src2" %}
12916 
12917   ins_encode %{
          // Single fcmps on the two operand registers.
12918     __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
12919   %}
12920 
12921   ins_pipe(pipe_class_compare);
12922 %}
12923 
12924 instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
12925 %{
        // Float compare (CmpF) of register src1 against the immF0 operand.
12926   match(Set cr (CmpF src1 src2));
12927 
12928   ins_cost(3 * INSN_COST);
12929   format %{ "fcmps $src1, 0.0" %}
12930 
12931   ins_encode %{
          // src2 is not referenced; the literal 0.0 is passed to fcmps instead.
12932     __ fcmps(as_FloatRegister($src1$$reg), 0.0);
12933   %}
12934 
12935   ins_pipe(pipe_class_compare);
12936 %}
12937 // FROM HERE
12938 
12939 instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
12940 %{
        // Double compare (CmpD) of two double registers into the normal flags
        // register cr.
12941   match(Set cr (CmpD src1 src2));
12942 
12943   ins_cost(3 * INSN_COST);
12944   format %{ "fcmpd $src1, $src2" %}
12945 
12946   ins_encode %{
          // Single fcmpd on the two operand registers.
12947     __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
12948   %}
12949 
12950   ins_pipe(pipe_class_compare);
12951 %}
12952 
12953 instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
12954 %{
        // Double compare (CmpD) of register src1 against the immD0 operand.
12955   match(Set cr (CmpD src1 src2));
12956 
12957   ins_cost(3 * INSN_COST);
12958   format %{ "fcmpd $src1, 0.0" %}
12959 
12960   ins_encode %{
          // src2 is not referenced; the literal 0.0 is passed to fcmpd instead.
12961     __ fcmpd(as_FloatRegister($src1$$reg), 0.0);
12962   %}
12963 
12964   ins_pipe(pipe_class_compare);
12965 %}
12966 
12967 instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
12968 %{
        // Three-way float compare (CmpF3): materialises the result in the
        // integer register dst and clobbers the flags register cr.
12969   match(Set dst (CmpF3 src1 src2));
12970   effect(KILL cr);
12971 
12972   ins_cost(5 * INSN_COST);
        // Listing text mirrors the three instructions emitted below.
12973   format %{ "fcmps $src1, $src2\n\t"
12974             "csinvw($dst, zr, zr, eq)\n\t"
12975             "csnegw($dst, $dst, $dst, lt)"
12976   %}
12977 
12978   ins_encode %{
          // NOTE(review): no branch in this encoder targets `done`; it is only
          // bound at the end.
12979     Label done;
12980     FloatRegister s1 = as_FloatRegister($src1$$reg);
12981     FloatRegister s2 = as_FloatRegister($src2$$reg);
12982     Register d = as_Register($dst$$reg);
12983     __ fcmps(s1, s2);
12984     // installs 0 if EQ else -1
12985     __ csinvw(d, zr, zr, Assembler::EQ);
12986     // keeps -1 if less or unordered else installs 1
12987     __ csnegw(d, d, d, Assembler::LT);
12988     __ bind(done);
12989   %}
12990 
12991   ins_pipe(pipe_class_default);
12992 
12993 %}
12994 
12995 instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
12996 %{
        // Three-way double compare (CmpD3): materialises the result in the
        // integer register dst and clobbers the flags register cr.
12997   match(Set dst (CmpD3 src1 src2));
12998   effect(KILL cr);
12999 
13000   ins_cost(5 * INSN_COST);
        // Listing text mirrors the three instructions emitted below.
13001   format %{ "fcmpd $src1, $src2\n\t"
13002             "csinvw($dst, zr, zr, eq)\n\t"
13003             "csnegw($dst, $dst, $dst, lt)"
13004   %}
13005 
13006   ins_encode %{
          // NOTE(review): no branch in this encoder targets `done`; it is only
          // bound at the end.
13007     Label done;
13008     FloatRegister s1 = as_FloatRegister($src1$$reg);
13009     FloatRegister s2 = as_FloatRegister($src2$$reg);
13010     Register d = as_Register($dst$$reg);
13011     __ fcmpd(s1, s2);
13012     // installs 0 if EQ else -1
13013     __ csinvw(d, zr, zr, Assembler::EQ);
13014     // keeps -1 if less or unordered else installs 1
13015     __ csnegw(d, d, d, Assembler::LT);
13016     __ bind(done);
13017   %}
13018   ins_pipe(pipe_class_default);
13019 
13020 %}
13021 
13022 instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
13023 %{
        // Three-way float compare (CmpF3) of src1 against the immF0 operand:
        // materialises the result in dst and clobbers the flags register cr.
13024   match(Set dst (CmpF3 src1 zero));
13025   effect(KILL cr);
13026 
13027   ins_cost(5 * INSN_COST);
        // Listing text mirrors the three instructions emitted below.
13028   format %{ "fcmps $src1, 0.0\n\t"
13029             "csinvw($dst, zr, zr, eq)\n\t"
13030             "csnegw($dst, $dst, $dst, lt)"
13031   %}
13032 
13033   ins_encode %{
          // NOTE(review): no branch in this encoder targets `done`; it is only
          // bound at the end.
13034     Label done;
13035     FloatRegister s1 = as_FloatRegister($src1$$reg);
13036     Register d = as_Register($dst$$reg);
          // The zero operand is not referenced; the literal 0.0 is used.
13037     __ fcmps(s1, 0.0);
13038     // installs 0 if EQ else -1
13039     __ csinvw(d, zr, zr, Assembler::EQ);
13040     // keeps -1 if less or unordered else installs 1
13041     __ csnegw(d, d, d, Assembler::LT);
13042     __ bind(done);
13043   %}
13044 
13045   ins_pipe(pipe_class_default);
13046 
13047 %}
13048 
13049 instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
13050 %{
        // Three-way double compare (CmpD3) of src1 against the immD0 operand:
        // materialises the result in dst and clobbers the flags register cr.
13051   match(Set dst (CmpD3 src1 zero));
13052   effect(KILL cr);
13053 
13054   ins_cost(5 * INSN_COST);
        // Listing text mirrors the three instructions emitted below.
13055   format %{ "fcmpd $src1, 0.0\n\t"
13056             "csinvw($dst, zr, zr, eq)\n\t"
13057             "csnegw($dst, $dst, $dst, lt)"
13058   %}
13059 
13060   ins_encode %{
          // NOTE(review): no branch in this encoder targets `done`; it is only
          // bound at the end.
13061     Label done;
13062     FloatRegister s1 = as_FloatRegister($src1$$reg);
13063     Register d = as_Register($dst$$reg);
          // The zero operand is not referenced; the literal 0.0 is used.
13064     __ fcmpd(s1, 0.0);
13065     // installs 0 if EQ else -1
13066     __ csinvw(d, zr, zr, Assembler::EQ);
13067     // keeps -1 if less or unordered else installs 1
13068     __ csnegw(d, d, d, Assembler::LT);
13069     __ bind(done);
13070   %}
13071   ins_pipe(pipe_class_default);
13072 
13073 %}
13074 
13075 // Manifest a CmpL result in an integer register.
13076 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
13077 instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
13078 %{
        // Three-way long compare (CmpL3) of two long registers into the integer
        // register dst; the flags register is clobbered.
13079   match(Set dst (CmpL3 src1 src2));
13080   effect(KILL flags);
13081 
13082   ins_cost(INSN_COST * 6);
        // Adjacent string literals are concatenated, so each instruction needs
        // its own "\n\t" separator to print on a separate listing line.
13083   format %{
13084       "cmp $src1, $src2\n\t"
13085       "csetw $dst, ne\n\t"
13086       "cnegw $dst, $dst, lt"
13087   %}
13088   // format %{ "CmpL3 $dst, $src1, $src2" %}
13089   ins_encode %{
13090     __ cmp($src1$$Register, $src2$$Register);
13091     __ csetw($dst$$Register, Assembler::NE);
13092     __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
13093   %}
13094 
13095   ins_pipe(ialu_reg_reg);
13096 %}
13097 
13098 instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
13099 %{
        // CmpLTMask of two int registers into dst; the flags register is
        // clobbered by the compare.
13100   match(Set dst (CmpLTMask p q));
13101   effect(KILL cr);
13102 
13103   ins_cost(3 * INSN_COST);
13104 
13105   format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
13106             "csetw $dst, lt\n\t"
13107             "subw $dst, zr, $dst"
13108   %}
13109 
13110   ins_encode %{
13111     __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
13112     __ csetw(as_Register($dst$$reg), Assembler::LT);
          // dst = zr - dst, i.e. negate the value csetw just produced.
13113     __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
13114   %}
13115 
13116   ins_pipe(ialu_reg_reg);
13117 %}
13118 
13119 instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
13120 %{
        // CmpLTMask of an int register against the immI0 operand, done with a
        // single shift instead of a compare sequence.
13121   match(Set dst (CmpLTMask src zero));
        // NOTE(review): cr is declared killed although the encoder below emits
        // only asrw -- confirm whether the KILL is still required.
13122   effect(KILL cr);
13123 
13124   ins_cost(INSN_COST);
13125 
13126   format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}
13127 
13128   ins_encode %{
          // Shift right by 31 on the 32-bit value (presumably replicating the
          // sign bit across dst).
13129     __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
13130   %}
13131 
13132   ins_pipe(ialu_reg_shift);
13133 %}
13134 
13135 // ============================================================================
13136 // Max and Min
13137 
13138 instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
13139 %{
        // Signed int minimum (MinI): compare the two sources, then select src1
        // when the LT condition holds and src2 otherwise.
13140   match(Set dst (MinI src1 src2));
13141 
13142   effect(DEF dst, USE src1, USE src2, KILL cr);
        // Fixed size declared for the two instructions emitted below.
13143   size(8);
13144 
13145   ins_cost(INSN_COST * 3);
        // Operand commas added so the listing matches the emitted operands.
13146   format %{
13147     "cmpw $src1, $src2\t# signed int\n\t"
13148     "cselw $dst, $src1, $src2, lt"
13149   %}
13150 
13151   ins_encode %{
13152     __ cmpw(as_Register($src1$$reg),
13153             as_Register($src2$$reg));
13154     __ cselw(as_Register($dst$$reg),
13155              as_Register($src1$$reg),
13156              as_Register($src2$$reg),
13157              Assembler::LT);
13158   %}
13159 
13160   ins_pipe(ialu_reg_reg);
13161 %}
13162 // FROM HERE
13163 
13164 instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
13165 %{
        // Signed int maximum (MaxI): compare the two sources, then select src1
        // when the GT condition holds and src2 otherwise.
13166   match(Set dst (MaxI src1 src2));
13167 
13168   effect(DEF dst, USE src1, USE src2, KILL cr);
        // Fixed size declared for the two instructions emitted below.
13169   size(8);
13170 
13171   ins_cost(INSN_COST * 3);
        // Operand commas added so the listing matches the emitted operands.
13172   format %{
13173     "cmpw $src1, $src2\t# signed int\n\t"
13174     "cselw $dst, $src1, $src2, gt"
13175   %}
13176 
13177   ins_encode %{
13178     __ cmpw(as_Register($src1$$reg),
13179             as_Register($src2$$reg));
13180     __ cselw(as_Register($dst$$reg),
13181              as_Register($src1$$reg),
13182              as_Register($src2$$reg),
13183              Assembler::GT);
13184   %}
13185 
13186   ins_pipe(ialu_reg_reg);
13187 %}
13188 
13189 // ============================================================================
13190 // Branch Instructions
13191 
13192 // Direct Branch.
13193 instruct branch(label lbl)
13194 %{
        // Unconditional branch: matches the ideal Goto node and jumps to lbl.
13195   match(Goto);
13196 
13197   effect(USE lbl);
13198 
13199   ins_cost(BRANCH_COST);
13200   format %{ "b  $lbl" %}
13201 
13202   ins_encode(aarch64_enc_b(lbl));
13203 
13204   ins_pipe(pipe_branch);
13205 %}
13206 
13207 // Conditional Near Branch
13208 instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
13209 %{
        // Conditional branch on the signed flags register: matches an If node
        // whose inputs are the condition cmp and the flags cr.
13210   // Same match rule as `branchConFar'.
13211   match(If cmp cr);
13212 
13213   effect(USE lbl);
13214 
13215   ins_cost(BRANCH_COST);
13216   // If set to 1 this indicates that the current instruction is a
13217   // short variant of a long branch. This avoids using this
13218   // instruction in first-pass matching. It will then only be used in
13219   // the `Shorten_branches' pass.
13220   // ins_short_branch(1);
13221   format %{ "b$cmp  $lbl" %}
13222 
13223   ins_encode(aarch64_enc_br_con(cmp, lbl));
13224 
13225   ins_pipe(pipe_branch_cond);
13226 %}
13227 
13228 // Conditional Near Branch Unsigned
13229 instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
13230 %{
        // Conditional branch on the unsigned flags operand: same shape as
        // branchCon but with cmpOpU / rFlagsRegU and the unsigned encoding class.
13231   // Same match rule as `branchConFar'.
13232   match(If cmp cr);
13233 
13234   effect(USE lbl);
13235 
13236   ins_cost(BRANCH_COST);
13237   // If set to 1 this indicates that the current instruction is a
13238   // short variant of a long branch. This avoids using this
13239   // instruction in first-pass matching. It will then only be used in
13240   // the `Shorten_branches' pass.
13241   // ins_short_branch(1);
13242   format %{ "b$cmp  $lbl\t# unsigned" %}
13243 
13244   ins_encode(aarch64_enc_br_conU(cmp, lbl));
13245 
13246   ins_pipe(pipe_branch_cond);
13247 %}
13248 
13249 // Make use of CBZ and CBNZ.  These instructions, as well as being
13250 // shorter than (cmp; branch), have the additional benefit of not
13251 // killing the flags.
13252 
13253 instruct cmpI_imm0_branch(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
        // Fused int compare-with-zero and branch. The predicate restricts the
        // rule to eq/ne tests, which map onto cbzw/cbnzw.
13254   match(If cmp (CmpI op1 op2));
13255   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
13256             || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
13257   effect(USE labl);
13258 
13259   ins_cost(BRANCH_COST);
13260   format %{ "cbw$cmp   $op1, $labl" %}
13261   ins_encode %{
13262     Label* L = $labl$$label;
13263     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
          // EQ branches when the register is zero; the only other condition
          // admitted by the predicate is NE.
13264     if (cond == Assembler::EQ)
13265       __ cbzw($op1$$Register, *L);
13266     else
13267       __ cbnzw($op1$$Register, *L);
13268   %}
13269   ins_pipe(pipe_cmp_branch);
13270 %}
13271 
13272 instruct cmpL_imm0_branch(cmpOp cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
        // Fused long compare-with-zero and branch. The predicate restricts the
        // rule to eq/ne tests, which map onto cbz/cbnz.
13273   match(If cmp (CmpL op1 op2));
13274   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
13275             || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
13276   effect(USE labl);
13277 
13278   ins_cost(BRANCH_COST);
13279   format %{ "cb$cmp   $op1, $labl" %}
13280   ins_encode %{
13281     Label* L = $labl$$label;
13282     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
          // EQ branches when the register is zero; otherwise (NE) when non-zero.
13283     if (cond == Assembler::EQ)
13284       __ cbz($op1$$Register, *L);
13285     else
13286       __ cbnz($op1$$Register, *L);
13287   %}
13288   ins_pipe(pipe_cmp_branch);
13289 %}
13290 
13291 instruct cmpP_imm0_branch(cmpOp cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
        // Fused pointer null-check and branch. The predicate restricts the rule
        // to eq/ne tests, which map onto cbz/cbnz.
13292   match(If cmp (CmpP op1 op2));
13293   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
13294             || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
13295   effect(USE labl);
13296 
13297   ins_cost(BRANCH_COST);
13298   format %{ "cb$cmp   $op1, $labl" %}
13299   ins_encode %{
13300     Label* L = $labl$$label;
13301     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
          // EQ branches when the register is zero; otherwise (NE) when non-zero.
13302     if (cond == Assembler::EQ)
13303       __ cbz($op1$$Register, *L);
13304     else
13305       __ cbnz($op1$$Register, *L);
13306   %}
13307   ins_pipe(pipe_cmp_branch);
13308 %}
13309 
13310 instruct cmpN_imm0_branch(cmpOp cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
        // Fused compressed-pointer null-check and branch. The predicate
        // restricts the rule to eq/ne tests; the 32-bit cbzw/cbnzw forms are used.
13311   match(If cmp (CmpN op1 op2));
13312   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
13313             || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
13314   effect(USE labl);
13315 
13316   ins_cost(BRANCH_COST);
13317   format %{ "cbw$cmp   $op1, $labl" %}
13318   ins_encode %{
13319     Label* L = $labl$$label;
13320     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
          // EQ branches when the register is zero; otherwise (NE) when non-zero.
13321     if (cond == Assembler::EQ)
13322       __ cbzw($op1$$Register, *L);
13323     else
13324       __ cbnzw($op1$$Register, *L);
13325   %}
13326   ins_pipe(pipe_cmp_branch);
13327 %}
13328 
13329 instruct cmpP_narrowOop_imm0_branch(cmpOp cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
        // Null-check of a decoded narrow oop fused with the branch: the match
        // rule absorbs the DecodeN, and the test is made on the narrow register
        // itself. The predicate restricts the rule to eq/ne tests.
13330   match(If cmp (CmpP (DecodeN oop) zero));
13331   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
13332             || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
13333   effect(USE labl);
13334 
13335   ins_cost(BRANCH_COST);
        // "cbw" matches the cbzw/cbnzw instructions emitted below and the text
        // used by cmpN_imm0_branch.
13336   format %{ "cbw$cmp   $oop, $labl" %}
13337   ins_encode %{
13338     Label* L = $labl$$label;
13339     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
          // EQ branches when the register is zero; otherwise (NE) when non-zero.
13340     if (cond == Assembler::EQ)
13341       __ cbzw($oop$$Register, *L);
13342     else
13343       __ cbnzw($oop$$Register, *L);
13344   %}
13345   ins_pipe(pipe_cmp_branch);
13346 %}
13347 
13348 instruct cmpUI_imm0_branch(cmpOpU cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
        // Fused unsigned int compare-with-zero and branch. Besides eq/ne the
        // predicate also admits gt and le: for an unsigned value "<= 0" is the
        // same as "== 0" and "> 0" the same as "!= 0".
13349   match(If cmp (CmpU op1 op2));
13350   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
13351             || n->in(1)->as_Bool()->_test._test == BoolTest::eq
13352             || n->in(1)->as_Bool()->_test._test == BoolTest::gt
13353             ||  n->in(1)->as_Bool()->_test._test == BoolTest::le);
13354   effect(USE labl);
13355 
13356   ins_cost(BRANCH_COST);
13357   format %{ "cbw$cmp   $op1, $labl" %}
13358   ins_encode %{
13359     Label* L = $labl$$label;
13360     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
          // EQ and LS take the branch-if-zero form; every other admitted
          // condition takes branch-if-non-zero. (LS is presumably the encoding
          // of the unsigned "le" test -- confirm against cmpOpU.)
13361     if (cond == Assembler::EQ || cond == Assembler::LS)
13362       __ cbzw($op1$$Register, *L);
13363     else
13364       __ cbnzw($op1$$Register, *L);
13365   %}
13366   ins_pipe(pipe_cmp_branch);
13367 %}
13368 
13369 instruct cmpUL_imm0_branch(cmpOpU cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
        // Fused unsigned long compare-with-zero and branch. Besides eq/ne the
        // predicate also admits gt and le: for an unsigned value "<= 0" is the
        // same as "== 0" and "> 0" the same as "!= 0".
13370   match(If cmp (CmpUL op1 op2));
13371   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
13372             || n->in(1)->as_Bool()->_test._test == BoolTest::eq
13373             || n->in(1)->as_Bool()->_test._test == BoolTest::gt
13374             || n->in(1)->as_Bool()->_test._test == BoolTest::le);
13375   effect(USE labl);
13376 
13377   ins_cost(BRANCH_COST);
13378   format %{ "cb$cmp   $op1, $labl" %}
13379   ins_encode %{
13380     Label* L = $labl$$label;
13381     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
          // EQ and LS take the branch-if-zero form; every other admitted
          // condition takes branch-if-non-zero. (LS is presumably the encoding
          // of the unsigned "le" test -- confirm against cmpOpU.)
13382     if (cond == Assembler::EQ || cond == Assembler::LS)
13383       __ cbz($op1$$Register, *L);
13384     else
13385       __ cbnz($op1$$Register, *L);
13386   %}
13387   ins_pipe(pipe_cmp_branch);
13388 %}
13389 
13390 // Test bit and Branch
13391 
13392 // Patterns for short (< 32KiB) variants
13393 instruct cmpL_branch_sign(cmpOp cmp, iRegL op1, immL0 op2, label labl) %{
        // Short-branch form of a signed long "< 0" / ">= 0" test, implemented
        // as a test of bit 63 of op1 followed by a branch.
13394   match(If cmp (CmpL op1 op2));
13395   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
13396             || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
13397   effect(USE labl);
13398 
13399   ins_cost(BRANCH_COST);
        // NOTE(review): the listing prints "cb<cond>" although the encoder
        // calls tbr -- confirm the intended mnemonic text.
13400   format %{ "cb$cmp   $op1, $labl # long" %}
13401   ins_encode %{
13402     Label* L = $labl$$label;
          // LT is translated to NE and the other admitted test (ge) to EQ
          // before being handed to tbr together with bit index 63.
13403     Assembler::Condition cond =
13404       ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
13405     __ tbr(cond, $op1$$Register, 63, *L);
13406   %}
13407   ins_pipe(pipe_cmp_branch);
13408   ins_short_branch(1);
13409 %}
13410 
13411 instruct cmpI_branch_sign(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl) %{
        // Short-branch form of a signed int "< 0" / ">= 0" test, implemented as
        // a test of bit 31 of op1 followed by a branch.
13412   match(If cmp (CmpI op1 op2));
13413   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
13414             || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
13415   effect(USE labl);
13416 
13417   ins_cost(BRANCH_COST);
        // NOTE(review): the listing prints "cb<cond>" although the encoder
        // calls tbr -- confirm the intended mnemonic text.
13418   format %{ "cb$cmp   $op1, $labl # int" %}
13419   ins_encode %{
13420     Label* L = $labl$$label;
          // LT is translated to NE and the other admitted test (ge) to EQ
          // before being handed to tbr together with bit index 31.
13421     Assembler::Condition cond =
13422       ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
13423     __ tbr(cond, $op1$$Register, 31, *L);
13424   %}
13425   ins_pipe(pipe_cmp_branch);
13426   ins_short_branch(1);
13427 %}
13428 
13429 instruct cmpL_branch_bit(cmpOp cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
        // Short-branch form of "(op1 & mask) ==/!= 0" on longs where the mask
        // is a single bit: the predicate requires an eq/ne test and a
        // power-of-two AndL constant.
13430   match(If cmp (CmpL (AndL op1 op2) op3));
13431   predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
13432             || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
13433             && is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
13434   effect(USE labl);
13435 
13436   ins_cost(BRANCH_COST);
13437   format %{ "tb$cmp   $op1, $op2, $labl" %}
13438   ins_encode %{
13439     Label* L = $labl$$label;
13440     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
          // Convert the single-bit mask into the bit index that tbr expects.
13441     int bit = exact_log2($op2$$constant);
13442     __ tbr(cond, $op1$$Register, bit, *L);
13443   %}
13444   ins_pipe(pipe_cmp_branch);
13445   ins_short_branch(1);
13446 %}
13447 
13448 instruct cmpI_branch_bit(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
        // Short-branch form of "(op1 & mask) ==/!= 0" on ints where the mask is
        // a single bit: the predicate requires an eq/ne test and a power-of-two
        // AndI constant.
13449   match(If cmp (CmpI (AndI op1 op2) op3));
13450   predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
13451             || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
13452             && is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
13453   effect(USE labl);
13454 
13455   ins_cost(BRANCH_COST);
13456   format %{ "tb$cmp   $op1, $op2, $labl" %}
13457   ins_encode %{
13458     Label* L = $labl$$label;
13459     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
          // Convert the single-bit mask into the bit index that tbr expects.
13460     int bit = exact_log2($op2$$constant);
13461     __ tbr(cond, $op1$$Register, bit, *L);
13462   %}
13463   ins_pipe(pipe_cmp_branch);
13464   ins_short_branch(1);
13465 %}
13466 
13467 // And far variants
13468 instruct far_cmpL_branch_sign(cmpOp cmp, iRegL op1, immL0 op2, label labl) %{
        // Far counterpart of cmpL_branch_sign: same match rule and predicate,
        // but tbr is called with far=true and no ins_short_branch is declared.
13469   match(If cmp (CmpL op1 op2));
13470   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
13471             || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
13472   effect(USE labl);
13473 
13474   ins_cost(BRANCH_COST);
13475   format %{ "cb$cmp   $op1, $labl # long" %}
13476   ins_encode %{
13477     Label* L = $labl$$label;
          // LT is translated to NE and the other admitted test (ge) to EQ
          // before being handed to tbr together with bit index 63.
13478     Assembler::Condition cond =
13479       ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
13480     __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
13481   %}
13482   ins_pipe(pipe_cmp_branch);
13483 %}
13484 
13485 instruct far_cmpI_branch_sign(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl) %{
        // Far counterpart of cmpI_branch_sign: same match rule and predicate,
        // but tbr is called with far=true and no ins_short_branch is declared.
13486   match(If cmp (CmpI op1 op2));
13487   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
13488             || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
13489   effect(USE labl);
13490 
13491   ins_cost(BRANCH_COST);
13492   format %{ "cb$cmp   $op1, $labl # int" %}
13493   ins_encode %{
13494     Label* L = $labl$$label;
          // LT is translated to NE and the other admitted test (ge) to EQ
          // before being handed to tbr together with bit index 31.
13495     Assembler::Condition cond =
13496       ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
13497     __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
13498   %}
13499   ins_pipe(pipe_cmp_branch);
13500 %}
13501 
13502 instruct far_cmpL_branch_bit(cmpOp cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
        // Far counterpart of cmpL_branch_bit: same match rule and predicate,
        // but tbr is called with far=true and no ins_short_branch is declared.
13503   match(If cmp (CmpL (AndL op1 op2) op3));
13504   predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
13505             || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
13506             && is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
13507   effect(USE labl);
13508 
13509   ins_cost(BRANCH_COST);
13510   format %{ "tb$cmp   $op1, $op2, $labl" %}
13511   ins_encode %{
13512     Label* L = $labl$$label;
13513     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
          // Convert the single-bit mask into the bit index that tbr expects.
13514     int bit = exact_log2($op2$$constant);
13515     __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
13516   %}
13517   ins_pipe(pipe_cmp_branch);
13518 %}
13519 
13520 instruct far_cmpI_branch_bit(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
        // Far counterpart of cmpI_branch_bit: same match rule and predicate,
        // but tbr is called with far=true and no ins_short_branch is declared.
13521   match(If cmp (CmpI (AndI op1 op2) op3));
13522   predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
13523             || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
13524             && is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
13525   effect(USE labl);
13526 
13527   ins_cost(BRANCH_COST);
13528   format %{ "tb$cmp   $op1, $op2, $labl" %}
13529   ins_encode %{
13530     Label* L = $labl$$label;
13531     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
          // Convert the single-bit mask into the bit index that tbr expects.
13532     int bit = exact_log2($op2$$constant);
13533     __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
13534   %}
13535   ins_pipe(pipe_cmp_branch);
13536 %}
13537 
13538 // Test bits
13539 
13540 instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
        // Folds "(op1 & constant) compared with 0" on longs into one tst with
        // an immediate. The predicate only admits constants accepted by
        // operand_valid_for_logical_immediate for the 64-bit form.
13541   match(Set cr (CmpL (AndL op1 op2) op3));
13542   predicate(Assembler::operand_valid_for_logical_immediate
13543             (/*is_32*/false, n->in(1)->in(2)->get_long()));
13544 
13545   ins_cost(INSN_COST);
13546   format %{ "tst $op1, $op2 # long" %}
13547   ins_encode %{
13548     __ tst($op1$$Register, $op2$$constant);
13549   %}
13550   ins_pipe(ialu_reg_reg);
13551 %}
13552 
13553 instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
        // Folds "(op1 & constant) compared with 0" on ints into one tstw with
        // an immediate. The predicate only admits constants accepted by
        // operand_valid_for_logical_immediate for the 32-bit form.
13554   match(Set cr (CmpI (AndI op1 op2) op3));
13555   predicate(Assembler::operand_valid_for_logical_immediate
13556             (/*is_32*/true, n->in(1)->in(2)->get_int()));
13557 
13558   ins_cost(INSN_COST);
        // Prints "tstw" to match the emitted instruction and cmpI_and_reg.
13559   format %{ "tstw $op1, $op2 # int" %}
13560   ins_encode %{
13561     __ tstw($op1$$Register, $op2$$constant);
13562   %}
13563   ins_pipe(ialu_reg_reg);
13564 %}
13565 
13566 instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
        // Folds "(op1 & op2) compared with 0" on long registers into one tst.
13567   match(Set cr (CmpL (AndL op1 op2) op3));
13568 
13569   ins_cost(INSN_COST);
13570   format %{ "tst $op1, $op2 # long" %}
13571   ins_encode %{
13572     __ tst($op1$$Register, $op2$$Register);
13573   %}
13574   ins_pipe(ialu_reg_reg);
13575 %}
13576 
13577 instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
        // Folds "(op1 & op2) compared with 0" on int registers into one tstw.
13578   match(Set cr (CmpI (AndI op1 op2) op3));
13579 
13580   ins_cost(INSN_COST);
13581   format %{ "tstw $op1, $op2 # int" %}
13582   ins_encode %{
13583     __ tstw($op1$$Register, $op2$$Register);
13584   %}
13585   ins_pipe(ialu_reg_reg);
13586 %}
13587 
13588 
13589 // Conditional Far Branch
13590 // Conditional Far Branch Unsigned
13591 // TODO: fixme
13592 
13593 // counted loop end branch near
13594 instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
13595 %{
        // Back-branch of a counted loop on the signed flags register; uses the
        // same encoding class as branchCon.
13596   match(CountedLoopEnd cmp cr);
13597 
13598   effect(USE lbl);
13599 
13600   ins_cost(BRANCH_COST);
13601   // short variant.
13602   // ins_short_branch(1);
13603   format %{ "b$cmp $lbl \t// counted loop end" %}
13604 
13605   ins_encode(aarch64_enc_br_con(cmp, lbl));
13606 
13607   ins_pipe(pipe_branch);
13608 %}
13609 
13610 // counted loop end branch near Unsigned
13611 instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
13612 %{
        // Back-branch of a counted loop on the unsigned flags operand; uses the
        // same encoding class as branchConU.
13613   match(CountedLoopEnd cmp cr);
13614 
13615   effect(USE lbl);
13616 
13617   ins_cost(BRANCH_COST);
13618   // short variant.
13619   // ins_short_branch(1);
13620   format %{ "b$cmp $lbl \t// counted loop end unsigned" %}
13621 
13622   ins_encode(aarch64_enc_br_conU(cmp, lbl));
13623 
13624   ins_pipe(pipe_branch);
13625 %}
13626 
13627 // counted loop end branch far
13628 // counted loop end branch far unsigned
13629 // TODO: fixme
13630 
13631 // ============================================================================
13632 // inlined locking and unlocking
13633 
13634 instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
13635 %{
        // Inlined monitor-enter fast path: matches FastLock on (object, box)
        // and reports its outcome through the flags register cr.
13636   match(Set cr (FastLock object box));
        // Two scratch pointer registers are reserved for the encoding class.
13637   effect(TEMP tmp, TEMP tmp2);
13638 
13639   // TODO
13640   // identify correct cost
13641   ins_cost(5 * INSN_COST);
13642   format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}
13643 
13644   ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));
13645 
13646   ins_pipe(pipe_serial);
13647 %}
13648 
13649 instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
13650 %{
        // Inlined monitor-exit fast path: matches FastUnlock on (object, box)
        // and reports its outcome through the flags register cr.
13651   match(Set cr (FastUnlock object box));
        // Two scratch pointer registers are reserved for the encoding class.
13652   effect(TEMP tmp, TEMP tmp2);
13653 
13654   ins_cost(5 * INSN_COST);
13655   format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}
13656 
13657   ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));
13658 
13659   ins_pipe(pipe_serial);
13660 %}
13661 
13662 
13663 // ============================================================================
13664 // Safepoint Instructions
13665 
13666 // TODO
13667 // provide a near and far version of this code
13668 
13669 instruct safePoint(rFlagsReg cr, iRegP poll)
13670 %{
        // Safepoint poll: matches SafePoint with the polling-page address held
        // in register poll; the flags register is declared killed.
13671   match(SafePoint poll);
13672   effect(KILL cr);
13673 
13674   format %{
13675     "ldrw zr, [$poll]\t# Safepoint: poll for GC"
13676   %}
13677   ins_encode %{
          // The read is tagged with the poll_type relocation.
13678     __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
13679   %}
13680   ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
13681 %}
13682 
13683 
13684 // ============================================================================
13685 // Procedure Call/Return Instructions
13686 
13687 // Call Java Static Instruction
13688 
13689 instruct CallStaticJavaDirect(method meth)
13690 %{
        // Static Java call for every CallStaticJava node that is not a
        // method-handle invoke (those are excluded by the predicate below).
13691   match(CallStaticJava);
13692 
13693   effect(USE meth);
13694 
13695   predicate(!((CallStaticJavaNode*)n)->is_method_handle_invoke());
13696 
13697   ins_cost(CALL_COST);
13698 
13699   format %{ "call,static $meth \t// ==> " %}
13700 
        // The call encoding is followed by the shared call epilog encoding.
13701   ins_encode( aarch64_enc_java_static_call(meth),
13702               aarch64_enc_call_epilog );
13703 
13704   ins_pipe(pipe_class_call);
13705 %}
13706 
13707 // TO HERE
13708 
13709 // Call Java Static Instruction (method handle version)
13710 
13711 instruct CallStaticJavaDirectHandle(method meth, iRegP_FP reg_mh_save)
13712 %{
        // Static Java call for CallStaticJava nodes that are method-handle
        // invokes (selected by the predicate below). reg_mh_save is declared as
        // an iRegP_FP operand but is not referenced by the clauses in this rule.
13713   match(CallStaticJava);
13714 
13715   effect(USE meth);
13716 
13717   predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
13718 
13719   ins_cost(CALL_COST);
13720 
13721   format %{ "call,static $meth \t// (methodhandle) ==> " %}
13722 
        // The handle-call encoding is followed by the shared call epilog.
13723   ins_encode( aarch64_enc_java_handle_call(meth),
13724               aarch64_enc_call_epilog );
13725 
13726   ins_pipe(pipe_class_call);
13727 %}
13728 
13729 // Call Java Dynamic Instruction
13730 instruct CallDynamicJavaDirect(method meth)
13731 %{
        // Dynamically dispatched Java call: matches CallDynamicJava.
13732   match(CallDynamicJava);
13733 
13734   effect(USE meth);
13735 
13736   ins_cost(CALL_COST);
13737 
13738   format %{ "CALL,dynamic $meth \t// ==> " %}
13739 
        // The dynamic-call encoding is followed by the shared call epilog.
13740   ins_encode( aarch64_enc_java_dynamic_call(meth),
13741                aarch64_enc_call_epilog );
13742 
13743   ins_pipe(pipe_class_call);
13744 %}
13745 
13746 // Call Runtime Instruction
13747 
13748 instruct CallRuntimeDirect(method meth)
13749 %{
        // Call from compiled Java code into the runtime: matches CallRuntime.
13750   match(CallRuntime);
13751 
13752   effect(USE meth);
13753 
13754   ins_cost(CALL_COST);
13755 
13756   format %{ "CALL, runtime $meth" %}
13757 
13758   ins_encode( aarch64_enc_java_to_runtime(meth) );
13759 
13760   ins_pipe(pipe_class_call);
13761 %}
13762 
13763 // Call Runtime Leaf Instruction
13764 
13765 instruct CallLeafDirect(method meth)
13766 %{
        // Leaf runtime call: matches CallLeaf and uses the same encoding class
        // as CallRuntimeDirect.
13767   match(CallLeaf);
13768 
13769   effect(USE meth);
13770 
13771   ins_cost(CALL_COST);
13772 
13773   format %{ "CALL, runtime leaf $meth" %}
13774 
13775   ins_encode( aarch64_enc_java_to_runtime(meth) );
13776 
13777   ins_pipe(pipe_class_call);
13778 %}
13779 
13780 // Call Runtime Leaf Instruction (no floating point)
13781 
13782 instruct CallLeafNoFPDirect(method meth)
13783 %{
        // Leaf runtime call of the "no FP" kind: matches CallLeafNoFP and uses
        // the same encoding class as CallRuntimeDirect.
13784   match(CallLeafNoFP);
13785 
13786   effect(USE meth);
13787 
13788   ins_cost(CALL_COST);
13789 
13790   format %{ "CALL, runtime leaf nofp $meth" %}
13791 
13792   ins_encode( aarch64_enc_java_to_runtime(meth) );
13793 
13794   ins_pipe(pipe_class_call);
13795 %}
13796 
13797 // Tail Call; Jump from runtime stub to Java code.
13798 // Also known as an 'interprocedural jump'.
13799 // Target of jump will eventually return to caller.
13800 // TailJump below removes the return address.
13801 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
13802 %{
        // Tail call: indirect jump to jump_target while method_oop is pinned to
        // the inline-cache register class by its operand type.
13803   match(TailCall jump_target method_oop);
13804 
13805   ins_cost(CALL_COST);
13806 
13807   format %{ "br $jump_target\t# $method_oop holds method oop" %}
13808 
        // Only the jump target is handed to the encoding class.
13809   ins_encode(aarch64_enc_tail_call(jump_target));
13810 
13811   ins_pipe(pipe_class_call);
13812 %}
13813 
13814 instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
13815 %{
        // Tail jump: indirect jump to jump_target with the exception oop
        // constrained to the iRegP_R0 operand class.
13816   match(TailJump jump_target ex_oop);
13817 
13818   ins_cost(CALL_COST);
13819 
13820   format %{ "br $jump_target\t# $ex_oop holds exception oop" %}
13821 
        // Only the jump target is handed to the encoding class.
13822   ins_encode(aarch64_enc_tail_jmp(jump_target));
13823 
13824   ins_pipe(pipe_class_call);
13825 %}
13826 
// Create exception oop: the oop is produced by stack-crawling runtime code
// and is set up just before control reaches this handler, so the rule has
// an empty encoding and a declared size of zero.
// TODO check: should ex_oop be in r0?  intel uses rax, ppc cannot use r0
// so uses rarg1.
instruct CreateException(iRegP_R0 ex_oop) %{
  match(Set ex_oop (CreateEx));

  size(0);
  format %{ " -- \t// exception oop; no code emitted" %}

  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_empty);
%}
13844 
// Rethrow exception: the exception oop arrives in the first argument
// position; control then JUMPs (does not call) to the rethrow stub code.
// Takes no operands; the work is done by the aarch64_enc_rethrow encoding.
instruct RethrowException()
%{
  match(Rethrow);

  ins_cost(CALL_COST);
  format %{ "b rethrow_stub" %}

  ins_encode(aarch64_enc_rethrow());
  ins_pipe(pipe_class_call);
%}
13857 
13858 
// Return Instruction
// The epilog node loads the return address into lr as part of the frame
// pop, so this rule only needs the aarch64_enc_ret encoding.
instruct Ret() %{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode(aarch64_enc_ret());
  ins_pipe(pipe_branch);
%}
13871 
// Die now.
// Matches the ideal Halt node and emits a brk with immediate 999.
instruct ShouldNotReachHere()
%{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // TODO: implement a proper trap call here
    __ brk(999);
  %}

  ins_pipe(pipe_class_default);
%}
13887 
13888 // ============================================================================
// Partial Subtype Check
//
// Search the subklass's secondary superklass array for an instance of the
// superklass.  Set a hidden internal cache on a hit (cache is checked with
// exposed code in gen_subtype_check()).  Return NZ for a miss or zero for a
// hit.  The encoding ALSO sets flags.
13895 
// Value-producing form: result receives the PartialSubtypeCheck outcome.
// Every register operand is constrained by its operand class (iRegP_R4,
// iRegP_R0, iRegP_R2, iRegP_R5); temp and the flags register are killed.
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  // NOTE(review): the cost comment predates this layout; the following
  // VsZero rule uses the same cost value.
  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  opcode(0x1); // Force zero of result reg on hit
  ins_encode( aarch64_enc_partial_subtype_check(sub, super, temp, result) );

  ins_pipe(pipe_class_memory);
%}
13910 
// Flag-producing form: matches a PartialSubtypeCheck whose result is
// compared against the null pointer constant, so the rule defines the
// flags register directly.  temp and result are killed.
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  // NOTE(review): no cheaper "next version" is visible after this rule.
  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  opcode(0x0); // Don't zero result reg on hit
  ins_encode( aarch64_enc_partial_subtype_check(sub, super, temp, result) );

  ins_pipe(pipe_class_memory);
%}
13925 
// String compare: matches StrComp on the (str1, cnt1) and (str2, cnt2)
// pairs.  All four inputs are consumed and clobbered (USE_KILL); tmp1 and
// the flags register are killed.  The work is delegated to the
// string_compare assembler routine.
instruct string_compare(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr) %{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}

  ins_encode %{
    Register str1_reg = $str1$$Register;
    Register str2_reg = $str2$$Register;
    Register cnt1_reg = $cnt1$$Register;
    Register cnt2_reg = $cnt2$$Register;
    Register res_reg  = $result$$Register;
    Register tmp_reg  = $tmp1$$Register;
    __ string_compare(str1_reg, str2_reg, cnt1_reg, cnt2_reg, res_reg, tmp_reg);
  %}

  ins_pipe(pipe_class_memory);
%}
13940 
// String indexOf with a register-held second count: matches StrIndexOf on
// the (str1, cnt1) and (str2, cnt2) pairs.  The four inputs are USE_KILL,
// four integer temporaries are reserved, and the flags are killed.  The
// literal -1 is passed in the slot where the constant-count variant
// supplies its immediate.
instruct string_indexof(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
                        iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                        iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) %{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result" %}

  ins_encode %{
    Register str1_reg = $str1$$Register;
    Register str2_reg = $str2$$Register;
    Register cnt1_reg = $cnt1$$Register;
    Register cnt2_reg = $cnt2$$Register;
    __ string_indexof(str1_reg, str2_reg, cnt1_reg, cnt2_reg,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}
13958 
// String indexOf with an immediate second count (operand class immI_le_4):
// matches StrIndexOf where the second count is a constant.  zr is passed in
// the cnt2 register slot and the constant is passed as an int argument.
// str1, str2 and cnt1 are USE_KILL; four integer temporaries are reserved
// and the flags are killed.
instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                            immI_le_4 int_cnt2, iRegI_R0 result,
                            iRegINoSp tmp1, iRegINoSp tmp2,
                            iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) %{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result" %}

  ins_encode %{
    int const_cnt2 = (int)$int_cnt2$$constant;
    Register str1_reg = $str1$$Register;
    Register str2_reg = $str2$$Register;
    Register cnt1_reg = $cnt1$$Register;
    __ string_indexof(str1_reg, str2_reg, cnt1_reg, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      const_cnt2, $result$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}
13978 
// String equals: matches StrEquals on (str1, str2) with a shared count.
// The three inputs are USE_KILL; tmp and the flags register are killed.
// The work is delegated to the string_equals assembler routine.
instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                       iRegI_R0 result, iRegP_R10 tmp, rFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(KILL tmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp" %}

  ins_encode %{
    Register str1_reg = $str1$$Register;
    Register str2_reg = $str2$$Register;
    Register cnt_reg  = $cnt$$Register;
    Register res_reg  = $result$$Register;
    Register tmp_reg  = $tmp$$Register;
    __ string_equals(str1_reg, str2_reg, cnt_reg, res_reg, tmp_reg);
  %}

  ins_pipe(pipe_class_memory);
%}
13993 
// Array equals: matches AryEq on two array operands.  Both inputs are
// USE_KILL; tmp and the flags register are killed.  The work is delegated
// to the char_arrays_equals assembler routine.
instruct array_equals(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R10 tmp, rFlagsReg cr)
%{
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  // The second operand needs its '$' prefix; without it the listing shows
  // the literal text "ary2" instead of the allocated register.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ char_arrays_equals($ary1$$Register, $ary2$$Register,
                          $result$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
14007 
// Encode char[] to byte[] in ISO_8859_1.
// Matches EncodeISOArray on src, dst and len.  The three inputs are
// USE_KILL; four vector temporaries (operand classes vRegD_V0..vRegD_V3)
// and the flags register are killed.  The work is delegated to the
// encode_iso_array assembler routine.
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}

  ins_encode %{
    Register src_reg = $src$$Register;
    Register dst_reg = $dst$$Register;
    Register len_reg = $len$$Register;
    Register res_reg = $result$$Register;
    __ encode_iso_array(src_reg, dst_reg, len_reg, res_reg,
                        $Vtmp1$$FloatRegister, $Vtmp2$$FloatRegister,
                        $Vtmp3$$FloatRegister, $Vtmp4$$FloatRegister);
  %}

  ins_pipe(pipe_class_memory);
%}
14026 
14027 // ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type for this rule.
// Matches ThreadLocal into the thread_RegP operand class; the encoding is
// empty, the size is zero and the cost is zero.
instruct tlsLoadP(thread_RegP dst) %{
  match(Set dst (ThreadLocal));

  ins_cost(0);
  size(0);
  format %{ " -- \t// $dst=Thread::current(), empty" %}

  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_empty);
%}
14045 
14046 // ====================VECTOR INSTRUCTIONS=====================================
14047 
// Load vector (32 bits)
// Applies when the LoadVector node reports a memory size of 4 bytes.
instruct loadV4(vecD dst, vmem4 mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}

  ins_encode(aarch64_enc_ldrvS(dst, mem));
  ins_pipe(vload_reg_mem64);
%}
14058 
// Load vector (64 bits)
// Applies when the LoadVector node reports a memory size of 8 bytes.
instruct loadV8(vecD dst, vmem8 mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}

  ins_encode(aarch64_enc_ldrvD(dst, mem));
  ins_pipe(vload_reg_mem64);
%}
14069 
// Load vector (128 bits)
// Applies when the LoadVector node reports a memory size of 16 bytes.
instruct loadV16(vecX dst, vmem16 mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}

  ins_encode(aarch64_enc_ldrvQ(dst, mem));
  ins_pipe(vload_reg_mem128);
%}
14080 
// Store vector (32 bits)
// Applies when the StoreVector node reports a memory size of 4 bytes.
instruct storeV4(vecD src, vmem4 mem) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));

  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}

  ins_encode(aarch64_enc_strvS(src, mem));
  ins_pipe(vstore_reg_mem64);
%}
14091 
// Store vector (64 bits)
// Applies when the StoreVector node reports a memory size of 8 bytes.
instruct storeV8(vecD src, vmem8 mem) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));

  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}

  ins_encode(aarch64_enc_strvD(src, mem));
  ins_pipe(vstore_reg_mem64);
%}
14102 
// Store vector (128 bits)
// Applies when the StoreVector node reports a memory size of 16 bytes.
instruct storeV16(vecX src, vmem16 mem) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));

  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}

  ins_encode(aarch64_enc_strvQ(src, mem));
  ins_pipe(vstore_reg_mem128);
%}
14113 
// ReplicateB from a general register into a vecD register; accepted for
// vectors of 4 or 8 elements.  Emits dup with the T8B arrangement.
instruct replicate8B(vecD dst, iRegIorL2I src) %{
  predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));

  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}

  ins_encode %{
    __ dup($dst$$FloatRegister, __ T8B, $src$$Register);
  %}

  ins_pipe(vdup_reg_reg64);
%}
14126 
// ReplicateB from a general register into a vecX register; accepted for
// vectors of 16 elements.  Emits dup with the T16B arrangement.
instruct replicate16B(vecX dst, iRegIorL2I src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));

  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}

  ins_encode %{
    __ dup($dst$$FloatRegister, __ T16B, $src$$Register);
  %}

  ins_pipe(vdup_reg_reg128);
%}
14138 
// ReplicateB of an int constant into a vecD register; accepted for vectors
// of 4 or 8 elements.  The constant is masked to its low 8 bits before
// being handed to the vector mov with the T8B arrangement.
instruct replicate8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));

  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}

  ins_encode %{
    __ mov($dst$$FloatRegister, __ T8B, $con$$constant & 0xff);
  %}

  ins_pipe(vmovi_reg_imm64);
%}
14151 
// ReplicateB of an int constant into a vecX register; accepted for vectors
// of 16 elements.  The constant is masked to its low 8 bits before being
// handed to the vector mov with the T16B arrangement.
instruct replicate16B_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));

  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}

  ins_encode %{
    __ mov($dst$$FloatRegister, __ T16B, $con$$constant & 0xff);
  %}

  ins_pipe(vmovi_reg_imm128);
%}
14163 
// ReplicateS from a general register into a vecD register; accepted for
// vectors of 2 or 4 elements.  Emits dup with the T4H arrangement.
instruct replicate4S(vecD dst, iRegIorL2I src) %{
  predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));

  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}

  ins_encode %{
    __ dup($dst$$FloatRegister, __ T4H, $src$$Register);
  %}

  ins_pipe(vdup_reg_reg64);
%}
14176 
// ReplicateS from a general register into a vecX register; accepted for
// vectors of 8 elements.  Emits dup with the T8H arrangement.
instruct replicate8S(vecX dst, iRegIorL2I src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));

  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}

  ins_encode %{
    __ dup($dst$$FloatRegister, __ T8H, $src$$Register);
  %}

  ins_pipe(vdup_reg_reg128);
%}
14188 
// ReplicateS of an int constant into a vecD register; accepted for vectors
// of 2 or 4 elements.  The constant is masked to its low 16 bits before
// being handed to the vector mov with the T4H arrangement.
instruct replicate4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));

  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}

  ins_encode %{
    __ mov($dst$$FloatRegister, __ T4H, $con$$constant & 0xffff);
  %}

  ins_pipe(vmovi_reg_imm64);
%}
14201 
// ReplicateS of an int constant into a vecX register; accepted for vectors
// of 8 elements.  The constant is masked to its low 16 bits before being
// handed to the vector mov with the T8H arrangement.
instruct replicate8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));

  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}

  ins_encode %{
    __ mov($dst$$FloatRegister, __ T8H, $con$$constant & 0xffff);
  %}

  ins_pipe(vmovi_reg_imm128);
%}
14213 
// ReplicateI from a general register into a vecD register; accepted for
// vectors of 2 elements.  Emits dup with the T2S arrangement.
instruct replicate2I(vecD dst, iRegIorL2I src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));

  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}

  ins_encode %{
    __ dup($dst$$FloatRegister, __ T2S, $src$$Register);
  %}

  ins_pipe(vdup_reg_reg64);
%}
14225 
// ReplicateI from a general register into a vecX register; accepted for
// vectors of 4 elements.  Emits dup with the T4S arrangement.
instruct replicate4I(vecX dst, iRegIorL2I src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));

  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}

  ins_encode %{
    __ dup($dst$$FloatRegister, __ T4S, $src$$Register);
  %}

  ins_pipe(vdup_reg_reg128);
%}
14237 
// ReplicateI of an int constant into a vecD register; accepted for vectors
// of 2 elements.  The constant is passed unmasked to the vector mov with
// the T2S arrangement.
instruct replicate2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));

  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}

  ins_encode %{
    __ mov($dst$$FloatRegister, __ T2S, $con$$constant);
  %}

  ins_pipe(vmovi_reg_imm64);
%}
14249 
// ReplicateI of an int constant into a vecX register; accepted for vectors
// of 4 elements.  The constant is passed unmasked to the vector mov with
// the T4S arrangement.
instruct replicate4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));

  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}

  ins_encode %{
    __ mov($dst$$FloatRegister, __ T4S, $con$$constant);
  %}

  ins_pipe(vmovi_reg_imm128);
%}
14261 
// ReplicateL from a long general register into a vecX register; accepted
// for vectors of 2 elements.  Emits dup with the T2D arrangement.
instruct replicate2L(vecX dst, iRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));

  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}

  ins_encode %{
    __ dup($dst$$FloatRegister, __ T2D, $src$$Register);
  %}

  ins_pipe(vdup_reg_reg128);
%}
14273 
// Zero a vecX register by exclusive-or of the destination with itself
// (T16B arrangement).
// NOTE(review): the rule name says 2L, the match rule is ReplicateI with an
// immI0 operand and a length-2 predicate, and the format string says 4I.
// These do not obviously agree -- confirm which node this rule is meant to
// cover before changing anything here.
instruct replicate2L_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));

  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}

  ins_encode %{
    __ eor($dst$$FloatRegister, __ T16B,
           $dst$$FloatRegister, $dst$$FloatRegister);
  %}

  ins_pipe(vmovi_reg_imm128);
%}
14287 
// ReplicateF from a float register into a vecD register; accepted for
// vectors of 2 elements.  Emits dup with the T2S arrangement.
instruct replicate2F(vecD dst, vRegF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));

  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}

  ins_encode %{
    __ dup($dst$$FloatRegister, __ T2S, $src$$FloatRegister);
  %}

  ins_pipe(vdup_reg_freg64);
%}
14300 
// ReplicateF from a float register into a vecX register; accepted for
// vectors of 4 elements.  Emits dup with the T4S arrangement.
instruct replicate4F(vecX dst, vRegF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));

  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}

  ins_encode %{
    __ dup($dst$$FloatRegister, __ T4S, $src$$FloatRegister);
  %}

  ins_pipe(vdup_reg_freg128);
%}
14313 
// ReplicateD from a double register into a vecX register; accepted for
// vectors of 2 elements.  Emits dup with the T2D arrangement.
instruct replicate2D(vecX dst, vRegD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));

  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}

  ins_encode %{
    __ dup($dst$$FloatRegister, __ T2D, $src$$FloatRegister);
  %}

  ins_pipe(vdup_reg_dreg128);
%}
14326 
14327 // ====================VECTOR ARITHMETIC=======================================
14328 
14329 // --------------------------------- ADD --------------------------------------
14330 
// AddVB on vecD registers; accepted for vectors of 4 or 8 elements.
// Emits addv with the T8B arrangement.
instruct vadd8B(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));

  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}

  ins_encode %{
    __ addv($dst$$FloatRegister, __ T8B,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vdop64);
%}
14345 
// AddVB on vecX registers; accepted for vectors of 16 elements.
// Emits addv with the T16B arrangement.
instruct vadd16B(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));

  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}

  ins_encode %{
    __ addv($dst$$FloatRegister, __ T16B,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vdop128);
%}
14359 
// AddVS on vecD registers; accepted for vectors of 2 or 4 elements.
// Emits addv with the T4H arrangement.
instruct vadd4S(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));

  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}

  ins_encode %{
    __ addv($dst$$FloatRegister, __ T4H,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vdop64);
%}
14374 
// AddVS on vecX registers; accepted for vectors of 8 elements.
// Emits addv with the T8H arrangement.
instruct vadd8S(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));

  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}

  ins_encode %{
    __ addv($dst$$FloatRegister, __ T8H,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vdop128);
%}
14388 
// AddVI on vecD registers; accepted for vectors of 2 elements.
// Emits addv with the T2S arrangement.
instruct vadd2I(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}

  ins_encode %{
    __ addv($dst$$FloatRegister, __ T2S,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vdop64);
%}
14402 
// AddVI on vecX registers; accepted for vectors of 4 elements.
// Emits addv with the T4S arrangement.
instruct vadd4I(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}

  ins_encode %{
    __ addv($dst$$FloatRegister, __ T4S,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vdop128);
%}
14416 
// AddVL on vecX registers; accepted for vectors of 2 elements.
// Emits addv with the T2D arrangement.
instruct vadd2L(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));

  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}

  ins_encode %{
    __ addv($dst$$FloatRegister, __ T2D,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vdop128);
%}
14430 
// AddVF on vecD registers; accepted for vectors of 2 elements.
// Emits fadd with the T2S arrangement.
instruct vadd2F(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));

  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}

  ins_encode %{
    __ fadd($dst$$FloatRegister, __ T2S,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vdop_fp64);
%}
14444 
// AddVF on vecX registers; accepted for vectors of 4 elements.
// Emits fadd with the T4S arrangement.
instruct vadd4F(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));

  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}

  ins_encode %{
    __ fadd($dst$$FloatRegister, __ T4S,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vdop_fp128);
%}
14458 
// AddVD on vecX registers.  Emits fadd with the T2D arrangement.
// The explicit two-element predicate brings this rule in line with the
// other 2D arithmetic rules in this file (vsub2D, vmul2D), which all guard
// on the vector length.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
14471 
14472 // --------------------------------- SUB --------------------------------------
14473 
// SubVB on vecD registers; accepted for vectors of 4 or 8 elements.
// Emits subv with the T8B arrangement.
instruct vsub8B(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));

  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}

  ins_encode %{
    __ subv($dst$$FloatRegister, __ T8B,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vdop64);
%}
14488 
// SubVB on vecX registers; accepted for vectors of 16 elements.
// Emits subv with the T16B arrangement.
instruct vsub16B(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));

  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}

  ins_encode %{
    __ subv($dst$$FloatRegister, __ T16B,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vdop128);
%}
14502 
// SubVS on vecD registers; accepted for vectors of 2 or 4 elements.
// Emits subv with the T4H arrangement.
instruct vsub4S(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));

  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}

  ins_encode %{
    __ subv($dst$$FloatRegister, __ T4H,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vdop64);
%}
14517 
// SubVS on vecX registers; accepted for vectors of 8 elements.
// Emits subv with the T8H arrangement.
instruct vsub8S(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));

  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}

  ins_encode %{
    __ subv($dst$$FloatRegister, __ T8H,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vdop128);
%}
14531 
// SubVI on vecD registers; accepted for vectors of 2 elements.
// Emits subv with the T2S arrangement.
instruct vsub2I(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}

  ins_encode %{
    __ subv($dst$$FloatRegister, __ T2S,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vdop64);
%}
14545 
// SubVI on vecX registers; accepted for vectors of 4 elements.
// Emits subv with the T4S arrangement.
instruct vsub4I(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}

  ins_encode %{
    __ subv($dst$$FloatRegister, __ T4S,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vdop128);
%}
14559 
// SubVL on vecX registers; accepted for vectors of 2 elements.
// Emits subv with the T2D arrangement.
instruct vsub2L(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));

  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}

  ins_encode %{
    __ subv($dst$$FloatRegister, __ T2D,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vdop128);
%}
14573 
// SubVF on vecD registers; accepted for vectors of 2 elements.
// Emits fsub with the T2S arrangement.
instruct vsub2F(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));

  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}

  ins_encode %{
    __ fsub($dst$$FloatRegister, __ T2S,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vdop_fp64);
%}
14587 
// SubVF on vecX registers; accepted for vectors of 4 elements.
// Emits fsub with the T4S arrangement.
instruct vsub4F(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));

  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}

  ins_encode %{
    __ fsub($dst$$FloatRegister, __ T4S,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vdop_fp128);
%}
14601 
// SubVD on vecX registers; accepted for vectors of 2 elements.
// Emits fsub with the T2D arrangement.
instruct vsub2D(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));

  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}

  ins_encode %{
    __ fsub($dst$$FloatRegister, __ T2D,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vdop_fp128);
%}
14615 
14616 // --------------------------------- MUL --------------------------------------
14617 
// MulVS on vecD registers; accepted for vectors of 2 or 4 elements.
// Emits mulv with the T4H arrangement.
instruct vmul4S(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));

  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}

  ins_encode %{
    __ mulv($dst$$FloatRegister, __ T4H,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vmul64);
%}
14632 
// MulVS on vecX registers; accepted for vectors of 8 elements.
// Emits mulv with the T8H arrangement.
instruct vmul8S(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));

  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}

  ins_encode %{
    __ mulv($dst$$FloatRegister, __ T8H,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vmul128);
%}
14646 
// MulVI on vecD registers; accepted for vectors of 2 elements.
// Emits mulv with the T2S arrangement.
instruct vmul2I(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));

  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}

  ins_encode %{
    __ mulv($dst$$FloatRegister, __ T2S,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vmul64);
%}
14660 
// MulVI on vecX registers; accepted for vectors of 4 elements.
// Emits mulv with the T4S arrangement.
instruct vmul4I(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));

  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}

  ins_encode %{
    __ mulv($dst$$FloatRegister, __ T4S,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vmul128);
%}
14674 
// MulVF on vecD registers; accepted for vectors of 2 elements.
// Emits fmul with the T2S arrangement.
instruct vmul2F(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));

  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}

  ins_encode %{
    __ fmul($dst$$FloatRegister, __ T2S,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vmuldiv_fp64);
%}
14688 
// MulVF on vecX registers; accepted for vectors of 4 elements.
// Emits fmul with the T4S arrangement.
instruct vmul4F(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));

  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}

  ins_encode %{
    __ fmul($dst$$FloatRegister, __ T4S,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vmuldiv_fp128);
%}
14702 
// MulVD on vecX registers; accepted for vectors of 2 elements.
// Emits fmul with the T2D arrangement.
instruct vmul2D(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));

  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}

  ins_encode %{
    __ fmul($dst$$FloatRegister, __ T2D,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vmuldiv_fp128);
%}
14716 
14717 // --------------------------------- MLA --------------------------------------
14718 
// Multiply-accumulate: matches AddVS of dst with a MulVS of src1 and src2,
// so dst is both an input and the result.  Accepted for vectors of 2 or 4
// elements; emits mlav with the T4H arrangement.
instruct vmla4S(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));

  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}

  ins_encode %{
    __ mlav($dst$$FloatRegister, __ T4H,
            $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(vmla64);
%}
14733 
// Short vector multiply-accumulate for eight-element vectors:
// matches dst = dst + (src1 * src2) and emits mlav with the T8H arrangement.
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    FloatRegister acc = as_FloatRegister($dst$$reg);
    FloatRegister vn  = as_FloatRegister($src1$$reg);
    FloatRegister vm  = as_FloatRegister($src2$$reg);
    __ mlav(acc, __ T8H, vn, vm);
  %}
  ins_pipe(vmla128);
%}
14747 
// Int vector multiply-accumulate for two-element vectors:
// matches dst = dst + (src1 * src2) and emits mlav with the T2S arrangement.
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister acc = as_FloatRegister($dst$$reg);
    FloatRegister vn  = as_FloatRegister($src1$$reg);
    FloatRegister vm  = as_FloatRegister($src2$$reg);
    __ mlav(acc, __ T2S, vn, vm);
  %}
  ins_pipe(vmla64);
%}
14761 
// Int vector multiply-accumulate for four-element vectors:
// matches dst = dst + (src1 * src2) and emits mlav with the T4S arrangement.
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister acc = as_FloatRegister($dst$$reg);
    FloatRegister vn  = as_FloatRegister($src1$$reg);
    FloatRegister vm  = as_FloatRegister($src2$$reg);
    __ mlav(acc, __ T4S, vn, vm);
  %}
  ins_pipe(vmla128);
%}
14775 
14776 // --------------------------------- MLS --------------------------------------
14777 
// Short vector multiply-subtract for two- or four-element vectors:
// matches dst = dst - (src1 * src2) and emits mlsv with the T4H arrangement.
instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    FloatRegister acc = as_FloatRegister($dst$$reg);
    FloatRegister vn  = as_FloatRegister($src1$$reg);
    FloatRegister vm  = as_FloatRegister($src2$$reg);
    __ mlsv(acc, __ T4H, vn, vm);
  %}
  ins_pipe(vmla64);
%}
14792 
// Short vector multiply-subtract for eight-element vectors:
// matches dst = dst - (src1 * src2) and emits mlsv with the T8H arrangement.
instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    FloatRegister acc = as_FloatRegister($dst$$reg);
    FloatRegister vn  = as_FloatRegister($src1$$reg);
    FloatRegister vm  = as_FloatRegister($src2$$reg);
    __ mlsv(acc, __ T8H, vn, vm);
  %}
  ins_pipe(vmla128);
%}
14806 
// Int vector multiply-subtract for two-element vectors:
// matches dst = dst - (src1 * src2) and emits mlsv with the T2S arrangement.
instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister acc = as_FloatRegister($dst$$reg);
    FloatRegister vn  = as_FloatRegister($src1$$reg);
    FloatRegister vm  = as_FloatRegister($src2$$reg);
    __ mlsv(acc, __ T2S, vn, vm);
  %}
  ins_pipe(vmla64);
%}
14820 
// Int vector multiply-subtract for four-element vectors:
// matches dst = dst - (src1 * src2) and emits mlsv with the T4S arrangement.
instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister acc = as_FloatRegister($dst$$reg);
    FloatRegister vn  = as_FloatRegister($src1$$reg);
    FloatRegister vm  = as_FloatRegister($src2$$reg);
    __ mlsv(acc, __ T4S, vn, vm);
  %}
  ins_pipe(vmla128);
%}
14834 
14835 // --------------------------------- DIV --------------------------------------
14836 
// Float vector divide for two-element vectors: matches DivVF and emits
// fdiv with the T2S arrangement.
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ fdiv(vd, __ T2S, vn, vm);
  %}
  ins_pipe(vmuldiv_fp64);
%}
14850 
// Float vector divide for four-element vectors: matches DivVF and emits
// fdiv with the T4S arrangement.
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ fdiv(vd, __ T4S, vn, vm);
  %}
  ins_pipe(vmuldiv_fp128);
%}
14864 
// Double vector divide for two-element vectors: matches DivVD and emits
// fdiv with the T2D arrangement.
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ fdiv(vd, __ T2D, vn, vm);
  %}
  ins_pipe(vmuldiv_fp128);
%}
14878 
14879 // --------------------------------- AND --------------------------------------
14880 
// Bitwise AND for vectors of 4 or 8 bytes: matches AndV and emits andr
// with the T8B arrangement.
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ andr(vd, __ T8B, vn, vm);
  %}
  ins_pipe(vlogical64);
%}
14895 
// Bitwise AND for 16-byte vectors: matches AndV and emits andr with the
// T16B arrangement.
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ andr(vd, __ T16B, vn, vm);
  %}
  ins_pipe(vlogical128);
%}
14909 
14910 // --------------------------------- OR ---------------------------------------
14911 
// Bitwise OR for vectors of 4 or 8 bytes: matches OrV and emits orr with
// the T8B arrangement. The format text names the emitted instruction (orr),
// in line with the vor16B rule.
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
14926 
// Bitwise OR for 16-byte vectors: matches OrV and emits orr with the T16B
// arrangement.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ orr(vd, __ T16B, vn, vm);
  %}
  ins_pipe(vlogical128);
%}
14940 
14941 // --------------------------------- XOR --------------------------------------
14942 
// Bitwise XOR for vectors of 4 or 8 bytes: matches XorV and emits eor with
// the T8B arrangement.
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ eor(vd, __ T8B, vn, vm);
  %}
  ins_pipe(vlogical64);
%}
14957 
// Bitwise XOR for 16-byte vectors: matches XorV and emits eor with the
// T16B arrangement.
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ eor(vd, __ T16B, vn, vm);
  %}
  ins_pipe(vlogical128);
%}
14971 
14972 // ------------------------------ Shift ---------------------------------------
// Builds a vecD shift-count vector by duplicating the scalar count into
// every byte (dup, T8B). Matches both LShiftCntV and RShiftCntV.
// The vecD shift rules in this file also accept 4-byte vectors, so the
// count producer accepts 4-byte as well as 8-byte vectors to stay
// consistent with them.
instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
14983 
// Builds a vecX shift-count vector for 16-byte vectors by duplicating the
// scalar count into every byte (dup, T16B). Matches both LShiftCntV and
// RShiftCntV.
instruct vshiftcnt16B(vecX dst, iRegIorL2I cnt) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count vector (16B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    Register rcnt = as_Register($cnt$$reg);
    __ dup(vd, __ T16B, rcnt);
  %}
  ins_pipe(vdup_reg_reg128);
%}
14994 
// Byte vector left shift by a vector count, for four- or eight-element
// vectors: matches LShiftVB and emits sshl with the T8B arrangement.
instruct vsll8B(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    FloatRegister vd   = as_FloatRegister($dst$$reg);
    FloatRegister vn   = as_FloatRegister($src$$reg);
    FloatRegister vcnt = as_FloatRegister($shift$$reg);
    __ sshl(vd, __ T8B, vn, vcnt);
  %}
  ins_pipe(vshift64);
%}
15008 
// Byte vector left shift by a vector count, for sixteen-element vectors:
// matches LShiftVB and emits sshl with the T16B arrangement.
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    FloatRegister vd   = as_FloatRegister($dst$$reg);
    FloatRegister vn   = as_FloatRegister($src$$reg);
    FloatRegister vcnt = as_FloatRegister($shift$$reg);
    __ sshl(vd, __ T16B, vn, vcnt);
  %}
  ins_pipe(vshift128);
%}
15021 
15022 // Right shifts with vector shift count on aarch64 SIMD are implemented
15023 // as left shift by negative shift count.
15024 // There are two cases for vector shift count.
15025 //
15026 // Case 1: The vector shift count is from replication.
15027 //        |            |
15028 //    LoadVector  RShiftCntV
15029 //        |       /
15030 //     RShiftVI
// Note: In an inner loop, a neg instruction is emitted for each shift;
// these could be hoisted to the outer loop and merged into a single neg.
15033 //
15034 // Case 2: The vector shift count is from loading.
// This case isn't currently supported by the middle-end, but it is
// supported by panama/vectorIntrinsics (JEP 338: Vector API).
15037 //        |            |
15038 //    LoadVector  LoadVector
15039 //        |       /
15040 //     RShiftVI
15041 //
15042 
// Byte vector arithmetic right shift by a vector count, for four- or
// eight-element vectors: matches RShiftVB. The count is negated into the
// TEMP register and the shift is done with sshl (T8B).
instruct vsra8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    FloatRegister neg_cnt = as_FloatRegister($tmp$$reg);
    // Step 1: negated copy of the shift counts.
    __ negr(neg_cnt, __ T8B, as_FloatRegister($shift$$reg));
    // Step 2: shift using the negated counts.
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg), neg_cnt);
  %}
  ins_pipe(vshift64);
%}
15060 
// Byte vector arithmetic right shift by a vector count, for sixteen-element
// vectors: matches RShiftVB. The count is negated into the TEMP register
// and the shift is done with sshl (T16B).
instruct vsra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    FloatRegister neg_cnt = as_FloatRegister($tmp$$reg);
    // Step 1: negated copy of the shift counts.
    __ negr(neg_cnt, __ T16B, as_FloatRegister($shift$$reg));
    // Step 2: shift using the negated counts.
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg), neg_cnt);
  %}
  ins_pipe(vshift128);
%}
15077 
// Byte vector logical right shift by a vector count, for four- or
// eight-element vectors: matches URShiftVB. The count is negated into the
// TEMP register and the shift is done with ushl (T8B).
instruct vsrl8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    FloatRegister neg_cnt = as_FloatRegister($tmp$$reg);
    // Step 1: negated copy of the shift counts.
    __ negr(neg_cnt, __ T8B, as_FloatRegister($shift$$reg));
    // Step 2: shift using the negated counts.
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg), neg_cnt);
  %}
  ins_pipe(vshift64);
%}
15095 
// Byte vector logical right shift by a vector count, for sixteen-element
// vectors: matches URShiftVB. The count is negated into the TEMP register
// and the shift is done with ushl (T16B).
instruct vsrl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    FloatRegister neg_cnt = as_FloatRegister($tmp$$reg);
    // Step 1: negated copy of the shift counts.
    __ negr(neg_cnt, __ T16B, as_FloatRegister($shift$$reg));
    // Step 2: shift using the negated counts.
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg), neg_cnt);
  %}
  ins_pipe(vshift128);
%}
15112 
// Byte vector left shift by an immediate, for four- or eight-element
// vectors: matches LShiftVB. The constant is masked with 31; a masked count
// below 8 emits shl (T8B), otherwise dst is cleared with eor src,src.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    int sh = (int)$shift$$constant & 31;
    if (sh < 8) {
      __ shl(vd, __ T8B, vn, sh);
    } else {
      __ eor(vd, __ T8B, vn, vn);
    }
  %}
  ins_pipe(vshift64_imm);
%}
15132 
// Byte vector left shift by an immediate, for sixteen-element vectors:
// matches LShiftVB. The constant is masked with 31; a masked count below 8
// emits shl (T16B), otherwise dst is cleared with eor src,src.
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    int sh = (int)$shift$$constant & 31;
    if (sh < 8) {
      __ shl(vd, __ T16B, vn, sh);
    } else {
      __ eor(vd, __ T16B, vn, vn);
    }
  %}
  ins_pipe(vshift128_imm);
%}
15151 
// Byte vector arithmetic right shift by an immediate, for four- or
// eight-element vectors: matches RShiftVB. The constant is masked with 31,
// clamped to at most 7, and passed to sshr (T8B) as (-count & 7).
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int masked = (int)$shift$$constant & 31;
    int clamped = (masked >= 8) ? 7 : masked;
    int sh = -clamped & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
15167 
// Byte vector arithmetic right shift by an immediate, for sixteen-element
// vectors: matches RShiftVB. The constant is masked with 31, clamped to at
// most 7, and passed to sshr (T16B) as (-count & 7).
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int masked = (int)$shift$$constant & 31;
    int clamped = (masked >= 8) ? 7 : masked;
    int sh = -clamped & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
15182 
// Byte vector logical right shift by an immediate, for four- or
// eight-element vectors: matches URShiftVB. The constant is masked with 31;
// a masked count below 8 emits ushr (T8B) with (-count & 7), otherwise dst
// is cleared with eor src,src.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    int sh = (int)$shift$$constant & 31;
    if (sh < 8) {
      __ ushr(vd, __ T8B, vn, -sh & 7);
    } else {
      __ eor(vd, __ T8B, vn, vn);
    }
  %}
  ins_pipe(vshift64_imm);
%}
15202 
// Byte vector logical right shift by an immediate, for sixteen-element
// vectors: matches URShiftVB. The constant is masked with 31; a masked
// count below 8 emits ushr (T16B) with (-count & 7), otherwise dst is
// cleared with eor src,src.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    int sh = (int)$shift$$constant & 31;
    if (sh < 8) {
      __ ushr(vd, __ T16B, vn, -sh & 7);
    } else {
      __ eor(vd, __ T16B, vn, vn);
    }
  %}
  ins_pipe(vshift128_imm);
%}
15221 
// Short vector left shift by a vector count, for two- or four-element
// vectors: matches LShiftVS and emits sshl with the T4H arrangement.
instruct vsll4S(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    FloatRegister vd   = as_FloatRegister($dst$$reg);
    FloatRegister vn   = as_FloatRegister($src$$reg);
    FloatRegister vcnt = as_FloatRegister($shift$$reg);
    __ sshl(vd, __ T4H, vn, vcnt);
  %}
  ins_pipe(vshift64);
%}
15235 
// Short vector left shift by a vector count, for eight-element vectors:
// matches LShiftVS and emits sshl with the T8H arrangement.
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    FloatRegister vd   = as_FloatRegister($dst$$reg);
    FloatRegister vn   = as_FloatRegister($src$$reg);
    FloatRegister vcnt = as_FloatRegister($shift$$reg);
    __ sshl(vd, __ T8H, vn, vcnt);
  %}
  ins_pipe(vshift128);
%}
15248 
// Short vector arithmetic right shift by a vector count, for two- or
// four-element vectors: matches RShiftVS. The count is negated (negr, T8B)
// into the TEMP register and the shift is done with sshl (T4H).
instruct vsra4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    FloatRegister neg_cnt = as_FloatRegister($tmp$$reg);
    // Step 1: negated copy of the shift counts.
    __ negr(neg_cnt, __ T8B, as_FloatRegister($shift$$reg));
    // Step 2: shift using the negated counts.
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg), neg_cnt);
  %}
  ins_pipe(vshift64);
%}
15266 
// Short vector arithmetic right shift by a vector count, for eight-element
// vectors: matches RShiftVS. The count is negated (negr, T16B) into the
// TEMP register and the shift is done with sshl (T8H).
instruct vsra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    FloatRegister neg_cnt = as_FloatRegister($tmp$$reg);
    // Step 1: negated copy of the shift counts.
    __ negr(neg_cnt, __ T16B, as_FloatRegister($shift$$reg));
    // Step 2: shift using the negated counts.
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg), neg_cnt);
  %}
  ins_pipe(vshift128);
%}
15283 
// Short vector logical right shift by a vector count, for two- or
// four-element vectors: matches URShiftVS. The count is negated (negr, T8B)
// into the TEMP register and the shift is done with ushl (T4H).
instruct vsrl4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    FloatRegister neg_cnt = as_FloatRegister($tmp$$reg);
    // Step 1: negated copy of the shift counts.
    __ negr(neg_cnt, __ T8B, as_FloatRegister($shift$$reg));
    // Step 2: shift using the negated counts.
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg), neg_cnt);
  %}
  ins_pipe(vshift64);
%}
15301 
// Short vector logical right shift by a vector count, for eight-element
// vectors: matches URShiftVS. The count is negated (negr, T16B) into the
// TEMP register and the shift is done with ushl (T8H).
instruct vsrl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    FloatRegister neg_cnt = as_FloatRegister($tmp$$reg);
    // Step 1: negated copy of the shift counts.
    __ negr(neg_cnt, __ T16B, as_FloatRegister($shift$$reg));
    // Step 2: shift using the negated counts.
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg), neg_cnt);
  %}
  ins_pipe(vshift128);
%}
15318 
// Short vector left shift by an immediate, for two- or four-element
// vectors: matches LShiftVS. The constant is masked with 31; a masked count
// below 16 emits shl (T4H), otherwise dst is cleared with eor src,src (T8B).
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    int sh = (int)$shift$$constant & 31;
    if (sh < 16) {
      __ shl(vd, __ T4H, vn, sh);
    } else {
      __ eor(vd, __ T8B, vn, vn);
    }
  %}
  ins_pipe(vshift64_imm);
%}
15338 
// Short vector left shift by an immediate, for eight-element vectors:
// matches LShiftVS. The constant is masked with 31; a masked count below 16
// emits shl (T8H), otherwise dst is cleared with eor src,src (T16B).
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    int sh = (int)$shift$$constant & 31;
    if (sh < 16) {
      __ shl(vd, __ T8H, vn, sh);
    } else {
      __ eor(vd, __ T16B, vn, vn);
    }
  %}
  ins_pipe(vshift128_imm);
%}
15357 
// Short vector arithmetic right shift by an immediate, for two- or
// four-element vectors: matches RShiftVS. The constant is masked with 31,
// clamped to at most 15, and passed to sshr (T4H) as (-count & 15).
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int masked = (int)$shift$$constant & 31;
    int clamped = (masked >= 16) ? 15 : masked;
    int sh = -clamped & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
15373 
// Short vector arithmetic right shift by an immediate, for eight-element
// vectors: matches RShiftVS. The constant is masked with 31, clamped to at
// most 15, and passed to sshr (T8H) as (-count & 15).
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int masked = (int)$shift$$constant & 31;
    int clamped = (masked >= 16) ? 15 : masked;
    int sh = -clamped & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
15388 
// Short vector logical right shift by an immediate, for two- or
// four-element vectors: matches URShiftVS. The constant is masked with 31;
// a masked count below 16 emits ushr (T4H) with (-count & 15), otherwise
// dst is cleared with eor src,src (T8B).
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    int sh = (int)$shift$$constant & 31;
    if (sh < 16) {
      __ ushr(vd, __ T4H, vn, -sh & 15);
    } else {
      __ eor(vd, __ T8B, vn, vn);
    }
  %}
  ins_pipe(vshift64_imm);
%}
15408 
// Short vector logical right shift by an immediate, for eight-element
// vectors: matches URShiftVS. The constant is masked with 31; a masked
// count below 16 emits ushr (T8H) with (-count & 15), otherwise dst is
// cleared with eor src,src (T16B).
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    int sh = (int)$shift$$constant & 31;
    if (sh < 16) {
      __ ushr(vd, __ T8H, vn, -sh & 15);
    } else {
      __ eor(vd, __ T16B, vn, vn);
    }
  %}
  ins_pipe(vshift128_imm);
%}
15427 
// Int vector left shift by a vector count, for two-element vectors:
// matches LShiftVI and emits sshl with the T2S arrangement.
instruct vsll2I(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    FloatRegister vd   = as_FloatRegister($dst$$reg);
    FloatRegister vn   = as_FloatRegister($src$$reg);
    FloatRegister vcnt = as_FloatRegister($shift$$reg);
    __ sshl(vd, __ T2S, vn, vcnt);
  %}
  ins_pipe(vshift64);
%}
15440 
// Int vector left shift by a vector count, for four-element vectors:
// matches LShiftVI and emits sshl with the T4S arrangement.
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    FloatRegister vd   = as_FloatRegister($dst$$reg);
    FloatRegister vn   = as_FloatRegister($src$$reg);
    FloatRegister vcnt = as_FloatRegister($shift$$reg);
    __ sshl(vd, __ T4S, vn, vcnt);
  %}
  ins_pipe(vshift128);
%}
15453 
// Int vector arithmetic right shift by a vector count, for two-element
// vectors: matches RShiftVI. The count is negated (negr, T8B) into the
// TEMP register and the shift is done with sshl (T2S).
instruct vsra2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    FloatRegister neg_cnt = as_FloatRegister($tmp$$reg);
    // Step 1: negated copy of the shift counts.
    __ negr(neg_cnt, __ T8B, as_FloatRegister($shift$$reg));
    // Step 2: shift using the negated counts.
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg), neg_cnt);
  %}
  ins_pipe(vshift64);
%}
15470 
// Int vector arithmetic right shift by a vector count, for four-element
// vectors: matches RShiftVI. The count is negated (negr, T16B) into the
// TEMP register and the shift is done with sshl (T4S).
instruct vsra4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    FloatRegister neg_cnt = as_FloatRegister($tmp$$reg);
    // Step 1: negated copy of the shift counts.
    __ negr(neg_cnt, __ T16B, as_FloatRegister($shift$$reg));
    // Step 2: shift using the negated counts.
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg), neg_cnt);
  %}
  ins_pipe(vshift128);
%}
15487 
// Int vector logical right shift by a vector count, for two-element
// vectors: matches URShiftVI. The count is negated (negr, T8B) into the
// TEMP register and the shift is done with ushl (T2S).
instruct vsrl2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    FloatRegister neg_cnt = as_FloatRegister($tmp$$reg);
    // Step 1: negated copy of the shift counts.
    __ negr(neg_cnt, __ T8B, as_FloatRegister($shift$$reg));
    // Step 2: shift using the negated counts.
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg), neg_cnt);
  %}
  ins_pipe(vshift64);
%}
15504 
// Int vector logical right shift by a vector count, for four-element
// vectors: matches URShiftVI. The count is negated (negr, T16B) into the
// TEMP register and the shift is done with ushl (T4S).
instruct vsrl4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    FloatRegister neg_cnt = as_FloatRegister($tmp$$reg);
    // Step 1: negated copy of the shift counts.
    __ negr(neg_cnt, __ T16B, as_FloatRegister($shift$$reg));
    // Step 2: shift using the negated counts.
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg), neg_cnt);
  %}
  ins_pipe(vshift128);
%}
15521 
// Int vector left shift by an immediate, for two-element vectors: matches
// LShiftVI and emits shl (T2S) with the constant masked with 31.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
15534 
// Int vector left shift by an immediate, for four-element vectors: matches
// LShiftVI and emits shl (T4S) with the constant masked with 31.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
15547 
// Int vector arithmetic right shift by an immediate, for two-element
// vectors: matches RShiftVI and emits sshr (T2S), passing the negated
// constant masked with 31.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    int sh = -(int)$shift$$constant & 31;
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
15560 
// Int vector arithmetic right shift by an immediate, for four-element
// vectors: matches RShiftVI and emits sshr (T4S), passing the negated
// constant masked with 31.
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    int sh = -(int)$shift$$constant & 31;
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
15573 
// Int vector logical right shift by an immediate, for two-element vectors:
// matches URShiftVI and emits ushr (T2S), passing the negated constant
// masked with 31.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    int sh = -(int)$shift$$constant & 31;
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
15586 
// Int vector logical right shift by an immediate, for four-element vectors:
// matches URShiftVI and emits ushr (T4S), passing the negated constant
// masked with 31.
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    int sh = -(int)$shift$$constant & 31;
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
15599 
// vsll2L: register-count rule for LShiftVL on vectors whose length() is 2.
// Operands: dst, src and shift are all vecX registers.
// Emits a single sshl with the T2D arrangement, passing the shift register
// to the instruction unchanged (no negation, no temporary).
15600 instruct vsll2L(vecX dst, vecX src, vecX shift) %{
15601   predicate(n->as_Vector()->length() == 2);
15602   match(Set dst (LShiftVL src shift));
15603   ins_cost(INSN_COST);
15604   format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
15605   ins_encode %{
15606     __ sshl(as_FloatRegister($dst$$reg), __ T2D,
15607             as_FloatRegister($src$$reg),
15608             as_FloatRegister($shift$$reg));
15609   %}
15610   ins_pipe(vshift128);
15611 %}
15612 
// vsra2L: register-count rule for RShiftVL on vectors whose length() is 2.
// Operands: dst, src and shift are vecX registers; tmp is a vecX scratch
// register declared TEMP, so it is written by this rule.
// Emits two instructions: negr writes the negation of shift into tmp, then
// sshl (T2D) shifts src by tmp into dst. The right shift is thus expressed
// as sshl with a negated count.
// NOTE(review): negr uses the T16B (byte-wise) arrangement while sshl works
// on 2D lanes -- presumably sshl only reads the low byte of each lane as its
// count; confirm against the Arm SSHL (vector) description.
// NOTE(review): the two format strings are separated by "\t" only, so both
// mnemonics are printed on one line in the disassembly listing.
15613 instruct vsra2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
15614   predicate(n->as_Vector()->length() == 2);
15615   match(Set dst (RShiftVL src shift));
15616   ins_cost(INSN_COST);
15617   effect(TEMP tmp);
15618   format %{ "negr  $tmp,$shift\t"
15619             "sshl  $dst,$src,$tmp\t# vector (2D)" %}
15620   ins_encode %{
15621     __ negr(as_FloatRegister($tmp$$reg), __ T16B,
15622             as_FloatRegister($shift$$reg));
15623     __ sshl(as_FloatRegister($dst$$reg), __ T2D,
15624             as_FloatRegister($src$$reg),
15625             as_FloatRegister($tmp$$reg));
15626   %}
15627   ins_pipe(vshift128);
15628 %}
15629 
// vsrl2L: register-count rule for URShiftVL on vectors whose length() is 2.
// Operands: dst, src and shift are vecX registers; tmp is a vecX scratch
// register declared TEMP, so it is written by this rule.
// Emits two instructions: negr writes the negation of shift into tmp, then
// ushl (T2D) shifts src by tmp into dst. The right shift is thus expressed
// as ushl with a negated count.
// NOTE(review): negr uses the T16B (byte-wise) arrangement while ushl works
// on 2D lanes -- presumably ushl only reads the low byte of each lane as its
// count; confirm against the Arm USHL (vector) description.
// NOTE(review): the two format strings are separated by "\t" only, so both
// mnemonics are printed on one line in the disassembly listing.
15630 instruct vsrl2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
15631   predicate(n->as_Vector()->length() == 2);
15632   match(Set dst (URShiftVL src shift));
15633   ins_cost(INSN_COST);
15634   effect(TEMP tmp);
15635   format %{ "negr  $tmp,$shift\t"
15636             "ushl  $dst,$src,$tmp\t# vector (2D)" %}
15637   ins_encode %{
15638     __ negr(as_FloatRegister($tmp$$reg), __ T16B,
15639             as_FloatRegister($shift$$reg));
15640     __ ushl(as_FloatRegister($dst$$reg), __ T2D,
15641             as_FloatRegister($src$$reg),
15642             as_FloatRegister($tmp$$reg));
15643   %}
15644   ins_pipe(vshift128);
15645 %}
15646 
// vsll2L_imm: immediate-count rule for LShiftVL on vectors whose length() is 2.
// Operands: dst and src are vecX registers, shift is an immI constant.
// Emits a single shl with the T2D arrangement. The count handed to the
// assembler is the constant masked to its low six bits (shift & 63); unlike
// the right-shift immediate rules, it is not negated.
15647 instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
15648   predicate(n->as_Vector()->length() == 2);
15649   match(Set dst (LShiftVL src shift));
15650   ins_cost(INSN_COST);
15651   format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
15652   ins_encode %{
15653     __ shl(as_FloatRegister($dst$$reg), __ T2D,
15654            as_FloatRegister($src$$reg),
15655            (int)$shift$$constant & 63);
15656   %}
15657   ins_pipe(vshift128_imm);
15658 %}
15659 
// vsra2L_imm: immediate-count rule for RShiftVL on vectors whose length() is 2.
// Operands: dst and src are vecX registers, shift is an immI constant.
// Emits a single sshr with the T2D arrangement. The count handed to the
// assembler is the negated constant masked to its low six bits
// (-shift & 63), not the raw constant.
// NOTE(review): presumably the negated form is what this assembler's sshr()
// expects in its immediate field -- confirm against the assembler definition.
// NOTE(review): a constant that is a multiple of 64 evaluates to 0 here;
// verify what sshr() encodes for that value.
15660 instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
15661   predicate(n->as_Vector()->length() == 2);
15662   match(Set dst (RShiftVL src shift));
15663   ins_cost(INSN_COST);
15664   format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
15665   ins_encode %{
15666     __ sshr(as_FloatRegister($dst$$reg), __ T2D,
15667             as_FloatRegister($src$$reg),
15668             -(int)$shift$$constant & 63);
15669   %}
15670   ins_pipe(vshift128_imm);
15671 %}
15672 
// vsrl2L_imm: immediate-count rule for URShiftVL on vectors whose length() is 2.
// Operands: dst and src are vecX registers, shift is an immI constant.
// Emits a single ushr with the T2D arrangement. The count handed to the
// assembler is the negated constant masked to its low six bits
// (-shift & 63), not the raw constant.
// NOTE(review): presumably the negated form is what this assembler's ushr()
// expects in its immediate field -- confirm against the assembler definition.
// NOTE(review): a constant that is a multiple of 64 evaluates to 0 here;
// verify what ushr() encodes for that value.
15673 instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
15674   predicate(n->as_Vector()->length() == 2);
15675   match(Set dst (URShiftVL src shift));
15676   ins_cost(INSN_COST);
15677   format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
15678   ins_encode %{
15679     __ ushr(as_FloatRegister($dst$$reg), __ T2D,
15680             as_FloatRegister($src$$reg),
15681             -(int)$shift$$constant & 63);
15682   %}
15683   ins_pipe(vshift128_imm);
15684 %}
15685 
15686 //----------PEEPHOLE RULES-----------------------------------------------------
15687 // These must follow all instruction definitions as they use the names
15688 // defined in the instruction definitions.
15689 //
15690 // peepmatch ( root_instr_name [preceding_instruction]* );
15691 //
15692 // peepconstraint %{
15693 // (instruction_number.operand_name relational_op instruction_number.operand_name
15694 //  [, ...] );
15695 // // instruction numbers are zero-based using left to right order in peepmatch
15696 //
15697 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
15698 // // provide an instruction_number.operand_name for each operand that appears
15699 // // in the replacement instruction's match rule
15700 //
15701 // ---------VM FLAGS---------------------------------------------------------
15702 //
15703 // All peephole optimizations can be turned off using -XX:-OptoPeephole
15704 //
15705 // Each peephole rule is given an identifying number starting with zero and
15706 // increasing by one in the order seen by the parser.  An individual peephole
15707 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
15708 // on the command-line.
15709 //
15710 // ---------CURRENT LIMITATIONS----------------------------------------------
15711 //
15712 // Only match adjacent instructions in same basic block
15713 // Only equality constraints
15714 // Only constraints between operands, not (0.dest_reg == RAX_enc)
15715 // Only one replacement instruction
15716 //
15717 // ---------EXAMPLE----------------------------------------------------------
15718 //
15719 // // pertinent parts of existing instructions in architecture description
15720 // instruct movI(iRegINoSp dst, iRegI src)
15721 // %{
15722 //   match(Set dst (CopyI src));
15723 // %}
15724 //
15725 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
15726 // %{
15727 //   match(Set dst (AddI dst src));
15728 //   effect(KILL cr);
15729 // %}
15730 //
15731 // // Change (inc mov) to lea
15732 // peephole %{
15733 //   // increment preceded by register-register move
15734 //   peepmatch ( incI_iReg movI );
15735 //   // require that the destination register of the increment
15736 //   // match the destination register of the move
15737 //   peepconstraint ( 0.dst == 1.dst );
15738 //   // construct a replacement instruction that sets
15739 //   // the destination to ( move's source register + one )
15740 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
15741 // %}
15742 //
15743 
15744 // Implementation no longer uses movX instructions since
15745 // machine-independent system no longer uses CopyX nodes.
15746 //
15747 // peephole
15748 // %{
15749 //   peepmatch (incI_iReg movI);
15750 //   peepconstraint (0.dst == 1.dst);
15751 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15752 // %}
15753 
15754 // peephole
15755 // %{
15756 //   peepmatch (decI_iReg movI);
15757 //   peepconstraint (0.dst == 1.dst);
15758 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15759 // %}
15760 
15761 // peephole
15762 // %{
15763 //   peepmatch (addI_iReg_imm movI);
15764 //   peepconstraint (0.dst == 1.dst);
15765 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15766 // %}
15767 
15768 // peephole
15769 // %{
15770 //   peepmatch (incL_iReg movL);
15771 //   peepconstraint (0.dst == 1.dst);
15772 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15773 // %}
15774 
15775 // peephole
15776 // %{
15777 //   peepmatch (decL_iReg movL);
15778 //   peepconstraint (0.dst == 1.dst);
15779 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15780 // %}
15781 
15782 // peephole
15783 // %{
15784 //   peepmatch (addL_iReg_imm movL);
15785 //   peepconstraint (0.dst == 1.dst);
15786 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15787 // %}
15788 
15789 // peephole
15790 // %{
15791 //   peepmatch (addP_iReg_imm movP);
15792 //   peepconstraint (0.dst == 1.dst);
15793 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
15794 // %}
15795 
15796 // // Change load of spilled value to only a spill
15797 // instruct storeI(memory mem, iRegI src)
15798 // %{
15799 //   match(Set mem (StoreI mem src));
15800 // %}
15801 //
15802 // instruct loadI(iRegINoSp dst, memory mem)
15803 // %{
15804 //   match(Set dst (LoadI mem));
15805 // %}
15806 //
15807 
15808 //----------SMARTSPILL RULES---------------------------------------------------
15809 // These must follow all instruction definitions as they use the names
15810 // defined in the instruction definitions.
15811 
15812 // Local Variables:
15813 // mode: c++
15814 // End: