1 //
   2 // Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, Red Hat Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
// architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
//   r27-r31 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
  74 // as regards Java usage. we don't use any callee save registers
  75 // because this makes it difficult to de-optimise a frame (see comment
  76 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers
  80 
// General purpose registers. Each 64-bit register is described as two
// 32-bit slots: the real low half (Rn) and a virtual high half (Rn_H)
// used only by the register allocator (see comment above). Note that
// r8 and r9 are deliberately not defined here so they stay invisible
// to the allocator and remain available as scratch registers.
reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
// r19-r26: save-on-call from Java's point of view (column 1) but
// callee-saved in the C ABI (column 2, SOE) -- see the "no callee
// save registers" policy comment above.
reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
// r27-r31 hold reserved values (no-save for Java use):
reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
// note the distinct r31_sp accessor used for the stack pointer
reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values up to 8 * 32
 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
// For Java use, float registers v0-v15 are always save-on-call (even
// though the platform ABI treats v8-v15 as callee save). Float
// registers v16-v31 are SOC as per the platform spec.
 163 
  // The 32 SIMD/FP registers v0-v31. Each is described as four 32-bit
  // slots -- Vn, Vn_H, Vn_J (next(2)) and Vn_K (next(3)) -- so the
  // allocator can model a float (1 slot), a double (2 slots) or a
  // 128-bit vector (4 slots) in the same physical register.
  reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
  reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
  reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
  reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );

  reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
  reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
  reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
  reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );

  reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
  reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
  reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
  reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );

  reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
  reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
  reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
  reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );

  reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
  reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
  reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
  reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );

  reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
  reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
  reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
  reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );

  reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
  reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
  reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
  reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );

  reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
  reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
  reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
  reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );

  // v8-v15 are callee-saved in the platform ABI but are defined SOC
  // here for Java use (see comment above this table).
  reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
  reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
  reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
  reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );

  reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
  reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
  reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
  reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );

  reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
  reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
  reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
  reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));

  reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
  reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
  reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
  reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));

  reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
  reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
  reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
  reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));

  reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
  reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
  reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
  reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));

  reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
  reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
  reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
  reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));

  reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
  reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
  reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
  reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));

  // v16-v31 are caller-saved (SOC) in the platform ABI as well.
  reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
  reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
  reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
  reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));

  reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
  reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
  reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
  reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));

  reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
  reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
  reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
  reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));

  reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
  reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
  reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
  reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));

  reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
  reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
  reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
  reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));

  reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
  reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
  reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
  reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));

  reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
  reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
  reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
  reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));

  reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
  reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
  reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
  reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));

  reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
  reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
  reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
  reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));

  reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
  reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
  reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
  reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));

  reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
  reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
  reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
  reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));

  reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
  reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
  reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
  reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));

  reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
  reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
  reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
  reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));

  reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
  reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
  reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
  reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));

  reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
  reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
  reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
  reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));

  reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
  reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
  reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
  reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
// The AArch64 condition flags (NZCV) status register is not directly
// accessible as an instruction operand. The FPSR status flag register
// is a system register which can be written/read using MSR/MRS but
// again does not appear as an operand (a code identifying the FPSR
// occurs as an immediate value in the instruction).
 333 
 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
// chunk0: integer registers. The listing order is the allocator's
// selection priority (highest first, per the comment above), so do
// not reorder without reconsidering allocation behaviour.
alloc_class chunk0(
    // volatiles -- preferred scratch registers, tried first
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,

    // arg registers -- participate in the fixed calling sequence,
    // so they come after the free volatiles
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,

    // non-volatiles (callee-saved in the C ABI)
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers

    R27, R27_H, // heapbase
    R28, R28_H, // thread
    R29, R29_H, // fp
    R30, R30_H, // lr
    R31, R31_H, // sp
);

// chunk1: SIMD/FP registers, again in selection-priority order.
alloc_class chunk1(

    // no save -- caller-saved in the platform ABI, tried first
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,

    // arg registers
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,

    // non-volatiles (callee-saved in the platform ABI)
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
);

// chunk2: the condition-flags pseudo register on its own.
alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
// Class for all 32 bit integer registers -- excludes SP (R31) which
// will never be used as an integer register. Only the low 32-bit
// slots (Rn) are listed; the virtual high halves are not needed for
// 32-bit values.
reg_class any_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26,
    R27,
    R28,
    R29,
    R30
);

// Singleton class for R0 int register
reg_class int_r0_reg(R0);

// Singleton class for R2 int register
reg_class int_r2_reg(R2);

// Singleton class for R3 int register
reg_class int_r3_reg(R3);

// Singleton class for R4 int register
reg_class int_r4_reg(R4);
 483 
// Class for all long integer registers (including R31/SP), as
// low/high 32-bit slot pairs.
reg_class any_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 517 
// Class for all non-special integer registers: excludes the reserved
// registers r27-r31 (heapbase, thread, fp, lr, sp). This "_no_fp"
// variant also excludes R29 so it cannot be allocated when R29 is in
// use as the frame pointer.
reg_class no_special_reg32_no_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
 /* R29, */                     // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);

// Same as above but with R29 allocatable as an ordinary register.
reg_class no_special_reg32_with_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
    R29,                        // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);

// Chooses between the two variants above based on the value of the
// PreserveFramePointer flag (i.e. whether R29/fp is allocatable).
reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
// Class for all non-special long integer registers (low/high slot
// pairs); same exclusions as no_special_reg32_no_fp above.
reg_class no_special_reg_no_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

// Same as above but with R29/fp allocatable.
reg_class no_special_reg_with_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
    R29, R29_H,                 // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

// Chooses between the two variants above based on the value of the
// PreserveFramePointer flag (i.e. whether R29/fp is allocatable).
reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
// Singleton 64-bit classes (low/high slot pairs) used where an
// instruction or calling sequence needs one specific register.

// Class for 64 bit register r0
reg_class r0_reg(
    R0, R0_H
);

// Class for 64 bit register r1
reg_class r1_reg(
    R1, R1_H
);

// Class for 64 bit register r2
reg_class r2_reg(
    R2, R2_H
);

// Class for 64 bit register r3
reg_class r3_reg(
    R3, R3_H
);

// Class for 64 bit register r4
reg_class r4_reg(
    R4, R4_H
);

// Class for 64 bit register r5
reg_class r5_reg(
    R5, R5_H
);

// Class for 64 bit register r10
reg_class r10_reg(
    R10, R10_H
);

// Class for 64 bit register r11
reg_class r11_reg(
    R11, R11_H
);

// Class for method register (r12 is annotated rmethod above)
reg_class method_reg(
    R12, R12_H
);

// Class for heapbase register (r27)
reg_class heapbase_reg(
    R27, R27_H
);

// Class for thread register (r28)
reg_class thread_reg(
    R28, R28_H
);

// Class for frame pointer register (r29)
reg_class fp_reg(
    R29, R29_H
);

// Class for link register (r30)
reg_class lr_reg(
    R30, R30_H
);

// Class for long sp register (r31)
reg_class sp_reg(
  R31, R31_H
);
 725 
// Class for all pointer registers -- includes the reserved registers
// r27-r31 (contrast no_special_ptr_reg below).
reg_class ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 759 
// Class for all non-special pointer registers -- excludes the
// reserved registers r27-r31 listed (commented out) below.
reg_class no_special_ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 793 
// Class for all float registers -- one 32-bit slot per register,
// since a float occupies only the low slot of each Vn.
reg_class float_reg(
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
    V17,
    V18,
    V19,
    V20,
    V21,
    V22,
    V23,
    V24,
    V25,
    V26,
    V27,
    V28,
    V29,
    V30,
    V31
);

// Double precision float registers have virtual `high halves' that
// are needed by the allocator.
// Class for all double registers (Vn + Vn_H pairs).
reg_class double_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 867 
 868 // Class for all 64bit vector registers
reg_class vectord_reg(
    // a 64-bit vector has the same two-slot (Vn, Vn_H) footprint as
    // a double -- this list is identical to double_reg above
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 903 
 904 // Class for all 128bit vector registers
reg_class vectorx_reg(
    // a 128-bit vector needs four 32-bit slots per register: Vn plus
    // the virtual halves Vn_H, Vn_J and Vn_K
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
// Class for 128 bit register v0
// n.b. only the two low 32-bit slots (Vn, Vn_H) are listed for each
// of these singleton classes, unlike the four slots per register in
// vectorx_reg above -- presumably only the allocatable footprint is
// tracked here; TODO(review) confirm against the instructions that
// use these classes
reg_class v0_reg(
    V0, V0_H
);

// Class for 128 bit register v1
reg_class v1_reg(
    V1, V1_H
);

// Class for 128 bit register v2
reg_class v2_reg(
    V2, V2_H
);

// Class for 128 bit register v3
reg_class v3_reg(
    V3, V3_H
);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls cost twice a register move.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile references are ten times the cost of a register move.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "gc/shared/cardTableModRefBS.hpp"
 999 
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  // Always zero: this platform does not emit call trampolines.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  // Likewise zero: no trampoline means nothing to relocate.
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1017 
class HandlerImpl {

 public:

  // emitters for the exception and deoptimization handler stubs;
  // bodies are defined elsewhere (only declarations here)
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // the exception handler is sized as a single far branch
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // NOTE(review): four instruction slots are reserved, so the far
    // branch presumably may expand to more than one instruction --
    // confirm against MacroAssembler::far_branch_size()
    return 4 * NativeInstruction::instruction_size;
  }
};
1034 
  // graph traversal helpers
  //
  // locate a membar linked to node n via intervening Control and
  // Memory ProjNodes; the ProjNode*& out-arguments return those
  // projections (definitions appear in the source block below)
  MemBarNode *has_parent_membar(const Node *n,
                                ProjNode *&ctl, ProjNode *&mem);
  MemBarNode *has_child_membar(const MemBarNode *n,
                               ProjNode *&ctl, ProjNode *&mem);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs
  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // Use barrier instructions for unsafe volatile gets rather than
  // trying to identify an exact signature for them
  const bool UseBarriersForUnsafeVolatileGet = false;
1053 %}
1054 
1055 source %{
1056 
1057   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1058   // use to implement volatile reads and writes. For a volatile read
1059   // we simply need
1060   //
1061   //   ldar<x>
1062   //
1063   // and for a volatile write we need
1064   //
1065   //   stlr<x>
1066   // 
1067   // Alternatively, we can implement them by pairing a normal
1068   // load/store with a memory barrier. For a volatile read we need
1069   // 
1070   //   ldr<x>
1071   //   dmb ishld
1072   //
1073   // for a volatile write
1074   //
1075   //   dmb ish
1076   //   str<x>
1077   //   dmb ish
1078   //
1079   // In order to generate the desired instruction sequence we need to
1080   // be able to identify specific 'signature' ideal graph node
  // sequences which i) occur as a translation of volatile reads or
  // writes and ii) do not occur through any other translation or
  // graph transformation. We can then provide alternative adlc
1084   // matching rules which translate these node sequences to the
1085   // desired machine code sequences. Selection of the alternative
1086   // rules can be implemented by predicates which identify the
1087   // relevant node sequences.
1088   //
1089   // The ideal graph generator translates a volatile read to the node
1090   // sequence
1091   //
1092   //   LoadX[mo_acquire]
1093   //   MemBarAcquire
1094   //
1095   // As a special case when using the compressed oops optimization we
1096   // may also see this variant
1097   //
1098   //   LoadN[mo_acquire]
1099   //   DecodeN
1100   //   MemBarAcquire
1101   //
1102   // A volatile write is translated to the node sequence
1103   //
1104   //   MemBarRelease
1105   //   StoreX[mo_release]
1106   //   MemBarVolatile
1107   //
1108   // n.b. the above node patterns are generated with a strict
1109   // 'signature' configuration of input and output dependencies (see
1110   // the predicates below for exact details). The two signatures are
1111   // unique to translated volatile reads/stores -- they will not
1112   // appear as a result of any other bytecode translation or inlining
1113   // nor as a consequence of optimizing transforms.
1114   //
1115   // We also want to catch inlined unsafe volatile gets and puts and
1116   // be able to implement them using either ldar<x>/stlr<x> or some
1117   // combination of ldr<x>/stlr<x> and dmb instructions.
1118   //
1119   // Inlined unsafe volatiles puts manifest as a minor variant of the
1120   // normal volatile put node sequence containing an extra cpuorder
1121   // membar
1122   //
1123   //   MemBarRelease
1124   //   MemBarCPUOrder
1125   //   StoreX[mo_release]
1126   //   MemBarVolatile
1127   //
1128   // n.b. as an aside, the cpuorder membar is not itself subject to
1129   // matching and translation by adlc rules.  However, the rule
1130   // predicates need to detect its presence in order to correctly
1131   // select the desired adlc rules.
1132   //
1133   // Inlined unsafe volatiles gets manifest as a somewhat different
1134   // node sequence to a normal volatile get
1135   //
1136   //   MemBarCPUOrder
1137   //        ||       \\
1138   //   MemBarAcquire LoadX[mo_acquire]
1139   //        ||
1140   //   MemBarCPUOrder
1141   //
1142   // In this case the acquire membar does not directly depend on the
1143   // load. However, we can be sure that the load is generated from an
1144   // inlined unsafe volatile get if we see it dependent on this unique
1145   // sequence of membar nodes. Similarly, given an acquire membar we
1146   // can know that it was added because of an inlined unsafe volatile
1147   // get if it is fed and feeds a cpuorder membar and if its feed
1148   // membar also feeds an acquiring load.
1149   //
1150   // So, where we can identify these volatile read and write
1151   // signatures we can choose to plant either of the above two code
1152   // sequences. For a volatile read we can simply plant a normal
1153   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1154   // also choose to inhibit translation of the MemBarAcquire and
1155   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1156   //
1157   // When we recognise a volatile store signature we can choose to
1158   // plant at a dmb ish as a translation for the MemBarRelease, a
1159   // normal str<x> and then a dmb ish for the MemBarVolatile.
1160   // Alternatively, we can inhibit translation of the MemBarRelease
1161   // and MemBarVolatile and instead plant a simple stlr<x>
1162   // instruction.
1163   //
1164   // Of course, the above only applies when we see these signature
1165   // configurations. We still want to plant dmb instructions in any
1166   // other cases where we may see a MemBarAcquire, MemBarRelease or
1167   // MemBarVolatile. For example, at the end of a constructor which
1168   // writes final/volatile fields we will see a MemBarRelease
1169   // instruction and this needs a 'dmb ish' lest we risk the
1170   // constructed object being visible without making the
1171   // final/volatile field writes visible.
1172   //
1173   // n.b. the translation rules below which rely on detection of the
1174   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1175   // If we see anything other than the signature configurations we
  // always just translate the loads and stores to ldr<x> and str<x>
1177   // and translate acquire, release and volatile membars to the
1178   // relevant dmb instructions.
1179   //
1180   // n.b.b as a case in point for the above comment, the current
1181   // predicates don't detect the precise signature for certain types
1182   // of volatile object stores (where the heap_base input type is not
1183   // known at compile-time to be non-NULL). In those cases the
1184   // MemBarRelease and MemBarVolatile bracket an if-then-else sequence
1185   // with a store in each branch (we need a different store depending
1186   // on whether heap_base is actually NULL). In such a case we will
1187   // just plant a dmb both before and after the branch/merge. The
1188   // predicate could (and probably should) be fixed later to also
1189   // detect this case.
1190 
1191   // graph traversal helpers
1192 
1193   // if node n is linked to a parent MemBarNode by an intervening
1194   // Control or Memory ProjNode return the MemBarNode otherwise return
1195   // NULL.
1196   //
1197   // n may only be a Load or a MemBar.
1198   //
1199   // The ProjNode* references c and m are used to return the relevant
1200   // nodes.
1201 
1202   MemBarNode *has_parent_membar(const Node *n, ProjNode *&c, ProjNode *&m)
1203   {
1204     Node *ctl = NULL;
1205     Node *mem = NULL;
1206     Node *membar = NULL;
1207 
1208     if (n->is_Load()) {
1209       ctl = n->lookup(LoadNode::Control);
1210       mem = n->lookup(LoadNode::Memory);
1211     } else if (n->is_MemBar()) {
1212       ctl = n->lookup(TypeFunc::Control);
1213       mem = n->lookup(TypeFunc::Memory);
1214     } else {
1215         return NULL;
1216     }
1217 
1218     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj())
1219       return NULL;
1220 
1221     c = ctl->as_Proj();
1222 
1223     membar = ctl->lookup(0);
1224 
1225     if (!membar || !membar->is_MemBar())
1226       return NULL;
1227 
1228     m = mem->as_Proj();
1229 
1230     if (mem->lookup(0) != membar)
1231       return NULL;
1232 
1233     return membar->as_MemBar();
1234   }
1235 
1236   // if n is linked to a child MemBarNode by intervening Control and
1237   // Memory ProjNodes return the MemBarNode otherwise return NULL.
1238   //
  // The ProjNode*& arguments c and m are used to return the relevant
  // projection nodes.
1242 
1243   MemBarNode *has_child_membar(const MemBarNode *n, ProjNode *&c, ProjNode *&m)
1244   {
1245     ProjNode *ctl = n->proj_out(TypeFunc::Control);
1246     ProjNode *mem = n->proj_out(TypeFunc::Memory);
1247 
1248     // MemBar needs to have both a Ctl and Mem projection
1249     if (! ctl || ! mem)
1250       return NULL;
1251 
1252     c = ctl;
1253     m = mem;
1254 
1255     MemBarNode *child = NULL;
1256     Node *x;
1257 
1258     for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1259       x = ctl->fast_out(i);
1260       // if we see a membar we keep hold of it. we may also see a new
1261       // arena copy of the original but it will appear later
1262       if (x->is_MemBar()) {
1263           child = x->as_MemBar();
1264           break;
1265       }
1266     }
1267 
1268     if (child == NULL)
1269       return NULL;
1270 
1271     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1272       x = mem->fast_out(i);
1273       // if we see a membar we keep hold of it. we may also see a new
1274       // arena copy of the original but it will appear later
1275       if (x == child) {
1276         return child;
1277       }
1278     }
1279     return NULL;
1280   }
1281 
1282   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1283 
// Return true iff barrier (a MemBarAcquire) belongs to one of the
// volatile-read signature graphs described above and so need not be
// translated to a dmb -- the associated load will be emitted as an
// ldar<x> instead.
bool unnecessary_acquire(const Node *barrier) {
  // assert barrier->is_MemBar();
  if (UseBarriersForVolatile)
    // we need to plant a dmb
    return false;

  // a volatile read derived from bytecode (or also from an inlined
  // SHA field read via LibraryCallKit::load_field_from_object)
  // manifests as a LoadX[mo_acquire] followed by an acquire membar
  // with a bogus read dependency on its preceding load. so in those
  // cases we will find the load node at the PARMS offset of the
  // acquire membar.  n.b. there may be an intervening DecodeN node.
  //
  // a volatile load derived from an inlined unsafe field access
  // manifests as a cpuorder membar with Ctl and Mem projections
  // feeding both an acquire membar and a LoadX[mo_acquire]. The
  // acquire then feeds another cpuorder membar via Ctl and Mem
  // projections. The load has no output dependency on these trailing
  // membars because subsequent nodes inserted into the graph take
  // their control feed from the final membar cpuorder meaning they
  // are all ordered after the load.

  Node *x = barrier->lookup(TypeFunc::Parms);
  if (x) {
    // we are starting from an acquire and it has a fake dependency
    //
    // need to check for
    //
    //   LoadX[mo_acquire]
    //   {  |1   }
    //   {DecodeN}
    //      |Parms
    //   MemBarAcquire*
    //
    // where * tags node we were passed
    // and |k means input k
    if (x->is_DecodeNarrowPtr())
      x = x->in(1);

    return (x->is_Load() && x->as_Load()->is_acquire());
  }

  // only continue if we want to try to match unsafe volatile gets
  if (UseBarriersForUnsafeVolatileGet)
    return false;

  // need to check for
  //
  //     MemBarCPUOrder
  //        ||       \\
  //   MemBarAcquire* LoadX[mo_acquire]
  //        ||
  //   MemBarCPUOrder
  //
  // where * tags node we were passed
  // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes

  // check for a parent MemBarCPUOrder
  ProjNode *ctl;
  ProjNode *mem;
  MemBarNode *parent = has_parent_membar(barrier, ctl, mem);
  if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
    return false;
  // ensure the proj nodes both feed a LoadX[mo_acquire]
  LoadNode *ld = NULL;
  for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
    x = ctl->fast_out(i);
    // if we see a load we keep hold of it and stop searching
    if (x->is_Load()) {
      ld = x->as_Load();
      break;
    }
  }
  // it must be an acquiring load
  if (! ld || ! ld->is_acquire())
    return false;
  for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
    x = mem->fast_out(i);
    // if we see the same load we drop it and stop searching
    if (x == ld) {
      ld = NULL;
      break;
    }
  }
  // we must have dropped the load, i.e. the load must be fed by both
  // the Ctl and the Mem projections of the parent cpuorder membar
  if (ld)
    return false;
  // check for a child cpuorder membar
  MemBarNode *child  = has_child_membar(barrier->as_MemBar(), ctl, mem);
  if (!child || child->Opcode() != Op_MemBarCPUOrder)
    return false;

  return true;
}
1378 
1379 bool needs_acquiring_load(const Node *n)
1380 {
1381   // assert n->is_Load();
1382   if (UseBarriersForVolatile)
1383     // we use a normal load and a dmb
1384     return false;
1385 
1386   LoadNode *ld = n->as_Load();
1387 
1388   if (!ld->is_acquire())
1389     return false;
1390 
1391   // check if this load is feeding an acquire membar
1392   //
1393   //   LoadX[mo_acquire]
1394   //   {  |1   }
1395   //   {DecodeN}
1396   //      |Parms
1397   //   MemBarAcquire*
1398   //
1399   // where * tags node we were passed
1400   // and |k means input k
1401 
1402   Node *start = ld;
1403   Node *mbacq = NULL;
1404 
1405   // if we hit a DecodeNarrowPtr we reset the start node and restart
1406   // the search through the outputs
1407  restart:
1408 
1409   for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
1410     Node *x = start->fast_out(i);
1411     if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
1412       mbacq = x;
1413     } else if (!mbacq &&
1414                (x->is_DecodeNarrowPtr() ||
1415                 (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
1416       start = x;
1417       goto restart;
1418     }
1419   }
1420 
1421   if (mbacq) {
1422     return true;
1423   }
1424 
1425   // only continue if we want to try to match unsafe volatile gets
1426   if (UseBarriersForUnsafeVolatileGet)
1427     return false;
1428 
1429   // check if Ctl and Proj feed comes from a MemBarCPUOrder
1430   //
1431   //     MemBarCPUOrder
1432   //        ||       \\
1433   //   MemBarAcquire* LoadX[mo_acquire]
1434   //        ||
1435   //   MemBarCPUOrder
1436 
1437   MemBarNode *membar;
1438   ProjNode *ctl;
1439   ProjNode *mem;
1440 
1441   membar = has_parent_membar(ld, ctl, mem);
1442 
1443   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder)
1444     return false;
1445 
1446   // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
1447 
1448   membar = has_child_membar(membar, ctl, mem);
1449 
1450   if (!membar || !membar->Opcode() == Op_MemBarAcquire)
1451     return false;
1452 
1453   membar = has_child_membar(membar, ctl, mem);
1454   
1455   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder)
1456     return false;
1457 
1458   return true;
1459 }
1460 
// Return true iff the MemBarRelease n is part of the signature
// volatile-store sequence and so will be covered by a stlr<x> rather
// than needing a dmb (see the discussion at the top of this source
// block).
bool unnecessary_release(const Node *n) {
  // assert n->is_MemBar();
  if (UseBarriersForVolatile)
    // we need to plant a dmb
    return false;

  // ok, so we can omit this release barrier if it has been inserted
  // as part of a volatile store sequence
  //
  //   MemBarRelease
  //  {      ||      }
  //  {MemBarCPUOrder} -- optional
  //         ||     \\
  //         ||     StoreX[mo_release]
  //         | \     /
  //         | MergeMem
  //         | /
  //   MemBarVolatile
  //
  // where
  //  || and \\ represent Ctl and Mem feeds via Proj nodes
  //  | \ and / indicate further routing of the Ctl and Mem feeds
  //
  // so we need to check that
  //
  // i) the release membar (or its dependent cpuorder membar) feeds
  // control to a store node (via a Control project node)
  //
  // ii) the store is ordered release
  //
  // iii) the release membar (or its dependent cpuorder membar) feeds
  // control to a volatile membar (via the same Control project node)
  //
  // iv) the release membar feeds memory to a merge mem and to the
  // same store (both via a single Memory proj node)
  //
  // v) the store outputs to the merge mem
  //
  // vi) the merge mem outputs to the same volatile membar
  //
  // n.b. if this is an inlined unsafe node then the release membar
  // may feed its control and memory links via an intervening cpuorder
  // membar. this case can be dealt with when we check the release
  // membar projections. if they both feed a single cpuorder membar
  // node continue to make the same checks as above but with the
  // cpuorder membar substituted for the release membar. if they don't
  // both feed a cpuorder membar then the check fails.
  //
  // n.b.b. for an inlined unsafe store of an object in the case where
  // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see
  // an embedded if then else where we expect the store. this is
  // needed to do the right type of store depending on whether
  // heap_base is NULL. We could check for that but for now we can
  // just take the hit of inserting a redundant dmb for this
  // redundant volatile membar

  MemBarNode *barrier = n->as_MemBar();
  ProjNode *ctl;
  ProjNode *mem;
  // check for an intervening cpuorder membar
  MemBarNode *b = has_child_membar(barrier, ctl, mem);
  if (b && b->Opcode() == Op_MemBarCPUOrder) {
    // ok, so start from the dependent cpuorder barrier
    barrier = b;
  }
  // check the ctl and mem flow
  ctl = barrier->proj_out(TypeFunc::Control);
  mem = barrier->proj_out(TypeFunc::Memory);

  // the barrier needs to have both a Ctl and Mem projection
  if (! ctl || ! mem)
    return false;

  Node *x = NULL;
  Node *mbvol = NULL;
  StoreNode * st = NULL;

  // For a normal volatile write the Ctl ProjNode should have output
  // to a MemBarVolatile and a Store marked as releasing
  //
  // n.b. for an inlined unsafe store of an object in the case where
  // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see
  // an embedded if then else where we expect the store. this is
  // needed to do the right type of store depending on whether
  // heap_base is NULL. We could check for that case too but for now
  // we can just take the hit of inserting a dmb and a non-volatile
  // store to implement the volatile store

  for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
    x = ctl->fast_out(i);
    if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
      // the trailing volatile membar must be unique
      if (mbvol) {
        return false;
      }
      mbvol = x;
    } else if (x->is_Store()) {
      st = x->as_Store();
      // the bracketed store must be a releasing store
      if (! st->is_release()) {
        return false;
      }
    } else if (!x->is_Mach()) {
      // we may see mach nodes added during matching but nothing else
      return false;
    }
  }

  if (!mbvol || !st)
    return false;

  // the Mem ProjNode should output to a MergeMem and the same Store
  Node *mm = NULL;
  for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
    x = mem->fast_out(i);
    if (!mm && x->is_MergeMem()) {
      mm = x;
    } else if (x != st && !x->is_Mach()) {
      // we may see mach nodes added during matching but nothing else
      return false;
    }
  }

  if (!mm)
    return false;

  // the MergeMem should output to the MemBarVolatile
  for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
    x = mm->fast_out(i);
    if (x != mbvol && !x->is_Mach()) {
      // we may see mach nodes added during matching but nothing else
      return false;
    }
  }

  return true;
}
1596 
1597 bool unnecessary_volatile(const Node *n) {
1598   // assert n->is_MemBar();
1599   if (UseBarriersForVolatile)
1600     // we need to plant a dmb
1601     return false;
1602 
1603   // ok, so we can omit this volatile barrier if it has been inserted
1604   // as part of a volatile store sequence
1605   //
1606   //   MemBarRelease
1607   //  {      ||      }
1608   //  {MemBarCPUOrder} -- optional
1609   //         ||     \\
1610   //         ||     StoreX[mo_release]
1611   //         | \     /
1612   //         | MergeMem
1613   //         | /
1614   //   MemBarVolatile
1615   //
1616   // where
1617   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1618   //  | \ and / indicate further routing of the Ctl and Mem feeds
1619   // 
1620   // we need to check that
1621   //
1622   // i) the volatile membar gets its control feed from a release
1623   // membar (or its dependent cpuorder membar) via a Control project
1624   // node
1625   //
1626   // ii) the release membar (or its dependent cpuorder membar) also
1627   // feeds control to a store node via the same proj node
1628   //
1629   // iii) the store is ordered release
1630   //
1631   // iv) the release membar (or its dependent cpuorder membar) feeds
1632   // memory to a merge mem and to the same store (both via a single
1633   // Memory proj node)
1634   //
1635   // v) the store outputs to the merge mem
1636   //
1637   // vi) the merge mem outputs to the volatile membar
1638   //
1639   // n.b. for an inlined unsafe store of an object in the case where
1640   // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see
1641   // an embedded if then else where we expect the store. this is
1642   // needed to do the right type of store depending on whether
1643   // heap_base is NULL. We could check for that but for now we can
1644   // just take the hit of on inserting a redundant dmb for this
1645   // redundant volatile membar
1646 
1647   MemBarNode *mbvol = n->as_MemBar();
1648   Node *x = n->lookup(TypeFunc::Control);
1649 
1650   if (! x || !x->is_Proj())
1651     return false;
1652 
1653   ProjNode *proj = x->as_Proj();
1654 
1655   x = proj->lookup(0);
1656 
1657   if (!x || !x->is_MemBar())
1658     return false;
1659 
1660   MemBarNode *barrier = x->as_MemBar();
1661 
1662   // if the barrier is a release membar we have what we want. if it is
1663   // a cpuorder membar then we need to ensure that it is fed by a
1664   // release membar in which case we proceed to check the graph below
1665   // this cpuorder membar as the feed
1666 
1667   if (x->Opcode() != Op_MemBarRelease) {
1668     if (x->Opcode() != Op_MemBarCPUOrder)
1669       return false;
1670     ProjNode *ctl;
1671     ProjNode *mem;
1672     MemBarNode *b = has_parent_membar(x, ctl, mem);
1673     if (!b || !b->Opcode() == Op_MemBarRelease)
1674       return false;
1675   }
1676 
1677   ProjNode *ctl = barrier->proj_out(TypeFunc::Control);
1678   ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1679 
1680   // barrier needs to have both a Ctl and Mem projection
1681   // and we need to have reached it via the Ctl projection
1682   if (! ctl || ! mem || ctl != proj)
1683     return false;
1684 
1685   StoreNode * st = NULL;
1686 
1687   // The Ctl ProjNode should have output to a MemBarVolatile and
1688   // a Store marked as releasing
1689   for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1690     x = ctl->fast_out(i);
1691     if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
1692       if (x != mbvol) {
1693         return false;
1694       }
1695     } else if (x->is_Store()) {
1696       st = x->as_Store();
1697       if (! st->is_release()) {
1698         return false;
1699       }
1700     } else if (!x->is_Mach()){
1701       // we may see mach nodes added during matching but nothing else
1702       return false;
1703     }
1704   }
1705 
1706   if (!st)
1707     return false;
1708 
1709   // the Mem ProjNode should output to a MergeMem and the same Store
1710   Node *mm = NULL;
1711   for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1712     x = mem->fast_out(i);
1713     if (!mm && x->is_MergeMem()) {
1714       mm = x;
1715     } else if (x != st && !x->is_Mach()) {
1716       // we may see mach nodes added during matching but nothing else
1717       return false;
1718     }
1719   }
1720 
1721   if (!mm)
1722     return false;
1723 
1724   // the MergeMem should output to the MemBarVolatile
1725   for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
1726     x = mm->fast_out(i);
1727     if (x != mbvol && !x->is_Mach()) {
1728       // we may see mach nodes added during matching but nothing else
1729       return false;
1730     }
1731   }
1732 
1733   return true;
1734 }
1735 
1736 
1737 
1738 bool needs_releasing_store(const Node *n)
1739 {
1740   // assert n->is_Store();
1741   if (UseBarriersForVolatile)
1742     // we use a normal store and dmb combination
1743     return false;
1744 
1745   StoreNode *st = n->as_Store();
1746 
1747   if (!st->is_release())
1748     return false;
1749 
1750   // check if this store is bracketed by a release (or its dependent
1751   // cpuorder membar) and a volatile membar
1752   //
1753   //   MemBarRelease
1754   //  {      ||      }
1755   //  {MemBarCPUOrder} -- optional
1756   //         ||     \\
1757   //         ||     StoreX[mo_release]
1758   //         | \     /
1759   //         | MergeMem
1760   //         | /
1761   //   MemBarVolatile
1762   //
1763   // where
1764   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1765   //  | \ and / indicate further routing of the Ctl and Mem feeds
1766   // 
1767 
1768 
1769   Node *x = st->lookup(TypeFunc::Control);
1770 
1771   if (! x || !x->is_Proj())
1772     return false;
1773 
1774   ProjNode *proj = x->as_Proj();
1775 
1776   x = proj->lookup(0);
1777 
1778   if (!x || !x->is_MemBar())
1779     return false;
1780 
1781   MemBarNode *barrier = x->as_MemBar();
1782 
1783   // if the barrier is a release membar we have what we want. if it is
1784   // a cpuorder membar then we need to ensure that it is fed by a
1785   // release membar in which case we proceed to check the graph below
1786   // this cpuorder membar as the feed
1787 
1788   if (x->Opcode() != Op_MemBarRelease) {
1789     if (x->Opcode() != Op_MemBarCPUOrder)
1790       return false;
1791     Node *ctl = x->lookup(TypeFunc::Control);
1792     Node *mem = x->lookup(TypeFunc::Memory);
1793     if (!ctl || !ctl->is_Proj() || !mem || !mem->is_Proj())
1794       return false;
1795     x = ctl->lookup(0);
1796     if (!x || !x->is_MemBar() || !x->Opcode() == Op_MemBarRelease)
1797       return false;
1798     Node *y = mem->lookup(0);
1799     if (!y || y != x)
1800       return false;
1801   }
1802 
1803   ProjNode *ctl = barrier->proj_out(TypeFunc::Control);
1804   ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1805 
1806   // MemBarRelease needs to have both a Ctl and Mem projection
1807   // and we need to have reached it via the Ctl projection
1808   if (! ctl || ! mem || ctl != proj)
1809     return false;
1810 
1811   MemBarNode *mbvol = NULL;
1812 
1813   // The Ctl ProjNode should have output to a MemBarVolatile and
1814   // a Store marked as releasing
1815   for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1816     x = ctl->fast_out(i);
1817     if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
1818       mbvol = x->as_MemBar();
1819     } else if (x->is_Store()) {
1820       if (x != st) {
1821         return false;
1822       }
1823     } else if (!x->is_Mach()){
1824       return false;
1825     }
1826   }
1827 
1828   if (!mbvol)
1829     return false;
1830 
1831   // the Mem ProjNode should output to a MergeMem and the same Store
1832   Node *mm = NULL;
1833   for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1834     x = mem->fast_out(i);
1835     if (!mm && x->is_MergeMem()) {
1836       mm = x;
1837     } else if (x != st && !x->is_Mach()) {
1838       return false;
1839     }
1840   }
1841 
1842   if (!mm)
1843     return false;
1844 
1845   // the MergeMem should output to the MemBarVolatile
1846   for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
1847     x = mm->fast_out(i);
1848     if (x != mbvol && !x->is_Mach()) {
1849       return false;
1850     }
1851   }
1852 
1853   return true;
1854 }
1855 
1856 
1857 
1858 #define __ _masm.
1859 
1860 // advance declarations for helper functions to convert register
1861 // indices to register objects
1862 
1863 // the ad file has to provide implementations of certain methods
1864 // expected by the generic code
1865 //
1866 // REQUIRED FUNCTIONALITY
1867 
1868 //=============================================================================
1869 
1870 // !!!!! Special hack to get all types of calls to specify the byte offset
1871 //       from the start of the call to the point where the return address
1872 //       will point.
1873 
1874 int MachCallStaticJavaNode::ret_addr_offset()
1875 {
1876   // call should be a simple bl
1877   int off = 4;
1878   return off;
1879 }
1880 
1881 int MachCallDynamicJavaNode::ret_addr_offset()
1882 {
1883   return 16; // movz, movk, movk, bl
1884 }
1885 
1886 int MachCallRuntimeNode::ret_addr_offset() {
1887   // for generated stubs the call will be
1888   //   far_call(addr)
1889   // for real runtime callouts it will be six instructions
1890   // see aarch64_enc_java_to_runtime
1891   //   adr(rscratch2, retaddr)
1892   //   lea(rscratch1, RuntimeAddress(addr)
1893   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1894   //   blrt rscratch1
1895   CodeBlob *cb = CodeCache::find_blob(_entry_point);
1896   if (cb) {
1897     return MacroAssembler::far_branch_size();
1898   } else {
1899     return 6 * NativeInstruction::instruction_size;
1900   }
1901 }
1902 
1903 // Indicate if the safepoint node needs the polling page as an input
1904 
1905 // the shared code plants the oop data at the start of the generated
1906 // code for the safepoint node and that needs ot be at the load
1907 // instruction itself. so we cannot plant a mov of the safepoint poll
1908 // address followed by a load. setting this to true means the mov is
1909 // scheduled as a prior instruction. that's better for scheduling
1910 // anyway.
1911 
1912 bool SafePointNode::needs_polling_address_input()
1913 {
1914   return true;
1915 }
1916 
1917 //=============================================================================
1918 
1919 #ifndef PRODUCT
1920 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1921   st->print("BREAKPOINT");
1922 }
1923 #endif
1924 
1925 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1926   MacroAssembler _masm(&cbuf);
1927   __ brk(0);
1928 }
1929 
1930 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
1931   return MachNode::size(ra_);
1932 }
1933 
1934 //=============================================================================
1935 
1936 #ifndef PRODUCT
1937   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
1938     st->print("nop \t# %d bytes pad for loops and calls", _count);
1939   }
1940 #endif
1941 
1942   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
1943     MacroAssembler _masm(&cbuf);
1944     for (int i = 0; i < _count; i++) {
1945       __ nop();
1946     }
1947   }
1948 
1949   uint MachNopNode::size(PhaseRegAlloc*) const {
1950     return _count * NativeInstruction::instruction_size;
1951   }
1952 
1953 //=============================================================================
// The constant table is addressed absolutely (see
// calculate_table_base_offset below), so the base node defines no
// output register.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
1955 
1956 int Compile::ConstantTable::calculate_table_base_offset() const {
1957   return 0;  // absolute addressing, no offset
1958 }
1959 
1960 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1961 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1962   ShouldNotReachHere();
1963 }
1964 
1965 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
1966   // Empty encoding
1967 }
1968 
1969 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1970   return 0;
1971 }
1972 
1973 #ifndef PRODUCT
1974 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1975   st->print("-- \t// MachConstantBaseNode (empty encoding)");
1976 }
1977 #endif
1978 
1979 #ifndef PRODUCT
1980 void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1981   Compile* C = ra_->C;
1982 
1983   int framesize = C->frame_slots() << LogBytesPerInt;
1984 
1985   if (C->need_stack_bang(framesize))
1986     st->print("# stack bang size=%d\n\t", framesize);
1987 
1988   if (framesize < ((1 << 9) + 2 * wordSize)) {
1989     st->print("sub  sp, sp, #%d\n\t", framesize);
1990     st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
1991     if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
1992   } else {
1993     st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
1994     if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
1995     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
1996     st->print("sub  sp, sp, rscratch1");
1997   }
1998 }
1999 #endif
2000 
// Emit the method prolog: patchable nop, optional stack bang, frame
// build, and bookkeeping for the constant table.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  // Bang the stack before building the frame so an overflow is raised
  // with a walkable stack.
  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  // Tell the simulator we have entered a compiled method.
  if (NotifySimulator) {
    __ notify(Assembler::method_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  // Record the pc offset at which the frame is fully built.
  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
2036 
2037 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
2038 {
2039   return MachNode::size(ra_); // too many variables; just compute it
2040                               // the hard way
2041 }
2042 
2043 int MachPrologNode::reloc() const
2044 {
2045   return 0;
2046 }
2047 
2048 //=============================================================================
2049 
2050 #ifndef PRODUCT
2051 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2052   Compile* C = ra_->C;
2053   int framesize = C->frame_slots() << LogBytesPerInt;
2054 
2055   st->print("# pop frame %d\n\t",framesize);
2056 
2057   if (framesize == 0) {
2058     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
2059   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
2060     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
2061     st->print("add  sp, sp, #%d\n\t", framesize);
2062   } else {
2063     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
2064     st->print("add  sp, sp, rscratch1\n\t");
2065     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
2066   }
2067 
2068   if (do_polling() && C->is_method_compilation()) {
2069     st->print("# touch polling page\n\t");
2070     st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
2071     st->print("ldr zr, [rscratch1]");
2072   }
2073 }
2074 #endif
2075 
// Emit the method epilog: tear down the frame, notify the simulator,
// and poll the safepoint page on return.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  // Restore rfp/lr and pop the frame.
  __ remove_frame(framesize);

  // Tell the simulator we are returning to compiled code.
  if (NotifySimulator) {
    __ notify(Assembler::method_reentry);
  }

  // Touch the polling page with a poll_return relocation.
  if (do_polling() && C->is_method_compilation()) {
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
2091 
2092 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
2093   // Variable size. Determine dynamically.
2094   return MachNode::size(ra_);
2095 }
2096 
2097 int MachEpilogNode::reloc() const {
2098   // Return number of relocatable values contained in this instruction.
2099   return 1; // 1 for polling page.
2100 }
2101 
2102 const Pipeline * MachEpilogNode::pipeline() const {
2103   return MachNode::pipeline_class();
2104 }
2105 
2106 // This method seems to be obsolete. It is declared in machnode.hpp
2107 // and defined in all *.ad files, but it is never called. Should we
2108 // get rid of it?
2109 int MachEpilogNode::safepoint_offset() const {
2110   assert(do_polling(), "no return for this epilog node");
2111   return 4;
2112 }
2113 
2114 //=============================================================================
2115 
2116 // Figure out which register class each belongs in: rc_int, rc_float or
2117 // rc_stack.
2118 enum RC { rc_bad, rc_int, rc_float, rc_stack };
2119 
2120 static enum RC rc_class(OptoReg::Name reg) {
2121 
2122   if (reg == OptoReg::Bad) {
2123     return rc_bad;
2124   }
2125 
2126   // we have 30 int registers * 2 halves
2127   // (rscratch1 and rscratch2 are omitted)
2128 
2129   if (reg < 60) {
2130     return rc_int;
2131   }
2132 
2133   // we have 32 float register * 2 halves
2134   if (reg < 60 + 128) {
2135     return rc_float;
2136   }
2137 
2138   // Between float regs & stack is the flags regs.
2139   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
2140 
2141   return rc_stack;
2142 }
2143 
2144 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
2145   Compile* C = ra_->C;
2146 
2147   // Get registers to move.
2148   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
2149   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
2150   OptoReg::Name dst_hi = ra_->get_reg_second(this);
2151   OptoReg::Name dst_lo = ra_->get_reg_first(this);
2152 
2153   enum RC src_hi_rc = rc_class(src_hi);
2154   enum RC src_lo_rc = rc_class(src_lo);
2155   enum RC dst_hi_rc = rc_class(dst_hi);
2156   enum RC dst_lo_rc = rc_class(dst_lo);
2157 
2158   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
2159 
2160   if (src_hi != OptoReg::Bad) {
2161     assert((src_lo&1)==0 && src_lo+1==src_hi &&
2162            (dst_lo&1)==0 && dst_lo+1==dst_hi,
2163            "expected aligned-adjacent pairs");
2164   }
2165 
2166   if (src_lo == dst_lo && src_hi == dst_hi) {
2167     return 0;            // Self copy, no move.
2168   }
2169 
2170   if (bottom_type()->isa_vect() != NULL) {
2171     uint len = 4;
2172     uint ireg = ideal_reg();
2173     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
2174     if (cbuf) {
2175       MacroAssembler _masm(cbuf);
2176       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
2177       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
2178         // stack->stack
2179         int src_offset = ra_->reg2offset(src_lo);
2180         int dst_offset = ra_->reg2offset(dst_lo);
2181         assert((src_offset & 7) && (dst_offset & 7), "unaligned stack offset");
2182         len = 8;
2183         if (ireg == Op_VecD) {
2184           __ ldr(rscratch1, Address(sp, src_offset));
2185           __ str(rscratch1, Address(sp, dst_offset));
2186         } else {
2187           if (src_offset < 512) {
2188             __ ldp(rscratch1, rscratch2, Address(sp, src_offset));
2189           } else {
2190             __ ldr(rscratch1, Address(sp, src_offset));
2191             __ ldr(rscratch2, Address(sp, src_offset+4));
2192             len += 4;
2193           }
2194           if (dst_offset < 512) {
2195             __ stp(rscratch1, rscratch2, Address(sp, dst_offset));
2196           } else {
2197             __ str(rscratch1, Address(sp, dst_offset));
2198             __ str(rscratch2, Address(sp, dst_offset+4));
2199             len += 4;
2200           }
2201         }
2202       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
2203         __ orr(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2204                ireg == Op_VecD ? __ T8B : __ T16B,
2205                as_FloatRegister(Matcher::_regEncode[src_lo]),
2206                as_FloatRegister(Matcher::_regEncode[src_lo]));
2207       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
2208         __ str(as_FloatRegister(Matcher::_regEncode[src_lo]),
2209                ireg == Op_VecD ? __ D : __ Q,
2210                Address(sp, ra_->reg2offset(dst_lo)));
2211       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
2212         __ ldr(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2213                ireg == Op_VecD ? __ D : __ Q,
2214                Address(sp, ra_->reg2offset(src_lo)));
2215       } else {
2216         ShouldNotReachHere();
2217       }
2218     } else if (st) {
2219       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
2220         // stack->stack
2221         int src_offset = ra_->reg2offset(src_lo);
2222         int dst_offset = ra_->reg2offset(dst_lo);
2223         if (ireg == Op_VecD) {
2224           st->print("ldr  rscratch1, [sp, #%d]", src_offset);
2225           st->print("str  rscratch1, [sp, #%d]", dst_offset);
2226         } else {
2227           if (src_offset < 512) {
2228             st->print("ldp  rscratch1, rscratch2, [sp, #%d]", src_offset);
2229           } else {
2230             st->print("ldr  rscratch1, [sp, #%d]", src_offset);
2231             st->print("\nldr  rscratch2, [sp, #%d]", src_offset+4);
2232           }
2233           if (dst_offset < 512) {
2234             st->print("\nstp  rscratch1, rscratch2, [sp, #%d]", dst_offset);
2235           } else {
2236             st->print("\nstr  rscratch1, [sp, #%d]", dst_offset);
2237             st->print("\nstr  rscratch2, [sp, #%d]", dst_offset+4);
2238           }
2239         }
2240         st->print("\t# vector spill, stack to stack");
2241       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
2242         st->print("mov  %s, %s\t# vector spill, reg to reg",
2243                    Matcher::regName[dst_lo], Matcher::regName[src_lo]);
2244       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
2245         st->print("str  %s, [sp, #%d]\t# vector spill, reg to stack",
2246                    Matcher::regName[src_lo], ra_->reg2offset(dst_lo));
2247       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
2248         st->print("ldr  %s, [sp, #%d]\t# vector spill, stack to reg",
2249                    Matcher::regName[dst_lo], ra_->reg2offset(src_lo));
2250       }
2251     }
2252     return len;
2253   }
2254 
2255   switch (src_lo_rc) {
2256   case rc_int:
2257     if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
2258       if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2259           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
2260           // 64 bit
2261         if (cbuf) {
2262           MacroAssembler _masm(cbuf);
2263           __ mov(as_Register(Matcher::_regEncode[dst_lo]),
2264                  as_Register(Matcher::_regEncode[src_lo]));
2265         } else if (st) {
2266           st->print("mov  %s, %s\t# shuffle",
2267                     Matcher::regName[dst_lo],
2268                     Matcher::regName[src_lo]);
2269         }
2270       } else {
2271         // 32 bit
2272         if (cbuf) {
2273           MacroAssembler _masm(cbuf);
2274           __ movw(as_Register(Matcher::_regEncode[dst_lo]),
2275                   as_Register(Matcher::_regEncode[src_lo]));
2276         } else if (st) {
2277           st->print("movw  %s, %s\t# shuffle",
2278                     Matcher::regName[dst_lo],
2279                     Matcher::regName[src_lo]);
2280         }
2281       }
2282     } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
2283       if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2284           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
2285           // 64 bit
2286         if (cbuf) {
2287           MacroAssembler _masm(cbuf);
2288           __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2289                    as_Register(Matcher::_regEncode[src_lo]));
2290         } else if (st) {
2291           st->print("fmovd  %s, %s\t# shuffle",
2292                     Matcher::regName[dst_lo],
2293                     Matcher::regName[src_lo]);
2294         }
2295       } else {
2296         // 32 bit
2297         if (cbuf) {
2298           MacroAssembler _masm(cbuf);
2299           __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2300                    as_Register(Matcher::_regEncode[src_lo]));
2301         } else if (st) {
2302           st->print("fmovs  %s, %s\t# shuffle",
2303                     Matcher::regName[dst_lo],
2304                     Matcher::regName[src_lo]);
2305         }
2306       }
2307     } else {                    // gpr --> stack spill
2308       assert(dst_lo_rc == rc_stack, "spill to bad register class");
2309       int dst_offset = ra_->reg2offset(dst_lo);
2310       if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2311           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
2312           // 64 bit
2313         if (cbuf) {
2314           MacroAssembler _masm(cbuf);
2315           __ str(as_Register(Matcher::_regEncode[src_lo]),
2316                  Address(sp, dst_offset));
2317         } else if (st) {
2318           st->print("str  %s, [sp, #%d]\t# spill",
2319                     Matcher::regName[src_lo],
2320                     dst_offset);
2321         }
2322       } else {
2323         // 32 bit
2324         if (cbuf) {
2325           MacroAssembler _masm(cbuf);
2326           __ strw(as_Register(Matcher::_regEncode[src_lo]),
2327                  Address(sp, dst_offset));
2328         } else if (st) {
2329           st->print("strw  %s, [sp, #%d]\t# spill",
2330                     Matcher::regName[src_lo],
2331                     dst_offset);
2332         }
2333       }
2334     }
2335     return 4;
2336   case rc_float:
2337     if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
2338       if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2339           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
2340           // 64 bit
2341         if (cbuf) {
2342           MacroAssembler _masm(cbuf);
2343           __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
2344                    as_FloatRegister(Matcher::_regEncode[src_lo]));
2345         } else if (st) {
2346           st->print("fmovd  %s, %s\t# shuffle",
2347                     Matcher::regName[dst_lo],
2348                     Matcher::regName[src_lo]);
2349         }
2350       } else {
2351         // 32 bit
2352         if (cbuf) {
2353           MacroAssembler _masm(cbuf);
2354           __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
2355                    as_FloatRegister(Matcher::_regEncode[src_lo]));
2356         } else if (st) {
2357           st->print("fmovs  %s, %s\t# shuffle",
2358                     Matcher::regName[dst_lo],
2359                     Matcher::regName[src_lo]);
2360         }
2361       }
2362     } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
2363       if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2364           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
2365           // 64 bit
2366         if (cbuf) {
2367           MacroAssembler _masm(cbuf);
2368           __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2369                    as_FloatRegister(Matcher::_regEncode[src_lo]));
2370         } else if (st) {
2371           st->print("fmovd  %s, %s\t# shuffle",
2372                     Matcher::regName[dst_lo],
2373                     Matcher::regName[src_lo]);
2374         }
2375       } else {
2376         // 32 bit
2377         if (cbuf) {
2378           MacroAssembler _masm(cbuf);
2379           __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2380                    as_FloatRegister(Matcher::_regEncode[src_lo]));
2381         } else if (st) {
2382           st->print("fmovs  %s, %s\t# shuffle",
2383                     Matcher::regName[dst_lo],
2384                     Matcher::regName[src_lo]);
2385         }
2386       }
2387     } else {                    // fpr --> stack spill
2388       assert(dst_lo_rc == rc_stack, "spill to bad register class");
2389       int dst_offset = ra_->reg2offset(dst_lo);
2390       if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2391           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
2392           // 64 bit
2393         if (cbuf) {
2394           MacroAssembler _masm(cbuf);
2395           __ strd(as_FloatRegister(Matcher::_regEncode[src_lo]),
2396                  Address(sp, dst_offset));
2397         } else if (st) {
2398           st->print("strd  %s, [sp, #%d]\t# spill",
2399                     Matcher::regName[src_lo],
2400                     dst_offset);
2401         }
2402       } else {
2403         // 32 bit
2404         if (cbuf) {
2405           MacroAssembler _masm(cbuf);
2406           __ strs(as_FloatRegister(Matcher::_regEncode[src_lo]),
2407                  Address(sp, dst_offset));
2408         } else if (st) {
2409           st->print("strs  %s, [sp, #%d]\t# spill",
2410                     Matcher::regName[src_lo],
2411                     dst_offset);
2412         }
2413       }
2414     }
2415     return 4;
2416   case rc_stack:
2417     int src_offset = ra_->reg2offset(src_lo);
2418     if (dst_lo_rc == rc_int) {  // stack --> gpr load
2419       if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2420           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
2421           // 64 bit
2422         if (cbuf) {
2423           MacroAssembler _masm(cbuf);
2424           __ ldr(as_Register(Matcher::_regEncode[dst_lo]),
2425                  Address(sp, src_offset));
2426         } else if (st) {
2427           st->print("ldr  %s, [sp, %d]\t# restore",
2428                     Matcher::regName[dst_lo],
2429                     src_offset);
2430         }
2431       } else {
2432         // 32 bit
2433         if (cbuf) {
2434           MacroAssembler _masm(cbuf);
2435           __ ldrw(as_Register(Matcher::_regEncode[dst_lo]),
2436                   Address(sp, src_offset));
2437         } else if (st) {
2438           st->print("ldr  %s, [sp, %d]\t# restore",
2439                     Matcher::regName[dst_lo],
2440                    src_offset);
2441         }
2442       }
2443       return 4;
2444     } else if (dst_lo_rc == rc_float) { // stack --> fpr load
2445       if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2446           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
2447           // 64 bit
2448         if (cbuf) {
2449           MacroAssembler _masm(cbuf);
2450           __ ldrd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2451                  Address(sp, src_offset));
2452         } else if (st) {
2453           st->print("ldrd  %s, [sp, %d]\t# restore",
2454                     Matcher::regName[dst_lo],
2455                     src_offset);
2456         }
2457       } else {
2458         // 32 bit
2459         if (cbuf) {
2460           MacroAssembler _masm(cbuf);
2461           __ ldrs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2462                   Address(sp, src_offset));
2463         } else if (st) {
2464           st->print("ldrs  %s, [sp, %d]\t# restore",
2465                     Matcher::regName[dst_lo],
2466                    src_offset);
2467         }
2468       }
2469       return 4;
2470     } else {                    // stack --> stack copy
2471       assert(dst_lo_rc == rc_stack, "spill to bad register class");
2472       int dst_offset = ra_->reg2offset(dst_lo);
2473       if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2474           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
2475           // 64 bit
2476         if (cbuf) {
2477           MacroAssembler _masm(cbuf);
2478           __ ldr(rscratch1, Address(sp, src_offset));
2479           __ str(rscratch1, Address(sp, dst_offset));
2480         } else if (st) {
2481           st->print("ldr  rscratch1, [sp, %d]\t# mem-mem spill",
2482                     src_offset);
2483           st->print("\n\t");
2484           st->print("str  rscratch1, [sp, %d]",
2485                     dst_offset);
2486         }
2487       } else {
2488         // 32 bit
2489         if (cbuf) {
2490           MacroAssembler _masm(cbuf);
2491           __ ldrw(rscratch1, Address(sp, src_offset));
2492           __ strw(rscratch1, Address(sp, dst_offset));
2493         } else if (st) {
2494           st->print("ldrw  rscratch1, [sp, %d]\t# mem-mem spill",
2495                     src_offset);
2496           st->print("\n\t");
2497           st->print("strw  rscratch1, [sp, %d]",
2498                     dst_offset);
2499         }
2500       }
2501       return 8;
2502     }
2503   }
2504 
2505   assert(false," bad rc_class for spill ");
2506   Unimplemented();
2507   return 0;
2508 
2509 }
2510 
2511 #ifndef PRODUCT
2512 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2513   if (!ra_)
2514     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
2515   else
2516     implementation(NULL, ra_, false, st);
2517 }
2518 #endif
2519 
2520 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2521   implementation(&cbuf, ra_, false, NULL);
2522 }
2523 
2524 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2525   return implementation(NULL, ra_, true, NULL);
2526 }
2527 
2528 //=============================================================================
2529 
2530 #ifndef PRODUCT
2531 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2532   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2533   int reg = ra_->get_reg_first(this);
2534   st->print("add %s, rsp, #%d]\t# box lock",
2535             Matcher::regName[reg], offset);
2536 }
2537 #endif
2538 
2539 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2540   MacroAssembler _masm(&cbuf);
2541 
2542   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2543   int reg    = ra_->get_encode(this);
2544 
2545   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2546     __ add(as_Register(reg), sp, offset);
2547   } else {
2548     ShouldNotReachHere();
2549   }
2550 }
2551 
2552 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2553   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2554   return 4;
2555 }
2556 
2557 //=============================================================================
2558 
2559 #ifndef PRODUCT
2560 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2561 {
2562   st->print_cr("# MachUEPNode");
2563   if (UseCompressedClassPointers) {
2564     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2565     if (Universe::narrow_klass_shift() != 0) {
2566       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
2567     }
2568   } else {
2569    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2570   }
2571   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
2572   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
2573 }
2574 #endif
2575 
2576 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
2577 {
2578   // This is the unverified entry point.
2579   MacroAssembler _masm(&cbuf);
2580 
2581   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
2582   Label skip;
2583   // TODO
2584   // can we avoid this skip and still use a reloc?
2585   __ br(Assembler::EQ, skip);
2586   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
2587   __ bind(skip);
2588 }
2589 
2590 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2591 {
2592   return MachNode::size(ra_);
2593 }
2594 
2595 // REQUIRED EMIT CODE
2596 
2597 //=============================================================================
2598 
2599 // Emit exception handler code.
2600 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
2601 {
2602   // mov rscratch1 #exception_blob_entry_point
2603   // br rscratch1
2604   // Note that the code buffer's insts_mark is always relative to insts.
2605   // That's why we must use the macroassembler to generate a handler.
2606   MacroAssembler _masm(&cbuf);
2607   address base =
2608   __ start_a_stub(size_exception_handler());
2609   if (base == NULL)  return 0;  // CodeBuffer::expand failed
2610   int offset = __ offset();
2611   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2612   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
2613   __ end_a_stub();
2614   return offset;
2615 }
2616 
2617 // Emit deopt handler code.
2618 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
2619 {
2620   // Note that the code buffer's insts_mark is always relative to insts.
2621   // That's why we must use the macroassembler to generate a handler.
2622   MacroAssembler _masm(&cbuf);
2623   address base =
2624   __ start_a_stub(size_deopt_handler());
2625   if (base == NULL)  return 0;  // CodeBuffer::expand failed
2626   int offset = __ offset();
2627 
2628   __ adr(lr, __ pc());
2629   __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2630 
2631   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
2632   __ end_a_stub();
2633   return offset;
2634 }
2635 
2636 // REQUIRED MATCHER CODE
2637 
2638 //=============================================================================
2639 
2640 const bool Matcher::match_rule_supported(int opcode) {
2641 
2642   // TODO
2643   // identify extra cases that we might want to provide match rules for
2644   // e.g. Op_StrEquals and other intrinsics
2645   if (!has_match_rule(opcode)) {
2646     return false;
2647   }
2648 
2649   return true;  // Per default match rules are supported.
2650 }
2651 
int Matcher::regnum_to_fpu_offset(int regnum)
{
  // Not implemented on this port; must never be called.
  Unimplemented();
  return 0;
}
2657 
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset)
{
  // Short-branch replacement is not implemented on this port; must
  // never be called.
  Unimplemented();
  return false;
}
2663 
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  // Every 64-bit constant is treated as "simple" on AArch64.
  return true;
}
2669 
// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
2674 
2675 // Vector width in bytes.
2676 const int Matcher::vector_width_in_bytes(BasicType bt) {
2677   int size = MIN2(16,(int)MaxVectorSize);
2678   // Minimum 2 values in vector
2679   if (size < 2*type2aelembytes(bt)) size = 0;
2680   // But never < 4
2681   if (size < 4) size = 0;
2682   return size;
2683 }
2684 
2685 // Limits on vector size (number of elements) loaded into vector.
2686 const int Matcher::max_vector_size(const BasicType bt) {
2687   return vector_width_in_bytes(bt)/type2aelembytes(bt);
2688 }
2689 const int Matcher::min_vector_size(const BasicType bt) {
2690 //  For the moment limit the vector size to 8 bytes
2691     int size = 8 / type2aelembytes(bt);
2692     if (size < 2) size = 2;
2693     return size;
2694 }
2695 
2696 // Vector ideal reg.
2697 const int Matcher::vector_ideal_reg(int len) {
2698   switch(len) {
2699     case  8: return Op_VecD;
2700     case 16: return Op_VecX;
2701   }
2702   ShouldNotReachHere();
2703   return 0;
2704 }
2705 
const int Matcher::vector_shift_count_ideal_reg(int size) {
  // Vector shift counts always use a full 128-bit register class,
  // regardless of the requested size.
  return Op_VecX;
}
2709 
// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  // Irrelevant until AES intrinsics exist on this port.
  return false;
}
2714 
// AArch64 supports misaligned vector store/load (the old comment said
// "x86" -- stale copy/paste from the x86 AD file).
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}
2719 
// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray.
// NOTE(review): presumably ClearArray up to this many bytes is expanded
// inline, larger ones use a loop -- confirm against the ClearArray rules.
const int Matcher::init_array_short_size = 18 * BytesPerLong;
2725 
// Use conditional move (CMOVL).
// Cost 0 means a long cmove costs the same as an int cmove.
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}
2736 
// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
// false: AArch64 variable shifts use only the low bits of the count.
const bool Matcher::need_masked_shift_count = false;
2748 
2749 // This affects two different things:
2750 //  - how Decode nodes are matched
2751 //  - how ImplicitNullCheck opportunities are recognized
2752 // If true, the matcher will try to remove all Decodes and match them
2753 // (as operands) into nodes. NullChecks are not prepared to deal with
2754 // Decodes by final_graph_reshaping().
2755 // If false, final_graph_reshaping() forces the decode behind the Cmp
2756 // for a NullCheck. The matcher matches the Decode node into a register.
2757 // Implicit_null_check optimization moves the Decode along with the
2758 // memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Only when no decode shift is needed can a narrow oop be folded
  // directly into an addressing expression.
  return Universe::narrow_oop_shift() == 0;
}
2762 
bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}
2768 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
// false: AArch64 keeps loading float constants from memory.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
2781 
// Must never be called on AArch64.  (The old "No-op on amd64" comment
// was stale copy/paste -- this body traps via Unimplemented().)
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}
2786 
// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?  Yes on this port.
bool Matcher::float_in_double() { return true; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
2800 
2801 // Return whether or not this register is ever used as an argument.
2802 // This function is used on startup to build the trampoline stubs in
2803 // generateOptoStub.  Registers not mentioned will be killed by the VM
2804 // call in the trampoline, and arguments in those registers not be
2805 // available to the callee.
2806 bool Matcher::can_be_java_arg(int reg)
2807 {
2808   return
2809     reg ==  R0_num || reg == R0_H_num ||
2810     reg ==  R1_num || reg == R1_H_num ||
2811     reg ==  R2_num || reg == R2_H_num ||
2812     reg ==  R3_num || reg == R3_H_num ||
2813     reg ==  R4_num || reg == R4_H_num ||
2814     reg ==  R5_num || reg == R5_H_num ||
2815     reg ==  R6_num || reg == R6_H_num ||
2816     reg ==  R7_num || reg == R7_H_num ||
2817     reg ==  V0_num || reg == V0_H_num ||
2818     reg ==  V1_num || reg == V1_H_num ||
2819     reg ==  V2_num || reg == V2_H_num ||
2820     reg ==  V3_num || reg == V3_H_num ||
2821     reg ==  V4_num || reg == V4_H_num ||
2822     reg ==  V5_num || reg == V5_H_num ||
2823     reg ==  V6_num || reg == V6_H_num ||
2824     reg ==  V7_num || reg == V7_H_num;
2825 }
2826 
bool Matcher::is_spillable_arg(int reg)
{
  // Any register that can carry a Java argument may also be spilled.
  return can_be_java_arg(reg);
}
2831 
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  // No hand-written assembler path for long division by a constant on
  // this port; let the ideal-graph transforms handle it.
  return false;
}
2835 
// Register for DIVI projection of divmodI.
// None of the divmod projection masks are expected to be requested on
// this port (no combined divmod matching); each traps if called.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// SP is preserved across a method handle invoke in the frame pointer
// register's mask.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
2862 
2863 // helper for encoding java_to_runtime calls on sim
2864 //
2865 // this is needed to compute the extra arguments required when
2866 // planting a call to the simulator blrt instruction. the TypeFunc
2867 // can be queried to identify the counts for integral, and floating
2868 // arguments and the return type
2869 
static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
{
  // Walk the call's domain counting argument slots and classify the
  // return type, for the simulator blrt encoding.
  //   gpcnt <- number of domain slots counted by the default case
  //   fpcnt <- number of float/double arguments
  //   rtype <- MacroAssembler::ret_type_* classification of the return
  int gps = 0;
  int fps = 0;
  const TypeTuple *domain = tf->domain();
  int max = domain->cnt();
  for (int i = TypeFunc::Parms; i < max; i++) {
    const Type *t = domain->field_at(i);
    switch(t->basic_type()) {
    case T_FLOAT:
    case T_DOUBLE:
      fps++;
      // NOTE(review): no break here, so FP args fall through and also
      // bump gps -- gps therefore counts every domain slot, not just
      // integral ones.  This appears deliberate for blrt's argument
      // accounting, but worth confirming against the simulator ABI.
    default:
      gps++;
    }
  }
  gpcnt = gps;
  fpcnt = fps;
  BasicType rt = tf->return_type();
  switch (rt) {
  case T_VOID:
    rtype = MacroAssembler::ret_type_void;
    break;
  default:
    // Any non-void, non-FP return (pointers, ints, longs) comes back
    // in an integer register.
    rtype = MacroAssembler::ret_type_integral;
    break;
  case T_FLOAT:
    rtype = MacroAssembler::ret_type_float;
    break;
  case T_DOUBLE:
    rtype = MacroAssembler::ret_type_double;
    break;
  }
}
2904 
// Emit a volatile (acquire/release) access; the memory operand must be
// a bare [base] address -- any index, scale or displacement is rejected
// with a guarantee().  SCRATCH is currently unused by the expansion.
// NOTE: deliberately not wrapped in do{}while(0): the macro introduces
// `_masm` into the enclosing scope because several enc_classes below
// issue additional `__` instructions after invoking it.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }

// Member-function-pointer types used to share the loadStore() helpers
// between the many integer, FP and vector load/store encodings below.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
2918 
  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  //
  // Emits one of three addressing shapes:
  //   index == -1             -> [base, #disp]
  //   index != -1, disp == 0  -> [base, index, sxtw|lsl #size]
  //   index != -1, disp != 0  -> lea rscratch1, [base, #disp] followed
  //                              by [rscratch1, index, sxtw|lsl #size]
  //                              (this last form clobbers rscratch1)
  static void loadStore(MacroAssembler masm, mem_insn insn,
                         Register reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDOFFSETI2L:
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDOFFSETI2LN:
    case INDINDEXSCALEDI2LN:
    case INDINDEXOFFSETI2L:
    case INDINDEXOFFSETI2LN:
      // I2L patterns: the index is a 32-bit value that must be
      // sign-extended (sxtw) when used as an offset.
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    if (index == -1) {
      (masm.*insn)(reg, Address(base, disp));
    } else {
      if (disp == 0) {
        (masm.*insn)(reg, Address(base, as_Register(index), scale));
      } else {
        masm.lea(rscratch1, Address(base, disp));
        (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
      }
    }
  }
2955 
  // FP-register variant of loadStore above; same three addressing
  // shapes, and the rscratch1 clobber in the index+disp case.
  // NOTE(review): unlike the integer variant, the sign-extend list
  // omits INDINDEXOFFSETI2L/INDINDEXOFFSETI2LN -- presumably those
  // patterns never reach FP accesses; confirm against the memory
  // operand definitions.
  static void loadStore(MacroAssembler masm, mem_float_insn insn,
                         FloatRegister reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    switch (opcode) {
    case INDINDEXSCALEDOFFSETI2L:
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDOFFSETI2LN:
    case INDINDEXSCALEDI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

     if (index == -1) {
      (masm.*insn)(reg, Address(base, disp));
    } else {
      if (disp == 0) {
        (masm.*insn)(reg, Address(base, as_Register(index), scale));
      } else {
        masm.lea(rscratch1, Address(base, disp));
        (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
      }
    }
  }
2984 
2985   static void loadStore(MacroAssembler masm, mem_vector_insn insn,
2986                          FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
2987                          int opcode, Register base, int index, int size, int disp)
2988   {
2989     if (index == -1) {
2990       (masm.*insn)(reg, T, Address(base, disp));
2991     } else {
2992       assert(disp == 0, "unsupported address mode");
2993       (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
2994     }
2995   }
2996 
2997 %}
2998 
2999 
3000 
3001 //----------ENCODING BLOCK-----------------------------------------------------
3002 // This block specifies the encoding classes used by the compiler to
3003 // output byte streams.  Encoding classes are parameterized macros
3004 // used by Machine Instruction Nodes in order to generate the bit
3005 // encoding of the instruction.  Operands specify their base encoding
3006 // interface with the interface keyword.  There are currently
3007 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
3008 // COND_INTER.  REG_INTER causes an operand to generate a function
3009 // which returns its register number when queried.  CONST_INTER causes
3010 // an operand to generate a function which returns the value of the
3011 // constant when queried.  MEMORY_INTER causes an operand to generate
3012 // four functions which return the Base Register, the Index Register,
3013 // the Scale Value, and the Offset Value of the operand when queried.
3014 // COND_INTER causes an operand to generate six functions which return
3015 // the encoding code (ie - encoding bits for the instruction)
3016 // associated with each basic boolean condition for a conditional
3017 // instruction.
3018 //
3019 // Instructions specify two basic values for encoding.  Again, a
3020 // function is available to check if the constant displacement is an
3021 // oop. They use the ins_encode keyword to specify their encoding
3022 // classes (which must be a sequence of enc_class names, and their
3023 // parameters, specified in the encoding block), and they use the
3024 // opcode keyword to specify, in order, their primary, secondary, and
3025 // tertiary opcode.  Only the opcode sections which a particular
3026 // instruction needs for encoding need to be specified.
3027 encode %{
3028   // Build emit functions for each basic byte or larger field in the
3029   // intel encoding scheme (opcode, rm, sib, immediate), and call them
3030   // from C++ code in the enc_class source block.  Emit functions will
3031   // live in the main source block for now.  In future, we can
3032   // generalize this by adding a syntax that specifies the sizes of
3033   // fields in an order, so that the adlc can build the emit functions
3034   // automagically
3035 
  // catch all for unimplemented encodings: emits code that fails
  // loudly at runtime if an instruct without a real encoding is ever
  // executed, rather than silently emitting nothing.
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}
3041 
3042   // BEGIN Non-volatile memory access
3043 
  // Each encoding below emits a plain (non-volatile) load; the address
  // form ([base,#disp], [base,index,extend] or lea+load) is chosen by
  // loadStore() from the memory operand's opcode.

  // load byte, sign-extended to 32 bits
  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load byte, sign-extended to 64 bits
  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load byte, zero-extended (int destination)
  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load byte, zero-extended (long destination)
  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load halfword, sign-extended to 32 bits
  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load halfword, sign-extended to 64 bits
  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load halfword, zero-extended (int destination)
  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load halfword, zero-extended (long destination)
  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 32-bit word
  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 32-bit word, zero-extended into a long
  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 32-bit word, sign-extended into a long
  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 64-bit doubleword
  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
3115 
  // load 32-bit float
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 64-bit double
  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // vector loads, distinguished by SIMD_RegVariant (S = 32-bit,
  // D = 64-bit, Q = 128-bit)
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
3145 
  // Plain (non-volatile) stores.  The *0 variants store the zero
  // register (zr), so writing a zero consumes no source register.

  // store byte
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store zero byte
  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store halfword
  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store zero halfword
  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store 32-bit word
  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store zero 32-bit word
  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store 64-bit doubleword
  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    // (sp is not encodable as a str source), so copy it through
    // rscratch2 first
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store zero doubleword
  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store 32-bit float
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store 64-bit double
  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // vector stores (S/D/Q = 32/64/128-bit)
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
3231 
3232   // END Non-volatile memory access
3233 
3234   // volatile loads and stores
3235 
  // Volatile stores: store-release (stlr*) instructions; the memory
  // operand must be a bare [base] address (enforced by MOV_VOLATILE).

  // store-release byte
  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
  %}

  // store-release halfword
  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
  %}

  // store-release 32-bit word
  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}
3250 
3251 
  // Volatile loads: load-acquire (ldar*) instructions.  There is no
  // sign-extending load-acquire, so the signed variants do a
  // zero-extending ldarb/ldarh followed by an explicit sign-extend
  // (the `_masm` used by `__` is declared by MOV_VOLATILE).

  // load-acquire byte, sign-extended to 32 bits
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}

  // load-acquire byte, sign-extended to 64 bits
  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}

  // load-acquire byte, zero-extended (int destination)
  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // load-acquire byte, zero-extended (long destination)
  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // load-acquire halfword, sign-extended to 32 bits
  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  // load-acquire halfword, sign-extended to 64 bits
  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  // load-acquire halfword, zero-extended (int destination)
  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // load-acquire halfword, zero-extended (long destination)
  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // load-acquire 32-bit word
  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // load-acquire 32-bit word, zero-extended (long destination)
  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // load-acquire 64-bit doubleword
  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}
3314 
  // Volatile FP loads: there is no FP load-acquire, so load-acquire
  // into the integer scratch register, then fmov to the FP register.

  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
3326 
  // store-release 64-bit doubleword
  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    // (sp is not encodable as a stlr source), so copy it through
    // rscratch2 first
    if (src_reg == r31_sp) {
        MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}

  // Volatile FP stores: no FP store-release exists, so fmov the value
  // into the integer scratch register first, then store-release it.
  // (The inner braces scope a temporary _masm so it does not clash
  // with the one MOV_VOLATILE declares.)

  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}

  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
3360 
3361   // synchronized read/update encodings
3362 
  // Load-acquire-exclusive.  ldaxr only takes a bare register address,
  // so any displacement/index is first folded into rscratch1 via lea.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        // [base, #disp]: compute the effective address first
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        // [base, index << scale]
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // [base, #disp, index << scale]: two-step address computation
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}
3391 
  // Store-release-exclusive.  The status result lands in rscratch1
  // (0 = success); the trailing cmpw exposes it as condition flags
  // (EQ = store succeeded) for the matching bool operand.
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        // [base, #disp]: compute the effective address in rscratch2
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        // [base, index << scale]
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // [base, #disp, index << scale]: two-step address computation
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    __ cmpw(rscratch1, zr);
  %}
3421 
  // Emit a 64-bit compare-and-swap on $mem: if *addr == oldval then
  // *addr = newval, using an ldxr/stlxr retry loop.  On exit the flags
  // hold the result of the last cmp: EQ iff the expected value was seen
  // (and the store succeeded); NE on mismatch.  The result register is
  // set separately by aarch64_enc_cset_eq.
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    Register old_reg = as_Register($oldval$$reg);
    Register new_reg = as_Register($newval$$reg);
    Register base = as_Register($mem$$base);
    Register addr_reg;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    // Fold the memory operand into a single address register, since the
    // exclusive instructions take only a plain base register.
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        addr_reg = rscratch2;
      } else {
        // TODO
        // should we ever get anything other than this case?
        addr_reg = base;
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        addr_reg = rscratch2;
      } else {
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        addr_reg = rscratch2;
      }
    }
    // CAS loop: retry while the exclusive store loses the monitor.
    Label retry_load, done;
    __ bind(retry_load);
    __ ldxr(rscratch1, addr_reg);
    __ cmp(rscratch1, old_reg);
    __ br(Assembler::NE, done);          // value mismatch -> fail (flags NE)
    __ stlxr(rscratch1, new_reg, addr_reg);
    __ cbnzw(rscratch1, retry_load);     // non-zero status -> lost reservation
    __ bind(done);
  %}
3460 
  // 32-bit variant of aarch64_enc_cmpxchg: compare-and-swap a word at
  // $mem using an ldxrw/stlxrw retry loop.  Flags on exit: EQ iff the
  // expected value was seen and the store succeeded.
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    Register old_reg = as_Register($oldval$$reg);
    Register new_reg = as_Register($newval$$reg);
    Register base = as_Register($mem$$base);
    Register addr_reg;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    // Fold the memory operand into a single address register.
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        addr_reg = rscratch2;
      } else {
        // TODO
        // should we ever get anything other than this case?
        addr_reg = base;
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        addr_reg = rscratch2;
      } else {
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        addr_reg = rscratch2;
      }
    }
    // CAS loop: retry while the exclusive store loses the monitor.
    Label retry_load, done;
    __ bind(retry_load);
    __ ldxrw(rscratch1, addr_reg);
    __ cmpw(rscratch1, old_reg);
    __ br(Assembler::NE, done);          // value mismatch -> fail (flags NE)
    __ stlxrw(rscratch1, new_reg, addr_reg);
    __ cbnzw(rscratch1, retry_load);     // non-zero status -> lost reservation
    __ bind(done);
  %}
3499 
  // auxiliary used for CompareAndSwapX to set result register
  // Sets $res to 1 if the flags are EQ (CAS succeeded), else 0.
  enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
    MacroAssembler _masm(&cbuf);
    Register res_reg = as_Register($res$$reg);
    __ cset(res_reg, Assembler::EQ);
  %}
3506 
3507   // prefetch encodings
3508 
  // Emit a prefetch-for-store (PSTL1KEEP) hint for $mem.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
      // NOTE(review): the nop appears to pad this path to a fixed size
      // relative to the indexed paths — confirm before removing.
      __ nop();
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        // fold the displacement first; prfm handles the scaled index
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
3528 
  // Zero a word-aligned region of cnt words starting at base, using an
  // 8-way unrolled store loop entered Duff's-device style: a computed
  // branch into the unrolled strs handles the cnt % 8 remainder first.
  // Clobbers rscratch1/rscratch2 and updates cnt_reg/base_reg.
  enc_class aarch64_enc_clear_array_reg_reg(iRegL_R11 cnt, iRegP_R10 base) %{
    MacroAssembler _masm(&cbuf);
    Register cnt_reg = as_Register($cnt$$reg);
    Register base_reg = as_Register($base$$reg);
    // base is word aligned
    // cnt is count of words

    Label loop;
    Label entry;

//  Algorithm:
//
//    scratch1 = cnt & 7;
//    cnt -= scratch1;
//    p += scratch1;
//    switch (scratch1) {
//      do {
//        cnt -= 8;
//          p[-8] = 0;
//        case 7:
//          p[-7] = 0;
//        case 6:
//          p[-6] = 0;
//          // ...
//        case 1:
//          p[-1] = 0;
//        case 0:
//          p += 8;
//      } while (cnt);
//    }

    const int unroll = 8; // Number of str(zr) instructions we'll unroll

    __ andr(rscratch1, cnt_reg, unroll - 1);  // tmp1 = cnt % unroll
    __ sub(cnt_reg, cnt_reg, rscratch1);      // cnt -= unroll
    // base_reg always points to the end of the region we're about to zero
    __ add(base_reg, base_reg, rscratch1, Assembler::LSL, exact_log2(wordSize));
    // Compute the entry point: back up one 4-byte instruction per
    // remainder word, so exactly rscratch1 of the strs below execute.
    __ adr(rscratch2, entry);
    __ sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
    __ br(rscratch2);
    __ bind(loop);
    __ sub(cnt_reg, cnt_reg, unroll);
    for (int i = -unroll; i < 0; i++)
      __ str(zr, Address(base_reg, i * wordSize));
    __ bind(entry);
    __ add(base_reg, base_reg, unroll * wordSize);
    __ cbnz(cnt_reg, loop);
  %}
3577 
  /// mov encodings
3579 
  // Move a 32-bit immediate into $dst; zero uses the zero register.
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}
3590 
  // Move a 64-bit immediate into $dst; zero uses the zero register.
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}
3601 
  // Move a pointer constant into $dst, choosing the emission form from
  // the constant's relocation type: oop constants via movoop, metadata
  // via mov_metadata, and plain addresses either as an immediate (small
  // values below the VM page size) or via adrp+add for reachability.
  // NULL and (address)1 are handled by separate encodings (mov_p0/p1).
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          __ mov(dst_reg, con);
        } else {
          // page-relative addressing: adrp gives the 4K page, add the rest
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}
3626 
  // Move the NULL pointer constant into $dst (just zero the register).
  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}
3632 
  // Move the pointer constant 1 into $dst (special-cased immP_1 operand).
  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}
3638 
  // Load the address of the safepoint polling page into $dst with a
  // single adrp carrying a poll_type relocation; the page must be
  // 4K-aligned so the low-order offset is zero.
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}
3647 
  // Load the card-table byte map base into $dst with a single adrp;
  // like the polling page, the base is assumed page-aligned (offset 0).
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, ExternalAddress(page), off);
    assert(off == 0, "assumed offset == 0");
  %}
3656 
  // Move a narrow (compressed) oop constant into $dst.  The constant
  // must carry an oop relocation; NULL is handled by mov_n0.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}
3669 
  // Move the narrow-oop NULL constant into $dst (zero the register).
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}
3675 
  // Move a narrow (compressed) klass constant into $dst.  The constant
  // must carry a metadata relocation and must not be NULL.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
3688 
3689   // arithmetic encodings
3690 
  // 32-bit add/subtract of an immediate: dst = src1 +/- con.  The ADL
  // $primary selector distinguishes the two instructions sharing this
  // encoding (0 = add, 1 = subtract); subtraction negates the constant,
  // and a negative effective constant is emitted as the opposite op so
  // the immediate always fits the unsigned add/sub immediate field.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}
3704 
  // 64-bit add/subtract of an immediate; same $primary convention and
  // sign normalization as the 32-bit variant above.  The immLAddSub
  // operand keeps the constant within 32-bit range, hence the int32_t.
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}
3718 
  // 32-bit signed division with Java min-int/-1 correction
  // (want_remainder == false selects the quotient).
  enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}
3726 
  // 64-bit signed division with Java min-long/-1 correction
  // (want_remainder == false selects the quotient).
  enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}
3734 
  // 32-bit signed remainder (want_remainder == true).
  enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}
3742 
  // 64-bit signed remainder (want_remainder == true).
  enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}
3750 
3751   // compare instruction encodings
3752 
  // 32-bit register-register compare; result is in the flags.
  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}
3759 
  // 32-bit compare against an add/sub-range immediate.  A negative
  // constant is compared via addsw with the negated value so the
  // immediate always fits the unsigned 12-bit field.
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}
3770 
  // 32-bit compare against an arbitrary immediate: materialize the
  // constant into rscratch1 first, then do a register compare.
  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
    __ cmpw(reg1, rscratch1);
  %}
3778 
  // 64-bit register-register compare; result is in the flags.
  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}
3785 
  // 64-bit compare against a 12-bit-range immediate.  Negative values
  // use adds with the negated constant; Long.MIN_VALUE (the one value
  // equal to its own negation) cannot be negated, so it is materialized
  // into rscratch1 and compared as a register.
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
    // aargh, Long.MIN_VALUE is a special case
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}
3800 
  // 64-bit compare against an arbitrary immediate: materialize the
  // constant into rscratch1 first, then do a register compare.
  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
    __ cmp(reg1, rscratch1);
  %}
3808 
  // Pointer compare (full 64-bit); result is in the flags.
  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}
3815 
  // Narrow (compressed) oop compare — 32-bit compare suffices.
  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}
3822 
  // Test a pointer against NULL (compare with the zero register).
  enc_class aarch64_enc_testp(iRegP src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmp(reg, zr);
  %}
3828 
  // Test a narrow oop against NULL (32-bit compare with zero).
  enc_class aarch64_enc_testn(iRegN src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmpw(reg, zr);
  %}
3834 
  // Unconditional branch to a label.
  enc_class aarch64_enc_b(label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ b(*L);
  %}
3840 
  // Conditional branch using a signed-compare condition operand.
  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
3846 
  // Conditional branch using an unsigned-compare condition operand.
  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
3852 
  // Slow-path partial subtype check: walks the secondary supers of sub
  // looking for super.  $primary distinguishes the two instruct forms
  // sharing this encoding; when set, result is zeroed on the hit path
  // before the miss label.  Condition codes are set for the caller.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
3870 
  // Emit a Java static (or optimized-virtual) call via a trampoline.
  // The relocation type is chosen from the call-site kind; a real Java
  // target additionally gets a static-call stub for the interpreter.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else if (_optimized_virtual) {
      __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
    } else {
      __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
    }

    if (_method) {
      // Emit stub for static call
      CompiledStaticCall::emit_to_interp_stub(cbuf);
    }
  %}
3889 
  // Emit a Java dynamic (virtual/interface) call through an inline cache.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    __ ic_call((address)$meth$$method);
  %}
3894 
  // Call epilogue; the stack-depth verification is not implemented on
  // AArch64, so under VerifyStackAtCalls it traps via call_Unimplemented.
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
3902 
  // Call from compiled Java code into the runtime.
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blrt
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // target is inside the code cache: reachable via trampoline call
      __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
    } else {
      // out-of-line runtime entry: full indirect call with C calling
      // convention derived from the method's type function
      int gpcnt;
      int fpcnt;
      int rtype;
      getCallInfo(tf(), gpcnt, fpcnt, rtype);
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaThread::pd_last_frame().
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
      __ bind(retaddr);
      // pop the breadcrumb
      __ add(sp, sp, 2 * wordSize);
    }
  %}
3929 
  // Jump to the rethrow stub (exception oop expected by the stub).
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}
3934 
  // Return via the link register.
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}
3939 
  // Tail call: indirect jump to the target in a register.
  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    __ br(target_reg);
  %}
3945 
  // Tail jump used for exception forwarding: passes the return address
  // (already popped into lr) to the callee in r3, then jumps.
  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
    // callee expects it in r3
    __ mov(r3, lr);
    __ br(target_reg);
  %}
3955 
3956   enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
3957     MacroAssembler _masm(&cbuf);
3958     Register oop = as_Register($object$$reg);
3959     Register box = as_Register($box$$reg);
3960     Register disp_hdr = as_Register($tmp$$reg);
3961     Register tmp = as_Register($tmp2$$reg);
3962     Label cont;
3963     Label object_has_monitor;
3964     Label cas_failed;
3965 
3966     assert_different_registers(oop, box, tmp, disp_hdr);
3967 
3968     // Load markOop from object into displaced_header.
3969     __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
3970 
3971     // Always do locking in runtime.
3972     if (EmitSync & 0x01) {
3973       __ cmp(oop, zr);
3974       return;
3975     }
3976 
3977     if (UseBiasedLocking) {
3978       __ biased_locking_enter(disp_hdr, oop, box, tmp, true, cont);
3979     }
3980 
3981     // Handle existing monitor
3982     if (EmitSync & 0x02) {
3983       // we can use AArch64's bit test and branch here but
3984       // markoopDesc does not define a bit index just the bit value
3985       // so assert in case the bit pos changes
3986 #     define __monitor_value_log2 1
3987       assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
3988       __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
3989 #     undef __monitor_value_log2
3990     }
3991 
3992     // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
3993     __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);
3994 
3995     // Load Compare Value application register.
3996 
3997     // Initialize the box. (Must happen before we update the object mark!)
3998     __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3999 
4000     // Compare object markOop with mark and if equal exchange scratch1
4001     // with object markOop.
4002     // Note that this is simply a CAS: it does not generate any
4003     // barriers.  These are separately generated by
4004     // membar_acquire_lock().
4005     {
4006       Label retry_load;
4007       __ bind(retry_load);
4008       __ ldxr(tmp, oop);
4009       __ cmp(tmp, disp_hdr);
4010       __ br(Assembler::NE, cas_failed);
4011       // use stlxr to ensure update is immediately visible
4012       __ stlxr(tmp, box, oop);
4013       __ cbzw(tmp, cont);
4014       __ b(retry_load);
4015     }
4016 
4017     // Formerly:
4018     // __ cmpxchgptr(/*oldv=*/disp_hdr,
4019     //               /*newv=*/box,
4020     //               /*addr=*/oop,
4021     //               /*tmp=*/tmp,
4022     //               cont,
4023     //               /*fail*/NULL);
4024 
4025     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
4026 
4027     // If the compare-and-exchange succeeded, then we found an unlocked
4028     // object, will have now locked it will continue at label cont
4029 
4030     __ bind(cas_failed);
4031     // We did not see an unlocked object so try the fast recursive case.
4032 
4033     // Check if the owner is self by comparing the value in the
4034     // markOop of object (disp_hdr) with the stack pointer.
4035     __ mov(rscratch1, sp);
4036     __ sub(disp_hdr, disp_hdr, rscratch1);
4037     __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
4038     // If condition is true we are cont and hence we can store 0 as the
4039     // displaced header in the box, which indicates that it is a recursive lock.
4040     __ ands(tmp/*==0?*/, disp_hdr, tmp);
4041     __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
4042 
4043     // Handle existing monitor.
4044     if ((EmitSync & 0x02) == 0) {
4045       __ b(cont);
4046 
4047       __ bind(object_has_monitor);
4048       // The object's monitor m is unlocked iff m->owner == NULL,
4049       // otherwise m->owner may contain a thread or a stack address.
4050       //
4051       // Try to CAS m->owner from NULL to current thread.
4052       __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
4053       __ mov(disp_hdr, zr);
4054 
4055       {
4056         Label retry_load, fail;
4057         __ bind(retry_load);
4058         __ ldxr(rscratch1, tmp);
4059         __ cmp(disp_hdr, rscratch1);
4060         __ br(Assembler::NE, fail);
4061         // use stlxr to ensure update is immediately visible
4062         __ stlxr(rscratch1, rthread, tmp);
4063         __ cbnzw(rscratch1, retry_load);
4064         __ bind(fail);
4065       }
4066 
4067       // Label next;
4068       // __ cmpxchgptr(/*oldv=*/disp_hdr,
4069       //               /*newv=*/rthread,
4070       //               /*addr=*/tmp,
4071       //               /*tmp=*/rscratch1,
4072       //               /*succeed*/next,
4073       //               /*fail*/NULL);
4074       // __ bind(next);
4075 
4076       // store a non-null value into the box.
4077       __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));
4078 
4079       // PPC port checks the following invariants
4080       // #ifdef ASSERT
4081       // bne(flag, cont);
4082       // We have acquired the monitor, check some invariants.
4083       // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
4084       // Invariant 1: _recursions should be 0.
4085       // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
4086       // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
4087       //                        "monitor->_recursions should be 0", -1);
4088       // Invariant 2: OwnerIsThread shouldn't be 0.
4089       // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
4090       //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
4091       //                           "monitor->OwnerIsThread shouldn't be 0", -1);
4092       // #endif
4093     }
4094 
4095     __ bind(cont);
4096     // flag == EQ indicates success
4097     // flag == NE indicates failure
4098 
4099   %}
4100 
4101   // TODO
4102   // reimplement this with custom cmpxchgptr code
4103   // which avoids some of the unnecessary branching
  // Fast-path monitor exit, mirroring aarch64_enc_fast_lock.  On exit
  // the flags encode the result: EQ = unlocked on the fast path,
  // NE = caller must go to the runtime slow path.
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);


    // Handle existing monitor.
    // NOTE(review): the mark word is loaded into tmp but the monitor
    // bit is tested on disp_hdr (the displaced header from the box).
    // Testing tmp looks like the intent — confirm against upstream; as
    // written a missed detection only falls through to the CAS below,
    // which fails and takes the slow path, so this is conservative.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

      {
        // CAS the mark word from box back to the displaced header.
        Label retry_load;
        __ bind(retry_load);
        __ ldxr(tmp, oop);
        __ cmp(box, tmp);
        __ br(Assembler::NE, cas_failed);
        // use stlxr to ensure update is immediately visible
        __ stlxr(tmp, disp_hdr, oop);
        __ cbzw(tmp, cont);   // store succeeded; flags are EQ from the cmp
        __ b(retry_load);     // lost the exclusive reservation; retry
      }

    // __ cmpxchgptr(/*compare_value=*/box,
    //               /*exchange_value=*/disp_hdr,
    //               /*where=*/oop,
    //               /*result=*/tmp,
    //               cont,
    //               /*cas_failed*/NULL);
    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    __ bind(cas_failed);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);   // not owner or recursive -> slow path

      // Fast exit only if nobody is waiting on the monitor.
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      __ cmp(rscratch1, zr);
      __ cbnz(rscratch1, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(rscratch1, tmp); // rscratch1 is zero
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
4193 
4194 %}
4195 
4196 //----------FRAME--------------------------------------------------------------
4197 // Definition of frame structure and management information.
4198 //
4199 //  S T A C K   L A Y O U T    Allocators stack-slot number
4200 //                             |   (to get allocators register number
4201 //  G  Owned by    |        |  v    add OptoReg::stack0())
4202 //  r   CALLER     |        |
4203 //  o     |        +--------+      pad to even-align allocators stack-slot
4204 //  w     V        |  pad0  |        numbers; owned by CALLER
4205 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4206 //  h     ^        |   in   |  5
4207 //        |        |  args  |  4   Holes in incoming args owned by SELF
4208 //  |     |        |        |  3
4209 //  |     |        +--------+
4210 //  V     |        | old out|      Empty on Intel, window on Sparc
4211 //        |    old |preserve|      Must be even aligned.
4212 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
4213 //        |        |   in   |  3   area for Intel ret address
4214 //     Owned by    |preserve|      Empty on Sparc.
4215 //       SELF      +--------+
4216 //        |        |  pad2  |  2   pad to align old SP
4217 //        |        +--------+  1
4218 //        |        | locks  |  0
4219 //        |        +--------+----> OptoReg::stack0(), even aligned
4220 //        |        |  pad1  | 11   pad to align new SP
4221 //        |        +--------+
4222 //        |        |        | 10
4223 //        |        | spills |  9   spills
4224 //        V        |        |  8   (pad0 slot for callee)
4225 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4226 //        ^        |  out   |  7
4227 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4228 //     Owned by    +--------+
4229 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4230 //        |    new |preserve|      Must be even-aligned.
4231 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
4232 //        |        |        |
4233 //
4234 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4235 //         known from SELF's arguments and the Java calling convention.
4236 //         Region 6-7 is determined per call site.
4237 // Note 2: If the calling convention leaves holes in the incoming argument
4238 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
4240 //         incoming area, as the Java calling convention is completely under
4241 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
4243 //         varargs C calling conventions.
4244 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4245 //         even aligned with pad0 as needed.
4246 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
4247 //           (the latter is true on Intel but is it false on AArch64?)
4248 //         region 6-11 is even aligned; it may be padded out more so that
4249 //         the region from SP to FP meets the minimum stack alignment.
4250 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4251 //         alignment.  Region 11, pad1, may be dynamically extended so that
4252 //         SP meets the minimum alignment.
4253 
4254 frame %{
4255   // What direction does stack grow in (assumed to be same for C & Java)
4256   stack_direction(TOWARDS_LOW);
4257 
4258   // These three registers define part of the calling convention
4259   // between compiled code and the interpreter.
4260 
4261   // Inline Cache Register or methodOop for I2C.
4262   inline_cache_reg(R12);
4263 
4264   // Method Oop Register when calling interpreter.
4265   interpreter_method_oop_reg(R12);
4266 
4267   // Number of stack slots consumed by locking an object
4268   sync_stack_slots(2);
4269 
4270   // Compiled code's Frame Pointer
4271   frame_pointer(R31);
4272 
4273   // Interpreter stores its frame pointer in a register which is
4274   // stored to the stack by I2CAdaptors.
4275   // I2CAdaptors convert from interpreted java to compiled java.
4276   interpreter_frame_pointer(R29);
4277 
4278   // Stack alignment requirement
4279   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4280 
4281   // Number of stack slots between incoming argument block and the start of
4282   // a new frame.  The PROLOG must add this many slots to the stack.  The
4283   // EPILOG must remove this many slots. aarch64 needs two slots for
4284   // return address and fp.
4285   // TODO think this is correct but check
4286   in_preserve_stack_slots(4);
4287 
4288   // Number of outgoing stack slots killed above the out_preserve_stack_slots
4289   // for calls to C.  Supports the var-args backing area for register parms.
4290   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4291 
4292   // The after-PROLOG location of the return address.  Location of
4293   // return address specifies a type (REG or STACK) and a number
4294   // representing the register number (i.e. - use a register name) or
4295   // stack slot.
4296   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4297   // Otherwise, it is above the locks and verification slot and alignment word
4298   // TODO this may well be correct but need to check why that - 2 is there
4299   // ppc port uses 0 but we definitely need to allow for fixed_slots
4300   // which folds in the space used for monitors
4301   return_addr(STACK - 2 +
4302               round_to((Compile::current()->in_preserve_stack_slots() +
4303                         Compile::current()->fixed_slots()),
4304                        stack_alignment_in_slots()));
4305 
4306   // Body of function which returns an integer array locating
4307   // arguments either in registers or in stack slots.  Passed an array
4308   // of ideal registers called "sig" and a "length" count.  Stack-slot
4309   // offsets are based on outgoing arguments, i.e. a CALLER setting up
4310   // arguments for a CALLEE.  Incoming stack arguments are
4311   // automatically biased by the preserve_stack_slots field above.
4312 
4313   calling_convention
4314   %{
4315     // No difference between ingoing/outgoing just pass false
4316     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4317   %}
4318 
4319   c_calling_convention
4320   %{
4321     // This is obviously always outgoing
4322     (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
4323   %}
4324 
4325   // Location of compiled Java return values.  Same as C for now.
4326   return_value
4327   %{
4328     // TODO do we allow ideal_reg == Op_RegN???
4329     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4330            "only return normal values");
4331 
4332     static const int lo[Op_RegL + 1] = { // enum name
4333       0,                                 // Op_Node
4334       0,                                 // Op_Set
4335       R0_num,                            // Op_RegN
4336       R0_num,                            // Op_RegI
4337       R0_num,                            // Op_RegP
4338       V0_num,                            // Op_RegF
4339       V0_num,                            // Op_RegD
4340       R0_num                             // Op_RegL
4341     };
4342 
4343     static const int hi[Op_RegL + 1] = { // enum name
4344       0,                                 // Op_Node
4345       0,                                 // Op_Set
4346       OptoReg::Bad,                       // Op_RegN
4347       OptoReg::Bad,                      // Op_RegI
4348       R0_H_num,                          // Op_RegP
4349       OptoReg::Bad,                      // Op_RegF
4350       V0_H_num,                          // Op_RegD
4351       R0_H_num                           // Op_RegL
4352     };
4353 
4354     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4355   %}
4356 %}
4357 
4358 //----------ATTRIBUTES---------------------------------------------------------
4359 //----------Operand Attributes-------------------------------------------------
4360 op_attrib op_cost(1);        // Required cost attribute
4361 
4362 //----------Instruction Attributes---------------------------------------------
4363 ins_attrib ins_cost(INSN_COST); // Required cost attribute
4364 ins_attrib ins_size(32);        // Required size attribute (in bits)
4365 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4366                                 // a non-matching short branch variant
4367                                 // of some long branch?
4368 ins_attrib ins_alignment(4);    // Required alignment attribute (must
4369                                 // be a power of 2) specifies the
4370                                 // alignment that some part of the
4371                                 // instruction (not necessarily the
4372                                 // start) requires.  If > 1, a
4373                                 // compute_padding() function must be
4374                                 // provided for the instruction
4375 
4376 //----------OPERANDS-----------------------------------------------------------
4377 // Operand definitions must precede instruction definitions for correct parsing
4378 // in the ADLC because operands constitute user defined types which are used in
4379 // instruction definitions.
4380 
4381 //----------Simple Operands----------------------------------------------------
4382 
4383 // Integer operands 32 bit
4384 // 32 bit immediate
4385 operand immI()
4386 %{
4387   match(ConI);
4388 
4389   op_cost(0);
4390   format %{ %}
4391   interface(CONST_INTER);
4392 %}
4393 
4394 // 32 bit zero
4395 operand immI0()
4396 %{
4397   predicate(n->get_int() == 0);
4398   match(ConI);
4399 
4400   op_cost(0);
4401   format %{ %}
4402   interface(CONST_INTER);
4403 %}
4404 
4405 // 32 bit unit increment
4406 operand immI_1()
4407 %{
4408   predicate(n->get_int() == 1);
4409   match(ConI);
4410 
4411   op_cost(0);
4412   format %{ %}
4413   interface(CONST_INTER);
4414 %}
4415 
4416 // 32 bit unit decrement
4417 operand immI_M1()
4418 %{
4419   predicate(n->get_int() == -1);
4420   match(ConI);
4421 
4422   op_cost(0);
4423   format %{ %}
4424   interface(CONST_INTER);
4425 %}
4426 
4427 operand immI_le_4()
4428 %{
4429   predicate(n->get_int() <= 4);
4430   match(ConI);
4431 
4432   op_cost(0);
4433   format %{ %}
4434   interface(CONST_INTER);
4435 %}
4436 
4437 operand immI_31()
4438 %{
4439   predicate(n->get_int() == 31);
4440   match(ConI);
4441 
4442   op_cost(0);
4443   format %{ %}
4444   interface(CONST_INTER);
4445 %}
4446 
4447 operand immI_8()
4448 %{
4449   predicate(n->get_int() == 8);
4450   match(ConI);
4451 
4452   op_cost(0);
4453   format %{ %}
4454   interface(CONST_INTER);
4455 %}
4456 
4457 operand immI_16()
4458 %{
4459   predicate(n->get_int() == 16);
4460   match(ConI);
4461 
4462   op_cost(0);
4463   format %{ %}
4464   interface(CONST_INTER);
4465 %}
4466 
4467 operand immI_24()
4468 %{
4469   predicate(n->get_int() == 24);
4470   match(ConI);
4471 
4472   op_cost(0);
4473   format %{ %}
4474   interface(CONST_INTER);
4475 %}
4476 
4477 operand immI_32()
4478 %{
4479   predicate(n->get_int() == 32);
4480   match(ConI);
4481 
4482   op_cost(0);
4483   format %{ %}
4484   interface(CONST_INTER);
4485 %}
4486 
4487 operand immI_48()
4488 %{
4489   predicate(n->get_int() == 48);
4490   match(ConI);
4491 
4492   op_cost(0);
4493   format %{ %}
4494   interface(CONST_INTER);
4495 %}
4496 
4497 operand immI_56()
4498 %{
4499   predicate(n->get_int() == 56);
4500   match(ConI);
4501 
4502   op_cost(0);
4503   format %{ %}
4504   interface(CONST_INTER);
4505 %}
4506 
4507 operand immI_64()
4508 %{
4509   predicate(n->get_int() == 64);
4510   match(ConI);
4511 
4512   op_cost(0);
4513   format %{ %}
4514   interface(CONST_INTER);
4515 %}
4516 
4517 operand immI_255()
4518 %{
4519   predicate(n->get_int() == 255);
4520   match(ConI);
4521 
4522   op_cost(0);
4523   format %{ %}
4524   interface(CONST_INTER);
4525 %}
4526 
4527 operand immI_65535()
4528 %{
4529   predicate(n->get_int() == 65535);
4530   match(ConI);
4531 
4532   op_cost(0);
4533   format %{ %}
4534   interface(CONST_INTER);
4535 %}
4536 
4537 operand immL_63()
4538 %{
4539   predicate(n->get_int() == 63);
4540   match(ConI);
4541 
4542   op_cost(0);
4543   format %{ %}
4544   interface(CONST_INTER);
4545 %}
4546 
4547 operand immL_255()
4548 %{
4549   predicate(n->get_int() == 255);
4550   match(ConI);
4551 
4552   op_cost(0);
4553   format %{ %}
4554   interface(CONST_INTER);
4555 %}
4556 
4557 operand immL_65535()
4558 %{
4559   predicate(n->get_long() == 65535L);
4560   match(ConL);
4561 
4562   op_cost(0);
4563   format %{ %}
4564   interface(CONST_INTER);
4565 %}
4566 
4567 operand immL_4294967295()
4568 %{
4569   predicate(n->get_long() == 4294967295L);
4570   match(ConL);
4571 
4572   op_cost(0);
4573   format %{ %}
4574   interface(CONST_INTER);
4575 %}
4576 
4577 operand immL_bitmask()
4578 %{
4579   predicate(((n->get_long() & 0xc000000000000000l) == 0)
4580             && is_power_of_2(n->get_long() + 1));
4581   match(ConL);
4582 
4583   op_cost(0);
4584   format %{ %}
4585   interface(CONST_INTER);
4586 %}
4587 
4588 operand immI_bitmask()
4589 %{
4590   predicate(((n->get_int() & 0xc0000000) == 0)
4591             && is_power_of_2(n->get_int() + 1));
4592   match(ConI);
4593 
4594   op_cost(0);
4595   format %{ %}
4596   interface(CONST_INTER);
4597 %}
4598 
4599 // Scale values for scaled offset addressing modes (up to long but not quad)
4600 operand immIScale()
4601 %{
4602   predicate(0 <= n->get_int() && (n->get_int() <= 3));
4603   match(ConI);
4604 
4605   op_cost(0);
4606   format %{ %}
4607   interface(CONST_INTER);
4608 %}
4609 
4610 // 26 bit signed offset -- for pc-relative branches
4611 operand immI26()
4612 %{
4613   predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
4614   match(ConI);
4615 
4616   op_cost(0);
4617   format %{ %}
4618   interface(CONST_INTER);
4619 %}
4620 
4621 // 19 bit signed offset -- for pc-relative loads
4622 operand immI19()
4623 %{
4624   predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
4625   match(ConI);
4626 
4627   op_cost(0);
4628   format %{ %}
4629   interface(CONST_INTER);
4630 %}
4631 
4632 // 12 bit unsigned offset -- for base plus immediate loads
4633 operand immIU12()
4634 %{
4635   predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
4636   match(ConI);
4637 
4638   op_cost(0);
4639   format %{ %}
4640   interface(CONST_INTER);
4641 %}
4642 
4643 operand immLU12()
4644 %{
4645   predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
4646   match(ConL);
4647 
4648   op_cost(0);
4649   format %{ %}
4650   interface(CONST_INTER);
4651 %}
4652 
4653 // Offset for scaled or unscaled immediate loads and stores
4654 operand immIOffset()
4655 %{
4656   predicate(Address::offset_ok_for_immed(n->get_int()));
4657   match(ConI);
4658 
4659   op_cost(0);
4660   format %{ %}
4661   interface(CONST_INTER);
4662 %}
4663 
4664 operand immLoffset()
4665 %{
4666   predicate(Address::offset_ok_for_immed(n->get_long()));
4667   match(ConL);
4668 
4669   op_cost(0);
4670   format %{ %}
4671   interface(CONST_INTER);
4672 %}
4673 
4674 // 32 bit integer valid for add sub immediate
4675 operand immIAddSub()
4676 %{
4677   predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
4678   match(ConI);
4679   op_cost(0);
4680   format %{ %}
4681   interface(CONST_INTER);
4682 %}
4683 
4684 // 32 bit unsigned integer valid for logical immediate
4685 // TODO -- check this is right when e.g the mask is 0x80000000
4686 operand immILog()
4687 %{
4688   predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
4689   match(ConI);
4690 
4691   op_cost(0);
4692   format %{ %}
4693   interface(CONST_INTER);
4694 %}
4695 
4696 // Integer operands 64 bit
4697 // 64 bit immediate
4698 operand immL()
4699 %{
4700   match(ConL);
4701 
4702   op_cost(0);
4703   format %{ %}
4704   interface(CONST_INTER);
4705 %}
4706 
4707 // 64 bit zero
4708 operand immL0()
4709 %{
4710   predicate(n->get_long() == 0);
4711   match(ConL);
4712 
4713   op_cost(0);
4714   format %{ %}
4715   interface(CONST_INTER);
4716 %}
4717 
4718 // 64 bit unit increment
4719 operand immL_1()
4720 %{
4721   predicate(n->get_long() == 1);
4722   match(ConL);
4723 
4724   op_cost(0);
4725   format %{ %}
4726   interface(CONST_INTER);
4727 %}
4728 
4729 // 64 bit unit decrement
4730 operand immL_M1()
4731 %{
4732   predicate(n->get_long() == -1);
4733   match(ConL);
4734 
4735   op_cost(0);
4736   format %{ %}
4737   interface(CONST_INTER);
4738 %}
4739 
4740 // 32 bit offset of pc in thread anchor
4741 
4742 operand immL_pc_off()
4743 %{
4744   predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
4745                              in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
4746   match(ConL);
4747 
4748   op_cost(0);
4749   format %{ %}
4750   interface(CONST_INTER);
4751 %}
4752 
4753 // 64 bit integer valid for add sub immediate
4754 operand immLAddSub()
4755 %{
4756   predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
4757   match(ConL);
4758   op_cost(0);
4759   format %{ %}
4760   interface(CONST_INTER);
4761 %}
4762 
4763 // 64 bit integer valid for logical immediate
4764 operand immLLog()
4765 %{
4766   predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
4767   match(ConL);
4768   op_cost(0);
4769   format %{ %}
4770   interface(CONST_INTER);
4771 %}
4772 
4773 // Long Immediate: low 32-bit mask
4774 operand immL_32bits()
4775 %{
4776   predicate(n->get_long() == 0xFFFFFFFFL);
4777   match(ConL);
4778   op_cost(0);
4779   format %{ %}
4780   interface(CONST_INTER);
4781 %}
4782 
4783 // Pointer operands
4784 // Pointer Immediate
4785 operand immP()
4786 %{
4787   match(ConP);
4788 
4789   op_cost(0);
4790   format %{ %}
4791   interface(CONST_INTER);
4792 %}
4793 
4794 // NULL Pointer Immediate
4795 operand immP0()
4796 %{
4797   predicate(n->get_ptr() == 0);
4798   match(ConP);
4799 
4800   op_cost(0);
4801   format %{ %}
4802   interface(CONST_INTER);
4803 %}
4804 
4805 // Pointer Immediate One
4806 // this is used in object initialization (initial object header)
4807 operand immP_1()
4808 %{
4809   predicate(n->get_ptr() == 1);
4810   match(ConP);
4811 
4812   op_cost(0);
4813   format %{ %}
4814   interface(CONST_INTER);
4815 %}
4816 
4817 // Polling Page Pointer Immediate
4818 operand immPollPage()
4819 %{
4820   predicate((address)n->get_ptr() == os::get_polling_page());
4821   match(ConP);
4822 
4823   op_cost(0);
4824   format %{ %}
4825   interface(CONST_INTER);
4826 %}
4827 
4828 // Card Table Byte Map Base
4829 operand immByteMapBase()
4830 %{
4831   // Get base of card map
4832   predicate((jbyte*)n->get_ptr() ==
4833         ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
4834   match(ConP);
4835 
4836   op_cost(0);
4837   format %{ %}
4838   interface(CONST_INTER);
4839 %}
4840 
4841 // Pointer Immediate Minus One
4842 // this is used when we want to write the current PC to the thread anchor
4843 operand immP_M1()
4844 %{
4845   predicate(n->get_ptr() == -1);
4846   match(ConP);
4847 
4848   op_cost(0);
4849   format %{ %}
4850   interface(CONST_INTER);
4851 %}
4852 
4853 // Pointer Immediate Minus Two
4854 // this is used when we want to write the current PC to the thread anchor
4855 operand immP_M2()
4856 %{
4857   predicate(n->get_ptr() == -2);
4858   match(ConP);
4859 
4860   op_cost(0);
4861   format %{ %}
4862   interface(CONST_INTER);
4863 %}
4864 
4865 // Float and Double operands
4866 // Double Immediate
4867 operand immD()
4868 %{
4869   match(ConD);
4870   op_cost(0);
4871   format %{ %}
4872   interface(CONST_INTER);
4873 %}
4874 
4875 // Double Immediate: +0.0d
4876 operand immD0()
4877 %{
4878   predicate(jlong_cast(n->getd()) == 0);
4879   match(ConD);
4880 
4881   op_cost(0);
4882   format %{ %}
4883   interface(CONST_INTER);
4884 %}
4885 
4886 // constant 'double +0.0'.
4887 operand immDPacked()
4888 %{
4889   predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
4890   match(ConD);
4891   op_cost(0);
4892   format %{ %}
4893   interface(CONST_INTER);
4894 %}
4895 
4896 // Float Immediate
4897 operand immF()
4898 %{
4899   match(ConF);
4900   op_cost(0);
4901   format %{ %}
4902   interface(CONST_INTER);
4903 %}
4904 
4905 // Float Immediate: +0.0f.
4906 operand immF0()
4907 %{
4908   predicate(jint_cast(n->getf()) == 0);
4909   match(ConF);
4910 
4911   op_cost(0);
4912   format %{ %}
4913   interface(CONST_INTER);
4914 %}
4915 
4916 //
4917 operand immFPacked()
4918 %{
4919   predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
4920   match(ConF);
4921   op_cost(0);
4922   format %{ %}
4923   interface(CONST_INTER);
4924 %}
4925 
4926 // Narrow pointer operands
4927 // Narrow Pointer Immediate
4928 operand immN()
4929 %{
4930   match(ConN);
4931 
4932   op_cost(0);
4933   format %{ %}
4934   interface(CONST_INTER);
4935 %}
4936 
4937 // Narrow NULL Pointer Immediate
4938 operand immN0()
4939 %{
4940   predicate(n->get_narrowcon() == 0);
4941   match(ConN);
4942 
4943   op_cost(0);
4944   format %{ %}
4945   interface(CONST_INTER);
4946 %}
4947 
4948 operand immNKlass()
4949 %{
4950   match(ConNKlass);
4951 
4952   op_cost(0);
4953   format %{ %}
4954   interface(CONST_INTER);
4955 %}
4956 
4957 // Integer 32 bit Register Operands
4958 // Integer 32 bitRegister (excludes SP)
4959 operand iRegI()
4960 %{
4961   constraint(ALLOC_IN_RC(any_reg32));
4962   match(RegI);
4963   match(iRegINoSp);
4964   op_cost(0);
4965   format %{ %}
4966   interface(REG_INTER);
4967 %}
4968 
4969 // Integer 32 bit Register not Special
4970 operand iRegINoSp()
4971 %{
4972   constraint(ALLOC_IN_RC(no_special_reg32));
4973   match(RegI);
4974   op_cost(0);
4975   format %{ %}
4976   interface(REG_INTER);
4977 %}
4978 
4979 // Integer 64 bit Register Operands
4980 // Integer 64 bit Register (includes SP)
4981 operand iRegL()
4982 %{
4983   constraint(ALLOC_IN_RC(any_reg));
4984   match(RegL);
4985   match(iRegLNoSp);
4986   op_cost(0);
4987   format %{ %}
4988   interface(REG_INTER);
4989 %}
4990 
4991 // Integer 64 bit Register not Special
4992 operand iRegLNoSp()
4993 %{
4994   constraint(ALLOC_IN_RC(no_special_reg));
4995   match(RegL);
4996   format %{ %}
4997   interface(REG_INTER);
4998 %}
4999 
5000 // Pointer Register Operands
5001 // Pointer Register
5002 operand iRegP()
5003 %{
5004   constraint(ALLOC_IN_RC(ptr_reg));
5005   match(RegP);
5006   match(iRegPNoSp);
5007   match(iRegP_R0);
5008   //match(iRegP_R2);
5009   //match(iRegP_R4);
5010   //match(iRegP_R5);
5011   match(thread_RegP);
5012   op_cost(0);
5013   format %{ %}
5014   interface(REG_INTER);
5015 %}
5016 
5017 // Pointer 64 bit Register not Special
5018 operand iRegPNoSp()
5019 %{
5020   constraint(ALLOC_IN_RC(no_special_ptr_reg));
5021   match(RegP);
5022   // match(iRegP);
5023   // match(iRegP_R0);
5024   // match(iRegP_R2);
5025   // match(iRegP_R4);
5026   // match(iRegP_R5);
5027   // match(thread_RegP);
5028   op_cost(0);
5029   format %{ %}
5030   interface(REG_INTER);
5031 %}
5032 
5033 // Pointer 64 bit Register R0 only
5034 operand iRegP_R0()
5035 %{
5036   constraint(ALLOC_IN_RC(r0_reg));
5037   match(RegP);
5038   // match(iRegP);
5039   match(iRegPNoSp);
5040   op_cost(0);
5041   format %{ %}
5042   interface(REG_INTER);
5043 %}
5044 
5045 // Pointer 64 bit Register R1 only
5046 operand iRegP_R1()
5047 %{
5048   constraint(ALLOC_IN_RC(r1_reg));
5049   match(RegP);
5050   // match(iRegP);
5051   match(iRegPNoSp);
5052   op_cost(0);
5053   format %{ %}
5054   interface(REG_INTER);
5055 %}
5056 
5057 // Pointer 64 bit Register R2 only
5058 operand iRegP_R2()
5059 %{
5060   constraint(ALLOC_IN_RC(r2_reg));
5061   match(RegP);
5062   // match(iRegP);
5063   match(iRegPNoSp);
5064   op_cost(0);
5065   format %{ %}
5066   interface(REG_INTER);
5067 %}
5068 
5069 // Pointer 64 bit Register R3 only
5070 operand iRegP_R3()
5071 %{
5072   constraint(ALLOC_IN_RC(r3_reg));
5073   match(RegP);
5074   // match(iRegP);
5075   match(iRegPNoSp);
5076   op_cost(0);
5077   format %{ %}
5078   interface(REG_INTER);
5079 %}
5080 
5081 // Pointer 64 bit Register R4 only
5082 operand iRegP_R4()
5083 %{
5084   constraint(ALLOC_IN_RC(r4_reg));
5085   match(RegP);
5086   // match(iRegP);
5087   match(iRegPNoSp);
5088   op_cost(0);
5089   format %{ %}
5090   interface(REG_INTER);
5091 %}
5092 
5093 // Pointer 64 bit Register R5 only
5094 operand iRegP_R5()
5095 %{
5096   constraint(ALLOC_IN_RC(r5_reg));
5097   match(RegP);
5098   // match(iRegP);
5099   match(iRegPNoSp);
5100   op_cost(0);
5101   format %{ %}
5102   interface(REG_INTER);
5103 %}
5104 
5105 // Pointer 64 bit Register R10 only
5106 operand iRegP_R10()
5107 %{
5108   constraint(ALLOC_IN_RC(r10_reg));
5109   match(RegP);
5110   // match(iRegP);
5111   match(iRegPNoSp);
5112   op_cost(0);
5113   format %{ %}
5114   interface(REG_INTER);
5115 %}
5116 
5117 // Long 64 bit Register R11 only
5118 operand iRegL_R11()
5119 %{
5120   constraint(ALLOC_IN_RC(r11_reg));
5121   match(RegL);
5122   match(iRegLNoSp);
5123   op_cost(0);
5124   format %{ %}
5125   interface(REG_INTER);
5126 %}
5127 
5128 // Pointer 64 bit Register FP only
5129 operand iRegP_FP()
5130 %{
5131   constraint(ALLOC_IN_RC(fp_reg));
5132   match(RegP);
5133   // match(iRegP);
5134   op_cost(0);
5135   format %{ %}
5136   interface(REG_INTER);
5137 %}
5138 
5139 // Register R0 only
5140 operand iRegI_R0()
5141 %{
5142   constraint(ALLOC_IN_RC(int_r0_reg));
5143   match(RegI);
5144   match(iRegINoSp);
5145   op_cost(0);
5146   format %{ %}
5147   interface(REG_INTER);
5148 %}
5149 
5150 // Register R2 only
5151 operand iRegI_R2()
5152 %{
5153   constraint(ALLOC_IN_RC(int_r2_reg));
5154   match(RegI);
5155   match(iRegINoSp);
5156   op_cost(0);
5157   format %{ %}
5158   interface(REG_INTER);
5159 %}
5160 
5161 // Register R3 only
5162 operand iRegI_R3()
5163 %{
5164   constraint(ALLOC_IN_RC(int_r3_reg));
5165   match(RegI);
5166   match(iRegINoSp);
5167   op_cost(0);
5168   format %{ %}
5169   interface(REG_INTER);
5170 %}
5171 
5172 
5173 // Register R2 only
5174 operand iRegI_R4()
5175 %{
5176   constraint(ALLOC_IN_RC(int_r4_reg));
5177   match(RegI);
5178   match(iRegINoSp);
5179   op_cost(0);
5180   format %{ %}
5181   interface(REG_INTER);
5182 %}
5183 
5184 
5185 // Pointer Register Operands
5186 // Narrow Pointer Register
5187 operand iRegN()
5188 %{
5189   constraint(ALLOC_IN_RC(any_reg32));
5190   match(RegN);
5191   match(iRegNNoSp);
5192   op_cost(0);
5193   format %{ %}
5194   interface(REG_INTER);
5195 %}
5196 
5197 // Integer 64 bit Register not Special
5198 operand iRegNNoSp()
5199 %{
5200   constraint(ALLOC_IN_RC(no_special_reg32));
5201   match(RegN);
5202   op_cost(0);
5203   format %{ %}
5204   interface(REG_INTER);
5205 %}
5206 
5207 // heap base register -- used for encoding immN0
5208 
5209 operand iRegIHeapbase()
5210 %{
5211   constraint(ALLOC_IN_RC(heapbase_reg));
5212   match(RegI);
5213   op_cost(0);
5214   format %{ %}
5215   interface(REG_INTER);
5216 %}
5217 
5218 // Float Register
5219 // Float register operands
5220 operand vRegF()
5221 %{
5222   constraint(ALLOC_IN_RC(float_reg));
5223   match(RegF);
5224 
5225   op_cost(0);
5226   format %{ %}
5227   interface(REG_INTER);
5228 %}
5229 
5230 // Double Register
5231 // Double register operands
5232 operand vRegD()
5233 %{
5234   constraint(ALLOC_IN_RC(double_reg));
5235   match(RegD);
5236 
5237   op_cost(0);
5238   format %{ %}
5239   interface(REG_INTER);
5240 %}
5241 
5242 operand vecD()
5243 %{
5244   constraint(ALLOC_IN_RC(vectord_reg));
5245   match(VecD);
5246 
5247   op_cost(0);
5248   format %{ %}
5249   interface(REG_INTER);
5250 %}
5251 
5252 operand vecX()
5253 %{
5254   constraint(ALLOC_IN_RC(vectorx_reg));
5255   match(VecX);
5256 
5257   op_cost(0);
5258   format %{ %}
5259   interface(REG_INTER);
5260 %}
5261 
5262 operand vRegD_V0()
5263 %{
5264   constraint(ALLOC_IN_RC(v0_reg));
5265   match(RegD);
5266   op_cost(0);
5267   format %{ %}
5268   interface(REG_INTER);
5269 %}
5270 
5271 operand vRegD_V1()
5272 %{
5273   constraint(ALLOC_IN_RC(v1_reg));
5274   match(RegD);
5275   op_cost(0);
5276   format %{ %}
5277   interface(REG_INTER);
5278 %}
5279 
5280 operand vRegD_V2()
5281 %{
5282   constraint(ALLOC_IN_RC(v2_reg));
5283   match(RegD);
5284   op_cost(0);
5285   format %{ %}
5286   interface(REG_INTER);
5287 %}
5288 
5289 operand vRegD_V3()
5290 %{
5291   constraint(ALLOC_IN_RC(v3_reg));
5292   match(RegD);
5293   op_cost(0);
5294   format %{ %}
5295   interface(REG_INTER);
5296 %}
5297 
5298 // Flags register, used as output of signed compare instructions
5299 
5300 // note that on AArch64 we also use this register as the output for
5301 // for floating point compare instructions (CmpF CmpD). this ensures
5302 // that ordered inequality tests use GT, GE, LT or LE none of which
5303 // pass through cases where the result is unordered i.e. one or both
5304 // inputs to the compare is a NaN. this means that the ideal code can
5305 // replace e.g. a GT with an LE and not end up capturing the NaN case
5306 // (where the comparison should always fail). EQ and NE tests are
5307 // always generated in ideal code so that unordered folds into the NE
5308 // case, matching the behaviour of AArch64 NE.
5309 //
5310 // This differs from x86 where the outputs of FP compares use a
5311 // special FP flags registers and where compares based on this
5312 // register are distinguished into ordered inequalities (cmpOpUCF) and
5313 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
5314 // to explicitly handle the unordered case in branches. x86 also has
5315 // to include extra CMoveX rules to accept a cmpOpUCF input.
5316 
5317 operand rFlagsReg()
5318 %{
5319   constraint(ALLOC_IN_RC(int_flags));
5320   match(RegFlags);
5321 
5322   op_cost(0);
5323   format %{ "RFLAGS" %}
5324   interface(REG_INTER);
5325 %}
5326 
5327 // Flags register, used as output of unsigned compare instructions
5328 operand rFlagsRegU()
5329 %{
5330   constraint(ALLOC_IN_RC(int_flags));
5331   match(RegFlags);
5332 
5333   op_cost(0);
5334   format %{ "RFLAGSU" %}
5335   interface(REG_INTER);
5336 %}
5337 
5338 // Special Registers
5339 
5340 // Method Register
5341 operand inline_cache_RegP(iRegP reg)
5342 %{
5343   constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
5344   match(reg);
5345   match(iRegPNoSp);
5346   op_cost(0);
5347   format %{ %}
5348   interface(REG_INTER);
5349 %}
5350 
5351 operand interpreter_method_oop_RegP(iRegP reg)
5352 %{
5353   constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
5354   match(reg);
5355   match(iRegPNoSp);
5356   op_cost(0);
5357   format %{ %}
5358   interface(REG_INTER);
5359 %}
5360 
5361 // Thread Register
5362 operand thread_RegP(iRegP reg)
5363 %{
5364   constraint(ALLOC_IN_RC(thread_reg)); // link_reg
5365   match(reg);
5366   op_cost(0);
5367   format %{ %}
5368   interface(REG_INTER);
5369 %}
5370 
5371 operand lr_RegP(iRegP reg)
5372 %{
5373   constraint(ALLOC_IN_RC(lr_reg)); // link_reg
5374   match(reg);
5375   op_cost(0);
5376   format %{ %}
5377   interface(REG_INTER);
5378 %}
5379 
5380 //----------Memory Operands----------------------------------------------------
5381 
5382 operand indirect(iRegP reg)
5383 %{
5384   constraint(ALLOC_IN_RC(ptr_reg));
5385   match(reg);
5386   op_cost(0);
5387   format %{ "[$reg]" %}
5388   interface(MEMORY_INTER) %{
5389     base($reg);
5390     index(0xffffffff);
5391     scale(0x0);
5392     disp(0x0);
5393   %}
5394 %}
5395 
5396 operand indIndexScaledOffsetI(iRegP reg, iRegL lreg, immIScale scale, immIU12 off)
5397 %{
5398   constraint(ALLOC_IN_RC(ptr_reg));
5399   match(AddP (AddP reg (LShiftL lreg scale)) off);
5400   op_cost(INSN_COST);
5401   format %{ "$reg, $lreg lsl($scale), $off" %}
5402   interface(MEMORY_INTER) %{
5403     base($reg);
5404     index($lreg);
5405     scale($scale);
5406     disp($off);
5407   %}
5408 %}
5409 
// Pointer memory operands.  Each describes one addressing mode the matcher
// may fold into a load/store: a pointer base register plus an optional
// (possibly scaled) index and/or immediate displacement.
// n.b. index(0xffffffff) in the MEMORY_INTER encodes "no index register".

// base + (64-bit index << scale) + unsigned 12-bit (long) offset
operand indIndexScaledOffsetL(iRegP reg, iRegL lreg, immIScale scale, immLU12 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL lreg scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $lreg lsl($scale), $off" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// base + sign-extended 32-bit index + unsigned 12-bit offset
operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (ConvI2L ireg)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $ireg, $off I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// base + (sign-extended 32-bit index << scale) + unsigned 12-bit offset
operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL (ConvI2L ireg) scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $ireg sxtw($scale), $off I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

// base + (sign-extended 32-bit index << scale), no displacement
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// base + (64-bit index << scale), no displacement
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// base + 64-bit index, no scaling or displacement
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// base + int immediate offset, no index register
operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// base + long immediate offset, no index register
operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5521 
5522 
// Memory operands whose base is a compressed (narrow) oop.  Every one of
// these is guarded by Universe::narrow_oop_shift() == 0, i.e. they are only
// legal when DecodeN is an identity operation so the narrow value can serve
// directly as the base address.  They mirror the plain iRegP forms above.

// [narrow base], no index or displacement
operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// narrow base + (64-bit index << scale) + unsigned 12-bit (int) offset
operand indIndexScaledOffsetIN(iRegN reg, iRegL lreg, immIScale scale, immIU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// narrow base + (64-bit index << scale) + unsigned 12-bit (long) offset
operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// narrow base + sign-extended 32-bit index + unsigned 12-bit offset
operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $ireg, $off I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// narrow base + (sign-extended 32-bit index << scale) + unsigned 12-bit offset
operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $ireg sxtw($scale), $off I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

// narrow base + (sign-extended 32-bit index << scale), no displacement
operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// narrow base + (64-bit index << scale), no displacement
operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// narrow base + 64-bit index, no scaling or displacement
operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// narrow base + int immediate offset, no index register
operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// narrow base + long immediate offset, no index register
operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5672 
5673 
5674 
// AArch64 opto stubs need to write to the pc slot in the thread anchor
// (thread register base + immL_pc_off displacement; presumably the offset
// of the anchor's pc field — confirm against immL_pc_off's definition)
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);  // no index register
    scale(0x0);
    disp($off);
  %}
%}
5689 
5690 //----------Special Memory Operands--------------------------------------------
5691 // Stack Slot Operand - This operand is used for loading and storing temporary
5692 //                      values on the stack where a match requires a value to
5693 //                      flow through memory.
// NOTE(review): the base encoding 0x1e and the "RSP" comments below are
// inherited from the x86 ad file; here the base should denote the stack
// pointer — confirm against the reg_def encodings at the top of this file.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a 32-bit int
operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a float
operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a double
operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a 64-bit long
operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
5764 
5765 // Operands for expressing Control Flow
5766 // NOTE: Label is a predefined operand which should not be redefined in
5767 //       the AD file. It is generically handled within the ADLC.
5768 
5769 //----------Conditional Branch Operands----------------------------------------
5770 // Comparison Op  - This is the operation of the comparison, and is limited to
5771 //                  the following set of codes:
5772 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5773 //
5774 // Other attributes of the comparison, such as unsignedness, are specified
5775 // by the comparison instruction that sets a condition code flags register.
5776 // That result is represented by a flags operand whose subtype is appropriate
5777 // to the unsignedness (etc.) of the comparison.
5778 //
5779 // Later, the instruction which matches both the Comparison Op (a Bool) and
5780 // the flags (produced by the Cmp) specifies the coding of the comparison op
5781 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5782 
// used for signed integral comparisons and fp comparisons

// Condition operand for signed/FP compares.  The numeric values are the
// AArch64 condition-code encodings emitted into the conditional instruction
// (see the ARMv8 ARM, condition codes); the strings are the assembler
// mnemonics used in the disassembly.
operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for unsigned integral comparisons

// As cmpOp, but the relational cases map to the unsigned condition codes
// (lo/hs/ls/hi) instead of the signed ones.
operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5820 
5821 // Special operand allowing long args to int ops to be truncated for free
5822 
// Register operand that matches (ConvL2I reg) at zero cost, so a long value
// can feed a 32-bit instruction without an explicit truncating move.
operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  interface(REG_INTER)
%}

// Subset of the addressing modes above; presumably used by the vector
// load/store rules later in the file — verify at the point of use.
opclass vmem(indirect, indIndex, indOffI, indOffL);
5835 
5836 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
5838 // instruction definitions by not requiring the AD writer to specify
5839 // separate instructions for every form of operand when the
5840 // instruction accepts multiple operand types with the same basic
5841 // encoding and format. The classic case of this is memory operands.
5842 
5843 // memory is used to define read/write location for load/store
5844 // instruction defs. we can turn a memory op into an Address
5845 
// memory covers every pointer addressing mode defined above, with both raw
// (iRegP, first line) and narrow-oop (iRegN, second line) bases.
opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);


// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. it allows the src to be either an iRegI or a (ConvL2I
// iRegL). in the latter case the l2i normally planted for a ConvL2I
// can be elided because the 32-bit instruction will just employ the
// lower 32 bits anyway.
//
// n.b. this does not elide all L2I conversions. if the truncated
// value is consumed by more than one operation then the ConvL2I
// cannot be bundled into the consuming nodes so an l2i gets planted
// (actually a movw $dst $src) and the downstream instructions consume
// the result of the l2i as an iRegI input. That's a shame since the
// movw is actually redundant but it's not too costly.

opclass iRegIorL2I(iRegI, iRegL2I);
5864 
5865 //----------PIPELINE-----------------------------------------------------------
5866 // Rules which define the behavior of the target architectures pipeline.
5867 // Integer ALU reg operation
pipeline %{

attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // Fixed size instructions
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}

// We don't use an actual pipeline model so don't care about resources
// or description. we do use pipeline classes to introduce fixed
// latencies

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

pipe_desc(ISS, EX1, EX2, WR);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

//------- Integer ALU operations --------------------------

// Integer ALU reg-reg operation
// Operands needed in EX1, result generated in EX2
// Eg.  ADD     x0, x1, x2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : EX1(read);
  INS01  : ISS; // Dual issue as instruction 0 or 1
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with constant shift
// Shifted register must be available in LATE_ISS instead of EX1
// Eg.  ADD     x0, x1, x2, LSL #2
pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg operation with constant shift
// Eg.  LSL     x0, x1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with variable shift
// Both operands must be available in LATE_ISS instead of EX1
// Result is available in EX1 instead of EX2
// Eg.  LSLV    x0, x1, x2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX1(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX1;
%}

// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  ALU    : EX1;
%}

// Integer ALU reg operation
// Eg.  NEG     x0, x1
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX2(write);
  src    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-immediate operation
// Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU immediate operation (no source operands)
// Eg.  MOV     x0, #N
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX1(write);
  INS01  : ISS;
  ALU    : EX1;
%}

//------- Compare operation -------------------------------

// Compare reg-reg
// Eg.  CMP     x0, x1
pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  op2    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Compare reg-immediate
// Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

//------- Conditional instructions ------------------------

// Conditional no operands
// Eg.  CSINC   x0, zr, zr, <cond>
pipe_class icond_none(iRegI dst, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 2 operand
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src1   : EX1(read);
  src2   : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 1 operand
// EG.  CSEL    X0, X1, X1, <cond>
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

//------- Multiply pipeline operations --------------------

// Multiply reg-reg
// Eg.  MUL     w0, w1, w2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate
// Eg.  MADD    w0, w1, w2, w3
pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long multiply reg-reg
// Eg.  MUL     x0, x1, x2
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long multiply accumulate
// Eg.  MADD    x0, x1, x2, x3
pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

//------- Divide pipeline operations --------------------

// Eg.  SDIV    w0, w1, w2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

// Eg.  SDIV    x0, x1, x2
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

//------- Load pipeline operations ------------------------

// Load - prefetch
// Eg.  PFRM    <mem>
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, mem
// Eg.  LDR     x0, <mem>
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, reg
// Eg.  LDR     x0, [sp, x1]
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

//------- Store pipeline operations -----------------------

// Store - zr, mem
// Eg.  STR     zr, <mem>
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, mem
// Eg.  STR     x0, <mem>
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, reg
// Eg. STR      x0, [sp, x1]
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}

//------- Branch pipeline operations ----------------------

// Branch
pipe_class pipe_branch()
%{
  single_instruction;
  INS01  : ISS;
  BRANCH : EX1;
%}

// Conditional branch
pipe_class pipe_branch_cond(rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

// Compare & Branch
// EG.  CBZ/CBNZ
pipe_class pipe_cmp_branch(iRegI op1)
%{
  single_instruction;
  op1    : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

//------- Synchronisation operations ----------------------

// Any operation requiring serialization.
// EG.  DMB/Atomic Ops/Load Acquire/Str Release
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Generic big/slow expanded idiom - also serialized
pipe_class pipe_slow()
%{
  instruction_count(10);
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Empty pipeline class
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);
%}

// Default pipeline class.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);
%}

// Define the class for the Nop node.
define %{
   MachNop = pipe_class_empty;
%}

%}
6311 //----------INSTRUCTIONS-------------------------------------------------------
6312 //
6313 // match      -- States which machine-independent subtree may be replaced
6314 //               by this instruction.
6315 // ins_cost   -- The estimated cost of this instruction is used by instruction
6316 //               selection to identify a minimum cost tree of machine
6317 //               instructions that matches a tree of machine-independent
6318 //               instructions.
6319 // format     -- A string providing the disassembly for this instruction.
6320 //               The value of an instruction's operand may be inserted
6321 //               by referring to it with a '$' prefix.
6322 // opcode     -- Three instruction opcodes may be provided.  These are referred
6323 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
6325 //               indicate the type of machine instruction, while secondary
6326 //               and tertiary are often used for prefix options or addressing
6327 //               modes.
6328 // ins_encode -- A list of encode classes with parameters. The encode class
6329 //               name must have been defined in an 'enc_class' specification
6330 //               in the encode section of the architecture description.
6331 
6332 // ============================================================================
6333 // Memory (Load/Store) Instructions
6334 
6335 // Load Instructions
6336 
// Load Byte (8 bit signed)
// n.b. all of these load rules are guarded by !needs_acquiring_load(), so
// they only match non-acquiring loads; acquiring (volatile) loads are
// presumably matched by separate rules elsewhere in the file.
instruct loadB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsbw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit signed) into long
// (the ConvI2L is folded into the sign-extending load; the predicate looks
// through it to the load at n->in(1))
instruct loadB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned)
instruct loadUB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned) into long
// (zero-extending load; ConvI2L folded as for loadB2L)
instruct loadUB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed)
instruct loadS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrshw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed) into long
instruct loadS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrsh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Char (16 bit unsigned)
instruct loadUS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6448 
// Load Integer (32 bit signed)
instruct loadI(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadI mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed) into long
// (sign-extending load; the ConvI2L is folded into the ldrsw and the
// predicate looks through it to the load at n->in(1))
instruct loadI2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrsw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit unsigned) into long
// (the AndL with the 32-bit mask plus the ConvI2L collapse into a plain
// 32-bit ldrw, which zeroes the upper word; the load sits two levels down
// in the tree, hence n->in(1)->in(1) in the predicate)
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6490 
// Load Long (64 bit signed)
// Only matches non-acquiring loads, like the other load rules here.
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  // Fix: the disassembly annotation previously said "# int" although this
  // rule loads a 64-bit long via a full-width ldr.
  format %{ "ldr  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6504 
// Load Range
// (array length load; n.b. unlike the other loads there is no
// needs_acquiring_load predicate here)
instruct loadRange(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6517 
// Load Pointer (full-width oop/pointer)
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Compressed Pointer (32-bit narrow oop; decode is a separate node)
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Narrow Klass Pointer (32-bit compressed class pointer)
instruct loadNKlass(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6573 
6574 // Load Float
6575 instruct loadF(vRegF dst, memory mem)
6576 %{
6577   match(Set dst (LoadF mem));
6578   predicate(!needs_acquiring_load(n));
6579 
6580   ins_cost(4 * INSN_COST);
6581   format %{ "ldrs  $dst, $mem\t# float" %}
6582 
6583   ins_encode( aarch64_enc_ldrs(dst, mem) );
6584 
6585   ins_pipe(pipe_class_memory);
6586 %}
6587 
6588 // Load Double
6589 instruct loadD(vRegD dst, memory mem)
6590 %{
6591   match(Set dst (LoadD mem));
6592   predicate(!needs_acquiring_load(n));
6593 
6594   ins_cost(4 * INSN_COST);
6595   format %{ "ldrd  $dst, $mem\t# double" %}
6596 
6597   ins_encode( aarch64_enc_ldrd(dst, mem) );
6598 
6599   ins_pipe(pipe_class_memory);
6600 %}
6601 
6602 
// Load Int Constant
// Materializes an arbitrary 32-bit immediate into a register.
instruct loadConI(iRegINoSp dst, immI src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# int" %}

  ins_encode( aarch64_enc_movw_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Long Constant
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Pointer Constant
// Higher cost (4x) since materializing an arbitrary 64-bit pointer may
// take a multi-instruction mov sequence.
instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr\n\t"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Null Pointer Constant
// Specialized single-instruction form for the null constant.
instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# NULL ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
6658 
// Load Pointer Constant One
// Specialized form materializing the pointer constant 1 (immP_1).
instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  // Fixed: comment previously said "# NULL ptr" (copy-paste from
  // loadConP0); this rule loads the pointer constant one.
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
6672 
// Load Poll Page Constant
// Materializes the safepoint polling page address via adr.
instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Poll Page Ptr" %}

  ins_encode(aarch64_enc_mov_poll_page(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Byte Map Base Constant
// Materializes the card-table byte map base address.
instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Byte Map Base" %}

  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Pointer Constant
instruct loadConN(iRegNNoSp dst, immN con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{ "mov  $dst, $con\t# compressed ptr" %}

  ins_encode(aarch64_enc_mov_n(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Null Pointer Constant
// Single-instruction specialization for the compressed null constant.
instruct loadConN0(iRegNNoSp dst, immN0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed NULL ptr" %}

  ins_encode(aarch64_enc_mov_n0(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Klass Constant
instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_mov_nk(dst, con));

  ins_pipe(ialu_imm);
%}
6742 
// Load Packed Float Constant
// For floats expressible as an fmov immediate (immFPacked) the value is
// encoded directly in the instruction; no constant-table load needed.
instruct loadConF_packed(vRegF dst, immFPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST * 4);
  format %{ "fmovs  $dst, $con"%}
  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
  %}

  ins_pipe(pipe_class_default);
%}

// Load Float Constant
// General case: load the float from the constant table.
instruct loadConF(vRegF dst, immF con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 4);

  format %{
    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
  %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(pipe_class_default);
%}

// Load Packed Double Constant
// Doubles expressible as an fmov immediate avoid the constant table.
instruct loadConD_packed(vRegD dst, immDPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST);
  format %{ "fmovd  $dst, $con"%}
  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
  %}

  ins_pipe(pipe_class_default);
%}
6786 
// Load Double Constant
// General case: load the double from the constant table.
instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  // Fixed: format previously said "float=$con" (copy-paste from
  // loadConF); this rule loads a double constant.
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(pipe_class_default);
%}
6803 
// Store Instructions

// Store CMS card-mark Immediate
// Card-mark stores have no needs_releasing_store predicate here;
// NOTE(review): presumably StoreCM nodes are never matched as releasing
// stores — confirm against the barrier-generation code.
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store Byte
// Plain (non-releasing) store; volatile form is storeB_volatile below.
instruct storeB(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreB mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store of byte zero: uses the hardwired zero register instead of
// materializing the constant.
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store Char/Short
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}

instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}

// Store Integer

instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem(StoreI mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem(StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}
6901 
// Store Long (64 bit signed)
// Plain (non-releasing) store; volatile form is storeL_volatile below.
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed: format comment previously said "# int" for a long store.
  format %{ "str  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}
6915 
// Store Long Zero (64 bit signed)
// Uses the hardwired zero register rather than materializing 0.
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed: format comment previously said "# int" for a long store.
  format %{ "str  zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
6929 
// Store Pointer
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Null Pointer
// Stores the null constant via the hardwired zero register.
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}

// Store Compressed Pointer
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store compressed null: when both the oop base and klass base are
// NULL, rheapbase holds zero and can be stored directly, avoiding a
// constant materialization.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Float
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7017 
// Store Compressed Klass Pointer
// Reordered match/predicate to follow the match-first convention used
// by every other store rule in this file (no semantic change).
instruct storeNKlass(iRegN src, memory mem)
%{
  match(Set mem (StoreNKlass mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
7031 
// TODO
// implement storeImmD0 and storeDImmPacked

// prefetch instructions
// Must be safe to execute with invalid address (cannot fault).

// Prefetch for allocation: hints the cache that the line will be
// written (PSTL1KEEP), used ahead of TLAB bump-pointer allocation.
instruct prefetchalloc( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}
7048 
//  ---------------- volatile loads and stores ----------------
//
// These rules use load-acquire (ldar*) / store-release (stlr*)
// instructions and only an indirect (register) addressing mode, since
// the acquire/release forms take no offset.

// Load Byte (8 bit signed)
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned)
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short (16 bit signed)
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned)
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
7140 
// Load Short/Char (16 bit signed) into long
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  // Fixed: format previously printed "ldarh" (unsigned) while the
  // encoding emits ldarsh (sign-extending), producing a misleading
  // disassembly comment.
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
7153 
// Load Integer (32 bit signed)
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit unsigned) into long
// Matches (long)x & 0xFFFFFFFF of an int load; ldarw already
// zero-extends into the 64-bit register, so the mask is free.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
7179 
// Load Long (64 bit signed)
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  // Fixed: format comment previously said "# int" for a long load.
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
7192 
// Load Pointer
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Compressed Pointer
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Float
// FP acquiring loads go through a GP register transfer in the encoding
// (fldars), since ldar has no FP-register form.
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Load Double
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Store Byte
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Integer

instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem(StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
7284 
// Store Long (64 bit signed)
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  // Fixed: format comment previously said "# int" for a long store.
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
7297 
// Store Pointer
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Pointer
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Float
// FP releasing stores transfer through a GP register in the encoding
// (fstlrs), since stlr has no FP-register form.
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7352 
7353 //  ---------------- end of volatile loads and stores ----------------
7354 
7355 // ============================================================================
7356 // BSWAP Instructions
7357 
7358 instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
7359   match(Set dst (ReverseBytesI src));
7360 
7361   ins_cost(INSN_COST);
7362   format %{ "revw  $dst, $src" %}
7363 
7364   ins_encode %{
7365     __ revw(as_Register($dst$$reg), as_Register($src$$reg));
7366   %}
7367 
7368   ins_pipe(ialu_reg);
7369 %}
7370 
7371 instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
7372   match(Set dst (ReverseBytesL src));
7373 
7374   ins_cost(INSN_COST);
7375   format %{ "rev  $dst, $src" %}
7376 
7377   ins_encode %{
7378     __ rev(as_Register($dst$$reg), as_Register($src$$reg));
7379   %}
7380 
7381   ins_pipe(ialu_reg);
7382 %}
7383 
7384 instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
7385   match(Set dst (ReverseBytesUS src));
7386 
7387   ins_cost(INSN_COST);
7388   format %{ "rev16w  $dst, $src" %}
7389 
7390   ins_encode %{
7391     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
7392   %}
7393 
7394   ins_pipe(ialu_reg);
7395 %}
7396 
7397 instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
7398   match(Set dst (ReverseBytesS src));
7399 
7400   ins_cost(INSN_COST);
7401   format %{ "rev16w  $dst, $src\n\t"
7402             "sbfmw $dst, $dst, #0, #15" %}
7403 
7404   ins_encode %{
7405     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
7406     __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
7407   %}
7408 
7409   ins_pipe(ialu_reg);
7410 %}
7411 
7412 // ============================================================================
7413 // Zero Count Instructions
7414 
7415 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
7416   match(Set dst (CountLeadingZerosI src));
7417 
7418   ins_cost(INSN_COST);
7419   format %{ "clzw  $dst, $src" %}
7420   ins_encode %{
7421     __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
7422   %}
7423 
7424   ins_pipe(ialu_reg);
7425 %}
7426 
7427 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
7428   match(Set dst (CountLeadingZerosL src));
7429 
7430   ins_cost(INSN_COST);
7431   format %{ "clz   $dst, $src" %}
7432   ins_encode %{
7433     __ clz(as_Register($dst$$reg), as_Register($src$$reg));
7434   %}
7435 
7436   ins_pipe(ialu_reg);
7437 %}
7438 
7439 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
7440   match(Set dst (CountTrailingZerosI src));
7441 
7442   ins_cost(INSN_COST * 2);
7443   format %{ "rbitw  $dst, $src\n\t"
7444             "clzw   $dst, $dst" %}
7445   ins_encode %{
7446     __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
7447     __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
7448   %}
7449 
7450   ins_pipe(ialu_reg);
7451 %}
7452 
7453 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
7454   match(Set dst (CountTrailingZerosL src));
7455 
7456   ins_cost(INSN_COST * 2);
7457   format %{ "rbit   $dst, $src\n\t"
7458             "clz    $dst, $dst" %}
7459   ins_encode %{
7460     __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
7461     __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
7462   %}
7463 
7464   ins_pipe(ialu_reg);
7465 %}
7466 
//---------- Population Count Instructions -------------------------------------
//
// AArch64 has no GP-register popcount; the value is moved to a SIMD
// register, cnt counts bits per byte, addv sums the byte counts, and
// the result is moved back.

instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // NOTE(review): writes $src in place (movw zero-extends the low 32
    // bits) without a USE_KILL effect; the 32-bit value is unchanged so
    // this appears benign — confirm allocator assumptions for the
    // upper-half bits.
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Popcount of an int loaded from memory: loads straight into the SIMD
// register (ldrs), skipping the GP transfer.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Popcount of a long loaded from memory: loads straight into the SIMD
// register (ldrd).
instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7556 
7557 // ============================================================================
7558 // MemBar Instruction
7559 
7560 instruct load_fence() %{
7561   match(LoadFence);
7562   ins_cost(VOLATILE_REF_COST);
7563 
7564   format %{ "load_fence" %}
7565 
7566   ins_encode %{
7567     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
7568   %}
7569   ins_pipe(pipe_serial);
7570 %}
7571 
7572 instruct unnecessary_membar_acquire() %{
7573   predicate(unnecessary_acquire(n));
7574   match(MemBarAcquire);
7575   ins_cost(0);
7576 
7577   format %{ "membar_acquire (elided)" %}
7578 
7579   ins_encode %{
7580     __ block_comment("membar_acquire (elided)");
7581   %}
7582 
7583   ins_pipe(pipe_class_empty);
7584 %}
7585 
7586 instruct membar_acquire() %{
7587   match(MemBarAcquire);
7588   ins_cost(VOLATILE_REF_COST);
7589 
7590   format %{ "membar_acquire" %}
7591 
7592   ins_encode %{
7593     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
7594   %}
7595 
7596   ins_pipe(pipe_serial);
7597 %}
7598 
7599 
7600 instruct membar_acquire_lock() %{
7601   match(MemBarAcquireLock);
7602   ins_cost(VOLATILE_REF_COST);
7603 
7604   format %{ "membar_acquire_lock" %}
7605 
7606   ins_encode %{
7607     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
7608   %}
7609 
7610   ins_pipe(pipe_serial);
7611 %}
7612 
7613 instruct store_fence() %{
7614   match(StoreFence);
7615   ins_cost(VOLATILE_REF_COST);
7616 
7617   format %{ "store_fence" %}
7618 
7619   ins_encode %{
7620     __ membar(Assembler::LoadStore|Assembler::StoreStore);
7621   %}
7622   ins_pipe(pipe_serial);
7623 %}
7624 
7625 instruct unnecessary_membar_release() %{
7626   predicate(unnecessary_release(n));
7627   match(MemBarRelease);
7628   ins_cost(0);
7629 
7630   format %{ "membar_release (elided)" %}
7631 
7632   ins_encode %{
7633     __ block_comment("membar_release (elided)");
7634   %}
7635   ins_pipe(pipe_serial);
7636 %}
7637 
7638 instruct membar_release() %{
7639   match(MemBarRelease);
7640   ins_cost(VOLATILE_REF_COST);
7641 
7642   format %{ "membar_release" %}
7643 
7644   ins_encode %{
7645     __ membar(Assembler::LoadStore|Assembler::StoreStore);
7646   %}
7647   ins_pipe(pipe_serial);
7648 %}
7649 
7650 instruct membar_storestore() %{
7651   match(MemBarStoreStore);
7652   ins_cost(VOLATILE_REF_COST);
7653 
7654   format %{ "MEMBAR-store-store" %}
7655 
7656   ins_encode %{
7657     __ membar(Assembler::StoreStore);
7658   %}
7659   ins_pipe(pipe_serial);
7660 %}
7661 
7662 instruct membar_release_lock() %{
7663   match(MemBarReleaseLock);
7664   ins_cost(VOLATILE_REF_COST);
7665 
7666   format %{ "membar_release_lock" %}
7667 
7668   ins_encode %{
7669     __ membar(Assembler::LoadStore|Assembler::StoreStore);
7670   %}
7671 
7672   ins_pipe(pipe_serial);
7673 %}
7674 
7675 instruct unnecessary_membar_volatile() %{
7676   predicate(unnecessary_volatile(n));
7677   match(MemBarVolatile);
7678   ins_cost(0);
7679 
7680   format %{ "membar_volatile (elided)" %}
7681 
7682   ins_encode %{
7683     __ block_comment("membar_volatile (elided)");
7684   %}
7685 
7686   ins_pipe(pipe_serial);
7687 %}
7688 
7689 instruct membar_volatile() %{
7690   match(MemBarVolatile);
7691   ins_cost(VOLATILE_REF_COST*100);
7692 
7693   format %{ "membar_volatile" %}
7694 
7695   ins_encode %{
7696     __ membar(Assembler::StoreLoad);
7697   %}
7698 
7699   ins_pipe(pipe_serial);
7700 %}
7701 
7702 // ============================================================================
7703 // Cast/Convert Instructions
7704 
7705 instruct castX2P(iRegPNoSp dst, iRegL src) %{
7706   match(Set dst (CastX2P src));
7707 
7708   ins_cost(INSN_COST);
7709   format %{ "mov $dst, $src\t# long -> ptr" %}
7710 
7711   ins_encode %{
7712     if ($dst$$reg != $src$$reg) {
7713       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7714     }
7715   %}
7716 
7717   ins_pipe(ialu_reg);
7718 %}
7719 
7720 instruct castP2X(iRegLNoSp dst, iRegP src) %{
7721   match(Set dst (CastP2X src));
7722 
7723   ins_cost(INSN_COST);
7724   format %{ "mov $dst, $src\t# ptr -> long" %}
7725 
7726   ins_encode %{
7727     if ($dst$$reg != $src$$reg) {
7728       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7729     }
7730   %}
7731 
7732   ins_pipe(ialu_reg);
7733 %}
7734 
7735 // Convert oop into int for vectors alignment masking
7736 instruct convP2I(iRegINoSp dst, iRegP src) %{
7737   match(Set dst (ConvL2I (CastP2X src)));
7738 
7739   ins_cost(INSN_COST);
7740   format %{ "movw $dst, $src\t# ptr -> int" %}
7741   ins_encode %{
7742     __ movw($dst$$Register, $src$$Register);
7743   %}
7744 
7745   ins_pipe(ialu_reg);
7746 %}
7747 
7748 // Convert compressed oop into int for vectors alignment masking
7749 // in case of 32bit oops (heap < 4Gb).
// Only valid when narrow oops are unshifted (heap < 4Gb): the 32-bit
// compressed form then equals the low 32 bits of the raw address, so a
// simple 32-bit register move suffices.
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  // Fix: format previously read "mov dst, $src" -- the missing '$'
  // printed the literal word "dst" in disassembly, and the mnemonic
  // did not match the movw actually emitted by the encoding below.
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
7763 
7764 
7765 // Convert oop pointer into compressed form
// Compress a possibly-null oop. The predicate excludes the not-null
// case, which is handled by the cheaper encodeHeapOop_not_null rule.
// Flags are killed because encode_heap_oop may compare against null.
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}
7779 
// Compress an oop statically known to be non-null (no null check needed).
// NOTE(review): cr is declared but carries no effect(KILL cr), unlike
// encodeHeapOop above -- confirm encode_heap_oop_not_null leaves flags intact.
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
7790 
// Decompress a narrow oop that may be null. The predicate excludes the
// NotNull and Constant cases, which decodeHeapOop_not_null handles.
// NOTE(review): cr is declared without an effect clause -- confirm
// decode_heap_oop does not clobber flags on this port.
instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}
7804 
// Decompress a narrow oop known non-null (NotNull or Constant type),
// skipping the null check the general decodeHeapOop rule requires.
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}
7818 
7819 // n.b. AArch64 implementations of encode_klass_not_null and
7820 // decode_klass_not_null do not modify the flags register so, unlike
7821 // Intel, we don't kill CR as a side effect here
7822 
// Compress a klass pointer. Per the note above, the AArch64
// encode_klass_not_null does not touch flags, so no KILL cr here.
instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}
7837 
// Decompress a narrow klass pointer; flags are untouched (see note
// above). The macro assembler has a distinct in-place variant, chosen
// when dst and src are allocated to the same register.
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      // In-place form for when the allocator reuses the source register.
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}
7856 
// CheckCastPP is a type-system-only node: no code is emitted (size 0).
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}
7866 
// CastPP is a compiler bookkeeping node: no code is emitted (size 0).
instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}
7876 
// CastII is a compiler bookkeeping node: no code is emitted (size 0, cost 0).
instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
7887 
7888 // ============================================================================
7889 // Atomic operation instructions
7890 //
7891 // Intel and SPARC both implement Ideal Node LoadPLocked and
7892 // Store{PIL}Conditional instructions using a normal load for the
7893 // LoadPLocked and a CAS for the Store{PIL}Conditional.
7894 //
7895 // The ideal code appears only to use LoadPLocked/StorePLocked as a
7896 // pair to lock object allocations from Eden space when not using
7897 // TLABs.
7898 //
7899 // There does not appear to be a Load{IL}Locked Ideal Node and the
7900 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
7901 // and to use StoreIConditional only for 32-bit and StoreLConditional
7902 // only for 64-bit.
7903 //
7904 // We implement LoadPLocked and StorePLocked instructions using,
7905 // respectively the AArch64 hw load-exclusive and store-conditional
7906 // instructions. Whereas we must implement each of
7907 // Store{IL}Conditional using a CAS which employs a pair of
7908 // instructions comprising a load-exclusive followed by a
7909 // store-conditional.
7910 
7911 
7912 // Locked-load (linked load) of the current heap-top
7913 // used when updating the eden heap top
7914 // implemented using ldaxr on AArch64
7915 
// Linked/locked load of a pointer (heap top during non-TLAB eden
// allocation), implemented with ldaxr (load-acquire exclusive) to pair
// with the stlxr in storePConditional below.
instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}
7928 
7929 // Conditional-store of the updated heap-top.
7930 // Used during allocation of the shared heap.
7931 // Sets flag (EQ) on success.
7932 // implemented using stlxr on AArch64.
7933 
// Conditional store of the updated heap top, paired with loadPLocked.
// Uses stlxr (store-release exclusive); the encoding also compares the
// status result so EQ is set in cr on success.
instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
7953 
7954 // this has to be implemented as a CAS
// StoreLConditional must be a full CAS on AArch64 (see the block
// comment above): load-exclusive + store-conditional via the cmpxchg
// encoding, which leaves EQ set in cr on success.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
7970 
7971 // this has to be implemented as a CAS
// 32-bit counterpart of storeLConditional: implemented as a CAS using
// the word-sized cmpxchgw encoding; EQ in cr signals success.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
7987 
7988 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
7989 // can't match them
7990 
// 32-bit CAS: cmpxchgw followed by cset so res holds 1 on success,
// 0 on failure. Flags are clobbered by the compare inside the CAS.
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8007 
// 64-bit CAS: cmpxchg followed by cset so res holds 1 on success, 0 on failure.
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8024 
// Pointer CAS (64-bit): cmpxchg + cset; res holds 1 on success, 0 on failure.
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8041 
// Narrow-oop CAS (32-bit compressed form): cmpxchgw + cset; res holds
// 1 on success, 0 on failure.
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapN mem (Binary oldval newval)));

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8058 
8059 
// Atomic 32-bit exchange: prev <- *mem, *mem <- newv.
instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
  match(Set prev (GetAndSetI mem newv));
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8068 
// Atomic 64-bit exchange: prev <- *mem, *mem <- newv.
instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegL prev) %{
  match(Set prev (GetAndSetL mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8077 
// Atomic exchange of a narrow oop (32-bit compressed form).
instruct get_and_setN(indirect mem, iRegNNoSp newv, iRegI prev) %{
  match(Set prev (GetAndSetN mem newv));
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8086 
// Atomic pointer exchange (64-bit).
instruct get_and_setP(indirect mem, iRegPNoSp newv, iRegP prev) %{
  match(Set prev (GetAndSetP mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8095 
8096 
// Atomic 64-bit fetch-and-add with register increment; newval receives
// the pre-update value.
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8106 
// Result-discarding variant (predicate: result_not_used); passes noreg
// so no old value is materialized. Cost 9 < 10 makes it win over
// get_and_addL when applicable.
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8117 
// Atomic 64-bit fetch-and-add with an add/sub-range immediate increment.
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8127 
// Immediate-increment, result-discarding variant of GetAndAddL.
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8138 
// Atomic 32-bit fetch-and-add with register increment.
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8148 
// Result-discarding 32-bit fetch-and-add (predicate: result_not_used).
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8159 
// Atomic 32-bit fetch-and-add with an add/sub-range immediate increment.
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8169 
// Immediate-increment, result-discarding variant of GetAndAddI.
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8180 
8181 // Manifest a CmpL result in an integer register.
8182 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
// Three-way long compare: cmp sets flags, csetw makes dst 1 if
// src1 != src2 (else 0), then cnegw negates dst when src1 < src2,
// yielding -1/0/1.
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  // format %{ "CmpL3 $dst, $src1, $src2" %}
  ins_encode %{
    __ cmp($src1$$Register, $src2$$Register);
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
8203 
// Three-way long-vs-immediate compare producing -1/0/1. A negative
// immediate is handled by adding its magnitude instead of subtracting,
// since add/sub immediates are unsigned.
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    int32_t con = (int32_t)$src2$$constant;
     if (con < 0) {
      // subs with a negative immediate is not encodable; flip to adds.
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
8228 
8229 // ============================================================================
8230 // Conditional Move Instructions
8231 
// n.b. we have identical rules for both a signed compare op (cmpOp)
// and an unsigned compare op (cmpOpU). it would be nice if we could
// define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
// of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
8241 
// Conditional move, int, signed flags. csel selects its first source
// when the condition holds, hence src2 is passed first.
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8257 
// Conditional move, int, unsigned flags (cmpOpU flavour of the rule above).
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8273 
8274 // special cases where one arg is zero
8275 
8276 // n.b. this is selected in preference to the rule above because it
8277 // avoids loading constant 0 into a source register
8278 
8279 // TODO
8280 // we ought only to be able to cull one of these variants as the ideal
8281 // transforms ought always to order the zero consistently (to left/right?)
8282 
// Int cmove with zero first operand: uses zr instead of loading 0.
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8298 
// Unsigned-flags variant of cmovI_zero_reg.
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8314 
// Int cmove with zero second operand: uses zr instead of loading 0.
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8330 
// Unsigned-flags variant of cmovI_reg_zero.
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8346 
8347 // special case for creating a boolean 0 or 1
8348 
8349 // n.b. this is selected in preference to the rule above because it
8350 // avoids loading constants 0 and 1 into a source register
8351 
// Materialize a boolean 0/1 from the flags with a single csincw
// (dst = cond ? 0 : 0+1), avoiding loads of the constants 0 and 1.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
8370 
// Unsigned-flags variant of cmovI_reg_zero_one.
instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
8389 
// Conditional move, long, signed flags (csel selects src2 when the
// condition holds).
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8405 
// Conditional move, long, unsigned flags.
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8421 
8422 // special cases where one arg is zero
8423 
// Long cmove with zero second operand: uses zr instead of loading 0.
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8439 
// Unsigned-flags variant of cmovL_reg_zero.
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8455 
// Long cmove with zero first operand: uses zr instead of loading 0.
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8471 
// Unsigned-flags variant of cmovL_zero_reg.
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8487 
// Conditional move, pointer, signed flags.
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8503 
// Conditional move, pointer, unsigned flags.
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8519 
8520 // special cases where one arg is zero
8521 
// Pointer cmove with null (zero) second operand: uses zr directly.
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8537 
// Unsigned-flags variant of cmovP_reg_zero.
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8553 
// Pointer cmove with null (zero) first operand: uses zr directly.
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8569 
// Unsigned-flags variant of cmovP_zero_reg.
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8585 
// Conditional move, compressed oop (32-bit), signed flags.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8601 
// Conditional move, compressed oop (32-bit), unsigned flags.
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  // Fix: format previously said "# signed, compressed ptr" -- this is
  // the cmpOpU (unsigned) variant; now consistent with every other
  // cmovU* rule in this file.
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8617 
8618 // special cases where one arg is zero
8619 
// Compressed-oop cmove with null (zero) second operand: uses zr directly.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8635 
// Unsigned-flags variant of cmovN_reg_zero.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8651 
// Compressed-oop cmove with null (zero) first operand: uses zr directly.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8667 
// Unsigned-flags variant of cmovN_zero_reg.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8683 
// Conditional move, float, signed flags; fcsels selects its first
// source when the condition holds, hence src2 first.
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(pipe_class_default);
%}
8701 
// Conditional move of a float after an unsigned compare, via fcsels.
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // src2/src1 deliberately swapped relative to the format string
    // (see cmovF_reg).
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(pipe_class_default);
%}
8719 
// Conditional move of a double after a signed compare, via fcseld.
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // Fixed: this is a double cmove; the comment previously said "float".
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // src2/src1 deliberately swapped relative to the format string;
    // fcsel selects its first source when the condition holds.
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(pipe_class_default);
%}
8737 
// Conditional move of a double after an unsigned compare, via fcseld.
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // Fixed: this is a double cmove; the comment previously said "float".
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // src2/src1 deliberately swapped relative to the format string
    // (see cmovD_reg).
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(pipe_class_default);
%}
8755 
8756 // ============================================================================
8757 // Arithmetic Instructions
8758 //
8759 
8760 // Integer Addition
8761 
8762 // TODO
8763 // these currently employ operations which do not set CR and hence are
8764 // not flagged as killing CR but we would like to isolate the cases
8765 // where we want to set flags from those where we don't. need to work
8766 // out how to do that.
8767 
// 32-bit integer add, register + register (addw).
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
8782 
// 32-bit integer add, register + add/sub-encodable immediate.
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
8796 
// 32-bit add of an immediate to the low word of a long (folds the
// ConvL2I into the 32-bit addw, which ignores the upper bits).
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
8810 
8811 // Pointer Addition
// Pointer add, register + 64-bit register offset.
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
8826 
// Pointer add with a 32-bit offset: folds the ConvI2L into the add's
// sign-extending (sxtw) register-extend form.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}
8841 
// Pointer add with a scaled (left-shifted) long index, folded into a
// single lea with an lsl-scaled address mode.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
8856 
// Pointer add with a sign-extended, scaled int index: folds both the
// ConvI2L and the shift into one lea with an sxtw-scaled address mode.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
8871 
// Left shift of a sign-extended int: folds ConvI2L + LShiftL into a
// single sbfiz (signed bitfield insert in zeros).
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    // Width is capped at 32 since the source is only a sign-extended int.
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
8886 
8887 // Pointer Immediate Addition
8888 // n.b. this needs to be more expensive than using an indirect memory
8889 // operand
// Pointer add, register + add/sub-encodable immediate.
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
8903 
8904 // Long Addition
// 64-bit long add, register + register.
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
8920 
// No constant pool entries required. Long Immediate Addition.
// 64-bit long add, register + add/sub-encodable immediate.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
8935 
8936 // Integer Subtraction
// 32-bit integer subtract, register - register (subw).
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
8951 
8952 // Immediate Subtraction
// 32-bit integer subtract, register - add/sub-encodable immediate.
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
8966 
8967 // Long Subtraction
// 64-bit long subtract, register - register.
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
8983 
// No constant pool entries required. Long Immediate Subtraction.
// 64-bit long subtract, register - add/sub-encodable immediate.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  // Fixed: mnemonic and operand were run together ("sub$dst").
  format %{ "sub  $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
8998 
8999 // Integer Negation (special case for sub)
9000 
// 32-bit integer negate: matches 0 - src and emits negw.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9014 
9015 // Long Negation
9016 
// 64-bit long negate: matches 0 - src and emits neg.
// NOTE(review): src is declared iRegIorL2I though this is a 64-bit
// negate (the int twin above uses the same operand class) — confirm
// against upstream whether iRegL was intended here.
instruct negL_reg(iRegLNoSp dst, iRegIorL2I src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9030 
9031 // Integer Multiply
9032 
// 32-bit integer multiply (mulw).
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
9047 
// Widening 32x32 -> 64-bit signed multiply: folds the two ConvI2L
// conversions into a single smull.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
9062 
9063 // Long Multiply
9064 
// 64-bit long multiply.
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
9079 
// High 64 bits of a signed 64x64 multiply (MulHiL), via smulh.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  // Fixed: dropped the stray trailing comma before the comment field.
  format %{ "smulh   $dst, $src1, $src2\t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
9095 
9096 // Combined Integer Multiply & Add/Sub
9097 
// Fused 32-bit multiply-add: dst = src3 + src1 * src2, via maddw.
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Fixed: the 32-bit form (maddw) is emitted, not madd.
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
9113 
// Fused 32-bit multiply-subtract: dst = src3 - src1 * src2, via msubw.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Fixed: the 32-bit form (msubw) is emitted, not msub.
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
9129 
9130 // Combined Long Multiply & Add/Sub
9131 
// Fused 64-bit multiply-add: dst = src3 + src1 * src2.
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
9147 
// Fused 64-bit multiply-subtract: dst = src3 - src1 * src2.
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
9163 
9164 // Integer Divide
9165 
// 32-bit signed integer divide (sdivw, via shared encoding).
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
9175 
// Sign-bit extraction: (x >> 31) >>> 31 collapses to a single
// logical shift right by 31 (both shift amounts are pinned to 31).
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}
9185 
// Divide-by-2 rounding adjustment: src + ((src >> 31) >>> 31), folded
// into one addw with an LSR #31 shifted operand.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
9199 
9200 // Long Divide
9201 
// 64-bit signed long divide (sdiv, via shared encoding).
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
9211 
// Sign-bit extraction, long: (x >> 63) >>> 63 collapses to lsr #63.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
9221 
// Divide-by-2 rounding adjustment, long: src + ((src >> 63) >>> 63),
// folded into one add with an LSR #63 shifted operand.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  // Fixed: format now shows the LSR shift, matching the int twin
  // (div2Round) and the emitted shifted-register add.
  format %{ "add $dst, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
9235 
9236 // Integer Remainder
9237 
// 32-bit integer remainder: sdivw into rscratch1, then msubw to
// recover the remainder (dst = src1 - (src1/src2)*src2).
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  // Fixed: removed the stray "(" that left the format string unbalanced.
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
9248 
9249 // Long Remainder
9250 
// 64-bit long remainder: sdiv into rscratch1, then msub to recover
// the remainder (dst = src1 - (src1/src2)*src2).
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  // Fixed: removed the stray "(" and added the missing "\t" after the
  // newline, matching the int twin (modI).
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
9261 
9262 // Integer Shifts
9263 
9264 // Shift Left Register
// 32-bit shift left, variable amount in a register (lslvw).
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9279 
9280 // Shift Left Immediate
// 32-bit shift left by immediate; amount masked to 0..31 per Java
// shift semantics.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9295 
9296 // Shift Right Logical Register
// 32-bit logical shift right, variable amount in a register (lsrvw).
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9311 
9312 // Shift Right Logical Immediate
// 32-bit logical shift right by immediate; amount masked to 0..31.
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9327 
9328 // Shift Right Arithmetic Register
// 32-bit arithmetic shift right, variable amount in a register (asrvw).
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9343 
9344 // Shift Right Arithmetic Immediate
// 32-bit arithmetic shift right by immediate; amount masked to 0..31.
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9359 
9360 // Combined Int Mask and Right Shift (using UBFM)
9361 // TODO
9362 
9363 // Long Shifts
9364 
9365 // Shift Left Register
// 64-bit shift left, variable amount in a register (lslv).
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9380 
9381 // Shift Left Immediate
// 64-bit shift left by immediate; amount masked to 0..63 per Java
// shift semantics.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9396 
9397 // Shift Right Logical Register
// 64-bit logical shift right, variable amount in a register (lsrv).
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9412 
9413 // Shift Right Logical Immediate
// 64-bit logical shift right by immediate; amount masked to 0..63.
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9428 
9429 // A special-case pattern for card table stores.
// Logical right shift of a pointer reinterpreted as a long (CastP2X):
// special-cased for card table stores, where the card address is the
// object address shifted right by the card size.
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9444 
9445 // Shift Right Arithmetic Register
// 64-bit arithmetic shift right, variable amount in a register (asrv).
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9460 
9461 // Shift Right Arithmetic Immediate
// 64-bit arithmetic shift right by immediate; amount masked to 0..63.
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9476 
9477 // BEGIN This section of the file is automatically generated. Do not edit --------------
9478 
// (auto-generated section) Long bitwise NOT: XorL with -1 becomes
// "eon dst, src1, zr" (src1 ^ ~0).
instruct regL_not_reg(iRegLNoSp dst,
                         iRegL src1, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
// (auto-generated section) Int bitwise NOT: XorI with -1 becomes
// "eonw dst, src1, zr".
instruct regI_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
9511 
// (auto-generated section) dst = src1 & ~src2, via bicw.
instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9528 
// (auto-generated section) dst = src1 & ~src2, via bic.
instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9545 
// (auto-generated section) dst = src1 | ~src2, via ornw.
instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9562 
// (auto-generated section) dst = src1 | ~src2, via orn.
instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9579 
// (auto-generated section) dst = ~(src1 ^ src2), via eonw.
instruct XorI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9596 
// (auto-generated section) dst = ~(src1 ^ src2), via eon.
instruct XorL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9613 
// (auto-generated section) dst = src1 & ~(src2 >>> src3), folded into
// one bicw with an LSR shifted operand.
// NOTE(review): the shift amount is masked with 0x3f for a 32-bit op
// (0x1f would be expected) — this mirrors the generated pattern;
// confirm the matcher guarantees src3 < 32 here.
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9631 
// (auto-generated section) dst = src1 & ~(src2 >>> src3), folded into
// one bic with an LSR shifted operand.
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9649 
// (auto-generated section) dst = src1 & ~(src2 >> src3), folded into
// one bicw with an ASR shifted operand.
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9667 
// (auto-generated section) dst = src1 & ~(src2 >> src3), folded into
// one bic with an ASR shifted operand.
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9685 
// (auto-generated section) dst = src1 & ~(src2 << src3), folded into
// one bicw with an LSL shifted operand.
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9703 
// (auto-generated section) dst = src1 & ~(src2 << src3), folded into
// one bic with an LSL shifted operand.
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9721 
// (auto-generated section) dst = ~(src1 ^ (src2 >>> src3)), folded
// into one eonw with an LSR shifted operand.
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9739 
// dst = src1 ^ ~(src2 >>> src3) for longs: src4 == -1L makes the outer
// XorL a bitwise-not, folding the pattern into a single EON with an
// LSR-shifted second operand.  Shift is masked to 0..63 (mod-64 long
// shift semantics).
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9757 
// dst = src1 ^ ~(src2 >> src3) for ints: src4 == -1 makes the outer
// XorI a bitwise-not, folding the pattern into a single EONW with an
// ASR-shifted second operand.
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // Mask with 0x1f, not 0x3f: Java int shifts are mod 32 and
              // W-register shifted operands only encode amounts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9775 
// dst = src1 ^ ~(src2 >> src3) for longs: src4 == -1L makes the outer
// XorL a bitwise-not, folding the pattern into a single EON with an
// ASR-shifted second operand.  Shift is masked to 0..63.
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9793 
// dst = src1 ^ ~(src2 << src3) for ints: src4 == -1 makes the outer
// XorI a bitwise-not, folding the pattern into a single EONW with an
// LSL-shifted second operand.
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // Mask with 0x1f, not 0x3f: Java int shifts are mod 32 and
              // W-register shifted operands only encode amounts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9811 
// dst = src1 ^ ~(src2 << src3) for longs: src4 == -1L makes the outer
// XorL a bitwise-not, folding the pattern into a single EON with an
// LSL-shifted second operand.  Shift is masked to 0..63.
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9829 
// dst = src1 | ~(src2 >>> src3) for ints: src4 == -1 makes the XorI a
// bitwise-not, folding the pattern into a single ORNW with an
// LSR-shifted second operand.
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // Mask with 0x1f, not 0x3f: Java int shifts are mod 32 and
              // W-register shifted operands only encode amounts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9847 
// dst = src1 | ~(src2 >>> src3) for longs: src4 == -1L makes the XorL a
// bitwise-not, folding the pattern into a single ORN with an
// LSR-shifted second operand.  Shift is masked to 0..63.
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9865 
// dst = src1 | ~(src2 >> src3) for ints: src4 == -1 makes the XorI a
// bitwise-not, folding the pattern into a single ORNW with an
// ASR-shifted second operand.
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // Mask with 0x1f, not 0x3f: Java int shifts are mod 32 and
              // W-register shifted operands only encode amounts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9883 
// dst = src1 | ~(src2 >> src3) for longs: src4 == -1L makes the XorL a
// bitwise-not, folding the pattern into a single ORN with an
// ASR-shifted second operand.  Shift is masked to 0..63.
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9901 
// dst = src1 | ~(src2 << src3) for ints: src4 == -1 makes the XorI a
// bitwise-not, folding the pattern into a single ORNW with an
// LSL-shifted second operand.
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // Mask with 0x1f, not 0x3f: Java int shifts are mod 32 and
              // W-register shifted operands only encode amounts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9919 
// dst = src1 | ~(src2 << src3) for longs: src4 == -1L makes the XorL a
// bitwise-not, folding the pattern into a single ORN with an
// LSL-shifted second operand.  Shift is masked to 0..63.
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9937 
// dst = src1 & (src2 >>> src3) for ints, folded into a single ANDW
// with an LSR-shifted second operand.
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // Mask with 0x1f, not 0x3f: Java int shifts are mod 32 and
              // W-register shifted operands only encode amounts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9956 
// dst = src1 & (src2 >>> src3) for longs, folded into a single AND
// with an LSR-shifted second operand.  Shift is masked to 0..63
// (mod-64 long shift semantics).
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9975 
// dst = src1 & (src2 >> src3) for ints, folded into a single ANDW
// with an ASR-shifted second operand.
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // Mask with 0x1f, not 0x3f: Java int shifts are mod 32 and
              // W-register shifted operands only encode amounts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9994 
// dst = src1 & (src2 >> src3) for longs, folded into a single AND
// with an ASR-shifted second operand.  Shift is masked to 0..63.
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10013 
// dst = src1 & (src2 << src3) for ints, folded into a single ANDW
// with an LSL-shifted second operand.
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // Mask with 0x1f, not 0x3f: Java int shifts are mod 32 and
              // W-register shifted operands only encode amounts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10032 
// dst = src1 & (src2 << src3) for longs, folded into a single AND
// with an LSL-shifted second operand.  Shift is masked to 0..63.
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10051 
// dst = src1 ^ (src2 >>> src3) for ints, folded into a single EORW
// with an LSR-shifted second operand.
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // Mask with 0x1f, not 0x3f: Java int shifts are mod 32 and
              // W-register shifted operands only encode amounts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10070 
// dst = src1 ^ (src2 >>> src3) for longs, folded into a single EOR
// with an LSR-shifted second operand.  Shift is masked to 0..63.
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10089 
// dst = src1 ^ (src2 >> src3) for ints, folded into a single EORW
// with an ASR-shifted second operand.
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // Mask with 0x1f, not 0x3f: Java int shifts are mod 32 and
              // W-register shifted operands only encode amounts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10108 
// dst = src1 ^ (src2 >> src3) for longs, folded into a single EOR
// with an ASR-shifted second operand.  Shift is masked to 0..63.
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10127 
// dst = src1 ^ (src2 << src3) for ints, folded into a single EORW
// with an LSL-shifted second operand.
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // Mask with 0x1f, not 0x3f: Java int shifts are mod 32 and
              // W-register shifted operands only encode amounts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10146 
// dst = src1 ^ (src2 << src3) for longs, folded into a single EOR
// with an LSL-shifted second operand.  Shift is masked to 0..63.
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10165 
// dst = src1 | (src2 >>> src3) for ints, folded into a single ORRW
// with an LSR-shifted second operand.
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // Mask with 0x1f, not 0x3f: Java int shifts are mod 32 and
              // W-register shifted operands only encode amounts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10184 
// dst = src1 | (src2 >>> src3) for longs, folded into a single ORR
// with an LSR-shifted second operand.  Shift is masked to 0..63.
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10203 
// dst = src1 | (src2 >> src3) for ints, folded into a single ORRW
// with an ASR-shifted second operand.
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // Mask with 0x1f, not 0x3f: Java int shifts are mod 32 and
              // W-register shifted operands only encode amounts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10222 
// dst = src1 | (src2 >> src3) for longs, folded into a single ORR
// with an ASR-shifted second operand.  Shift is masked to 0..63.
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10241 
// dst = src1 | (src2 << src3) for ints, folded into a single ORRW
// with an LSL-shifted second operand.
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // Mask with 0x1f, not 0x3f: Java int shifts are mod 32 and
              // W-register shifted operands only encode amounts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10260 
// dst = src1 | (src2 << src3) for longs, folded into a single ORR
// with an LSL-shifted second operand.  Shift is masked to 0..63.
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10279 
// dst = src1 + (src2 >>> src3) for ints, folded into a single ADDW
// with an LSR-shifted second operand.
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // Mask with 0x1f, not 0x3f: Java int shifts are mod 32 and
              // W-register shifted operands only encode amounts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10298 
// dst = src1 + (src2 >>> src3) for longs, folded into a single ADD
// with an LSR-shifted second operand.  Shift is masked to 0..63.
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10317 
// dst = src1 + (src2 >> src3) for ints, folded into a single ADDW
// with an ASR-shifted second operand.
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // Mask with 0x1f, not 0x3f: Java int shifts are mod 32 and
              // W-register shifted operands only encode amounts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10336 
// dst = src1 + (src2 >> src3) for longs, folded into a single ADD
// with an ASR-shifted second operand.  Shift is masked to 0..63.
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10355 
// dst = src1 + (src2 << src3) for ints, folded into a single ADDW
// with an LSL-shifted second operand.
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // Mask with 0x1f, not 0x3f: Java int shifts are mod 32 and
              // W-register shifted operands only encode amounts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10374 
// dst = src1 + (src2 << src3) for longs, folded into a single ADD
// with an LSL-shifted second operand.  Shift is masked to 0..63.
instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10393 
// dst = src1 - (src2 >>> src3) for ints, folded into a single SUBW
// with an LSR-shifted second operand.
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // Mask with 0x1f, not 0x3f: Java int shifts are mod 32 and
              // W-register shifted operands only encode amounts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10412 
// dst = src1 - (src2 >>> src3) for longs, folded into a single SUB
// with an LSR-shifted second operand.  Shift is masked to 0..63.
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10431 
// dst = src1 - (src2 >> src3) for ints, folded into a single SUBW
// with an ASR-shifted second operand.
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // Mask with 0x1f, not 0x3f: Java int shifts are mod 32 and
              // W-register shifted operands only encode amounts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10450 
// dst = src1 - (src2 >> src3) for longs, folded into a single SUB
// with an ASR-shifted second operand.  Shift is masked to 0..63.
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10469 
// dst = src1 - (src2 << src3) for ints, folded into a single SUBW
// with an LSL-shifted second operand.
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // Mask with 0x1f, not 0x3f: Java int shifts are mod 32 and
              // W-register shifted operands only encode amounts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10488 
// dst = src1 - (src2 << src3) for longs, folded into a single SUB
// with an LSL-shifted second operand.  Shift is masked to 0..63.
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10507 
10508 
10509 
10510 // Shift Left followed by Shift Right.
10511 // This idiom is used by the compiler for the i2b bytecode etc.
// (src << lshift) >> rshift for longs as a single signed bitfield move.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // SBFM sign-extends bits <s:0> of src rotated right by r.  The
    // shift pair selects bits <63-lshift : rshift-lshift>, hence
    // s = 63 - lshift and r = (rshift - lshift) mod 64.
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
10532 
10533 // Shift Left followed by Shift Right.
10534 // This idiom is used by the compiler for the i2b bytecode etc.
// (src << lshift) >> rshift for ints as a single signed bitfield move.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // SBFMW sign-extends bits <s:0> of src rotated right by r.  The
    // shift pair selects bits <31-lshift : rshift-lshift>, hence
    // s = 31 - lshift and r = (rshift - lshift) mod 32.
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
10555 
10556 // Shift Left followed by Shift Right.
10557 // This idiom is used by the compiler for the i2b bytecode etc.
// (src << lshift) >>> rshift for longs as a single unsigned bitfield move.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // UBFM zero-extends bits <s:0> of src rotated right by r.  The
    // shift pair selects bits <63-lshift : rshift-lshift>, hence
    // s = 63 - lshift and r = (rshift - lshift) mod 64.
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
10578 
10579 // Shift Left followed by Shift Right.
10580 // This idiom is used by the compiler for the i2b bytecode etc.
// (src << lshift) >>> rshift for ints as a single unsigned bitfield move.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // UBFMW zero-extends bits <s:0> of src rotated right by r.  The
    // shift pair selects bits <31-lshift : rshift-lshift>, hence
    // s = 31 - lshift and r = (rshift - lshift) mod 32.
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
10601 // Bitfield extract with shift & mask
10602 
// (src >>> rshift) & mask for ints, where mask is a contiguous run of
// low-order ones (guaranteed by immI_bitmask), as one UBFXW extracting
// a width of log2(mask+1) bits starting at rshift.
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  // Show the starting bit as well as the mask; the old format omitted
  // $rshift, which made disassembly output ambiguous.
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);  // mask == 2^width - 1
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// (src >>> rshift) & mask for longs, where mask is a contiguous run of
// low-order ones (guaranteed by immL_bitmask), as one UBFX extracting
// a width of log2(mask+1) bits starting at rshift.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  // Show the starting bit as well as the mask; the old format omitted
  // $rshift, which made disassembly output ambiguous.
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);  // mask == 2^width - 1
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
10633 
10634 // We can use ubfx when extending an And with a mask when we know mask
10635 // is positive.  We know that because immI_bitmask guarantees it.
// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// The 64-bit UBFX zero-extends the extracted field, so the ConvI2L
// comes for free.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  // Show the starting bit as well as the mask; the old format omitted
  // $rshift, which made disassembly output ambiguous.
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);  // mask == 2^width - 1
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
10651 
// Rotations

// extrOrL: (src1 << lshift) | (src2 >>> rshift) where the predicate
// requires lshift + rshift == 64 (mod 64); the combination is exactly
// one EXTR instruction.  When src1 == src2 this is a rotate.
instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// extrOrI: 32-bit variant; predicate requires lshift + rshift == 32 (mod 32).
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// extrAddL: as extrOrL but with AddL combining the halves.  Valid because
// the shifted operands select disjoint bit ranges when the shift counts
// sum to 64, so add and or produce the same result.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// extrAddI: 32-bit variant of extrAddL.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
10713 
10714 
// rol expander
//
// rolL_rReg: 64-bit rotate-left by a variable amount.  AArch64 has no
// rotate-left instruction, so this negates the shift count (subw from zr)
// and uses RORV: rol(x, s) == ror(x, -s mod 64).  Clobbers rscratch1.

instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol expander
//
// rolI_rReg: 32-bit variant of rolL_rReg (RORVW).  Clobbers rscratch1.

instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
10746 
// rolL_rReg_Var_C_64: matches the canonical long rotate-left idiom
// (x << s) | (x >>> (64 - s)) and expands to the rolL_rReg expander.
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// rolL_rReg_Var_C0: same idiom written as (x << s) | (x >>> (0 - s));
// equivalent because shift counts are taken mod 64.
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}
10764 
// rolI_rReg_Var_C_32: matches the canonical int rotate-left idiom
// (x << s) | (x >>> (32 - s)) and expands to the 32-bit rolI_rReg
// expander.
// Fix: this rule matches a 32-bit pattern (OrI/LShiftI/URShiftI) but
// previously declared long register classes (iRegLNoSp/iRegL) and
// expanded to the 64-bit rolL_rReg, so it could never be matched
// (cf. JDK-8154537).  Use int register classes and rolI_rReg instead.
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegIorL2I src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
10773 
// rolI_rReg_Var_C0: int rotate-left written as (x << s) | (x >>> (0 - s));
// equivalent to the C_32 form because shift counts are taken mod 32.
// Fix: previously used long register classes and expanded to the 64-bit
// rolL_rReg even though the matched pattern is 32-bit, so the rule could
// never be matched (cf. JDK-8154537).
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegIorL2I src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
10782 
// ror expander
//
// rorL_rReg: 64-bit rotate-right by a variable amount; maps directly to
// RORV (no scratch register needed, unlike the rol expanders).

instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander
//
// rorI_rReg: 32-bit variant of rorL_rReg (RORVW).

instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
10812 
// rorL_rReg_Var_C_64: matches the canonical long rotate-right idiom
// (x >>> s) | (x << (64 - s)) and expands to the rorL_rReg expander.
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// rorL_rReg_Var_C0: same idiom written as (x >>> s) | (x << (0 - s));
// equivalent because shift counts are taken mod 64.
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}
10830 
// rorI_rReg_Var_C_32: matches the canonical int rotate-right idiom
// (x >>> s) | (x << (32 - s)) and expands to the 32-bit rorI_rReg
// expander.
// Fix: this rule matches a 32-bit pattern (OrI/URShiftI/LShiftI) but
// previously declared long register classes and expanded to the 64-bit
// rorL_rReg, so it could never be matched (cf. JDK-8154537).
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegIorL2I src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
10839 
// rorI_rReg_Var_C0: int rotate-right written as (x >>> s) | (x << (0 - s));
// equivalent to the C_32 form because shift counts are taken mod 32.
// Fix: previously used long register classes and expanded to the 64-bit
// rorL_rReg even though the matched pattern is 32-bit, so the rule could
// never be matched (cf. JDK-8154537).
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegIorL2I src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
10848 
// Add/subtract (extended)

// AddExtI: long += sign-extended int.  (AddL src1 (ConvI2L src2)) folds
// the ConvI2L into ADD's sxtw-extended register operand.
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};

// SubExtI: long -= sign-extended int; SUB with sxtw-extended operand.
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
10876 
10877 
// The following rules recognize sign/zero extension written as a
// left-shift followed by a right-shift of the same amount and fold it
// into ADD's extended-register operand.  For an N-bit type in a W-bit
// word the shift amount is W - N (e.g. 16 for sxth in 32-bit ops).

// AddExtI_sxth: add with src2 sign-extended from 16 bits ((x << 16) >> 16).
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// AddExtI_sxtb: add with src2 sign-extended from 8 bits ((x << 24) >> 24).
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// AddExtI_uxtb: add with src2 zero-extended from 8 bits (URShiftI form).
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// AddExtL_sxth: 64-bit add with src2 sign-extended from 16 bits (shift 48).
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// AddExtL_sxtw: 64-bit add with src2 sign-extended from 32 bits (shift 32).
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// AddExtL_sxtb: 64-bit add with src2 sign-extended from 8 bits (shift 56).
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// AddExtL_uxtb: 64-bit add with src2 zero-extended from 8 bits (URShiftL form).
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
10968 
10969 
// The following rules recognize zero-extension written as an AND with
// 0xff / 0xffff / 0xffffffff and fold it into the uxtb/uxth/uxtw
// extended-register operand of ADD/SUB.

// AddExtI_uxtb_and: addw with src2 zero-extended from 8 bits (src2 & 0xff).
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// AddExtI_uxth_and: addw with src2 zero-extended from 16 bits (src2 & 0xffff).
instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// AddExtL_uxtb_and: 64-bit add with src2 & 0xff.
instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// AddExtL_uxth_and: 64-bit add with src2 & 0xffff.
instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// AddExtL_uxtw_and: 64-bit add with src2 & 0xffffffff.
instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// SubExtI_uxtb_and: subw with src2 & 0xff.
instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// SubExtI_uxth_and: subw with src2 & 0xffff.
instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// SubExtL_uxtb_and: 64-bit sub with src2 & 0xff.
instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// SubExtL_uxth_and: 64-bit sub with src2 & 0xffff.
instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// SubExtL_uxtw_and: 64-bit sub with src2 & 0xffffffff.
instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11099 
11100 // END This section of the file is automatically generated. Do not edit --------------
11101 
11102 // ============================================================================
11103 // Floating Point Arithmetic Instructions
11104 
// addF_reg_reg: single-precision FP add (FADDS).
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// addD_reg_reg: double-precision FP add (FADDD).
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// subF_reg_reg: single-precision FP subtract (FSUBS).
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// subD_reg_reg: double-precision FP subtract (FSUBD).
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// mulF_reg_reg: single-precision FP multiply (FMULS).
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// mulD_reg_reg: double-precision FP multiply (FMULD).
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11194 
// We cannot use these fused mul w add/sub ops because they don't
// produce the same result as the equivalent separated ops
// (essentially they don't round the intermediate result). That's a
// shame. Leaving them here in case we can identify cases where it is
// legitimate to use them.
11200 
11201 
11202 // instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11203 //   match(Set dst (AddF (MulF src1 src2) src3));
11204 
11205 //   format %{ "fmadds   $dst, $src1, $src2, $src3" %}
11206 
11207 //   ins_encode %{
11208 //     __ fmadds(as_FloatRegister($dst$$reg),
11209 //              as_FloatRegister($src1$$reg),
11210 //              as_FloatRegister($src2$$reg),
11211 //              as_FloatRegister($src3$$reg));
11212 //   %}
11213 
11214 //   ins_pipe(pipe_class_default);
11215 // %}
11216 
11217 // instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11218 //   match(Set dst (AddD (MulD src1 src2) src3));
11219 
11220 //   format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
11221 
11222 //   ins_encode %{
11223 //     __ fmaddd(as_FloatRegister($dst$$reg),
11224 //              as_FloatRegister($src1$$reg),
11225 //              as_FloatRegister($src2$$reg),
11226 //              as_FloatRegister($src3$$reg));
11227 //   %}
11228 
11229 //   ins_pipe(pipe_class_default);
11230 // %}
11231 
11232 // instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11233 //   match(Set dst (AddF (MulF (NegF src1) src2) src3));
11234 //   match(Set dst (AddF (NegF (MulF src1 src2)) src3));
11235 
11236 //   format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
11237 
11238 //   ins_encode %{
11239 //     __ fmsubs(as_FloatRegister($dst$$reg),
11240 //               as_FloatRegister($src1$$reg),
11241 //               as_FloatRegister($src2$$reg),
11242 //              as_FloatRegister($src3$$reg));
11243 //   %}
11244 
11245 //   ins_pipe(pipe_class_default);
11246 // %}
11247 
11248 // instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11249 //   match(Set dst (AddD (MulD (NegD src1) src2) src3));
11250 //   match(Set dst (AddD (NegD (MulD src1 src2)) src3));
11251 
11252 //   format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
11253 
11254 //   ins_encode %{
11255 //     __ fmsubd(as_FloatRegister($dst$$reg),
11256 //               as_FloatRegister($src1$$reg),
11257 //               as_FloatRegister($src2$$reg),
11258 //               as_FloatRegister($src3$$reg));
11259 //   %}
11260 
11261 //   ins_pipe(pipe_class_default);
11262 // %}
11263 
11264 // instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11265 //   match(Set dst (SubF (MulF (NegF src1) src2) src3));
11266 //   match(Set dst (SubF (NegF (MulF src1 src2)) src3));
11267 
11268 //   format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
11269 
11270 //   ins_encode %{
11271 //     __ fnmadds(as_FloatRegister($dst$$reg),
11272 //                as_FloatRegister($src1$$reg),
11273 //                as_FloatRegister($src2$$reg),
11274 //                as_FloatRegister($src3$$reg));
11275 //   %}
11276 
11277 //   ins_pipe(pipe_class_default);
11278 // %}
11279 
11280 // instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11281 //   match(Set dst (SubD (MulD (NegD src1) src2) src3));
11282 //   match(Set dst (SubD (NegD (MulD src1 src2)) src3));
11283 
11284 //   format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
11285 
11286 //   ins_encode %{
11287 //     __ fnmaddd(as_FloatRegister($dst$$reg),
11288 //                as_FloatRegister($src1$$reg),
11289 //                as_FloatRegister($src2$$reg),
11290 //                as_FloatRegister($src3$$reg));
11291 //   %}
11292 
11293 //   ins_pipe(pipe_class_default);
11294 // %}
11295 
11296 // instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
11297 //   match(Set dst (SubF (MulF src1 src2) src3));
11298 
11299 //   format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
11300 
11301 //   ins_encode %{
11302 //     __ fnmsubs(as_FloatRegister($dst$$reg),
11303 //                as_FloatRegister($src1$$reg),
11304 //                as_FloatRegister($src2$$reg),
11305 //                as_FloatRegister($src3$$reg));
11306 //   %}
11307 
11308 //   ins_pipe(pipe_class_default);
11309 // %}
11310 
11311 // instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
11312 //   match(Set dst (SubD (MulD src1 src2) src3));
11313 
11314 //   format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
11315 
11316 //   ins_encode %{
11317 //   // n.b. insn name should be fnmsubd
11318 //     __ fnmsub(as_FloatRegister($dst$$reg),
11319 //                as_FloatRegister($src1$$reg),
11320 //                as_FloatRegister($src2$$reg),
11321 //                as_FloatRegister($src3$$reg));
11322 //   %}
11323 
11324 //   ins_pipe(pipe_class_default);
11325 // %}
11326 
11327 
// divF_reg_reg: single-precision FP divide (FDIVS).  High cost reflects
// the long latency of hardware divide.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// divD_reg_reg: double-precision FP divide (FDIVD); costlier than the
// single-precision form.
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11357 
// negF_reg_reg: single-precision FP negate (FNEGS).
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  format %{ "fneg   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// negD_reg_reg: double-precision FP negate (FNEGD).
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// absF_reg: single-precision FP absolute value (FABSS).
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// absD_reg: double-precision FP absolute value (FABSD).
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11411 
// sqrtD_reg: double-precision square root (FSQRTD).
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// sqrtF_reg: single-precision square root.  Float sqrt reaches the
// matcher as ConvD2F(SqrtD(ConvF2D src)); the whole pattern is matched
// so a single FSQRTS replaces the convert/sqrt/convert chain.
// NOTE(review): correctness relies on fsqrts producing the same result
// as the double-rounded sequence for all float inputs — confirm against
// the IEEE-754 single-rounding argument used elsewhere in HotSpot.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11437 
11438 // ============================================================================
11439 // Logical Instructions
11440 
11441 // Integer Logical Instructions
11442 
11443 // And Instructions
11444 
11445 
// andI_reg_reg: 32-bit bitwise AND of two registers (ANDW).
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11460 
// andI_reg_imm: 32-bit bitwise AND with a logical immediate (immILog
// guarantees the constant is encodable as an AArch64 logical immediate).
// Fix: the format string previously read "andsw" (the flag-setting form)
// but the encoding emits a plain, non-flag-setting andw; corrected so
// the disassembly annotation matches the generated instruction.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11475 
// Or Instructions

// orI_reg_reg: 32-bit bitwise OR of two registers (ORRW).
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// orI_reg_imm: 32-bit bitwise OR with a logical immediate.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// xorI_reg_reg: 32-bit bitwise XOR of two registers (EORW).
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// xorI_reg_imm: 32-bit bitwise XOR with a logical immediate.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11539 
11540 // Long Logical Instructions
11541 // TODO
11542 
// 64-bit logical instructions.
// Fix: the "\t# int" annotations on these 64-bit rules were copy-paste
// leftovers from the int forms above; corrected to "# long" so the
// disassembly annotation matches the operand width.

// andL_reg_reg: 64-bit bitwise AND of two registers (AND).
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// andL_reg_imm: 64-bit bitwise AND with a logical immediate (immLLog).
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Or Instructions

// orL_reg_reg: 64-bit bitwise OR of two registers (ORR).
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// orL_reg_imm: 64-bit bitwise OR with a logical immediate.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// xorL_reg_reg: 64-bit bitwise XOR of two registers (EOR).
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// xorL_reg_imm: 64-bit bitwise XOR with a logical immediate.
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11636 
// Sign-extend int to long (i2l): sbfm with immr=0, imms=31 is the
// canonical sxtw encoding.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// this pattern occurs in bigmath arithmetic
// Zero-extend int to long: (AndL (ConvI2L src) 0xFFFFFFFF) collapses to
// a single ubfm (unsigned bitfield move of bits 0..31).
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
11662 
// Long to int (l2i): a 32-bit register move keeps only the low word.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Int to boolean: dst = (src != 0) ? 1 : 0 via 32-bit compare with zero
// plus conditional set. Clobbers the flags register.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Pointer to boolean: same as above but with a 64-bit compare.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
11711 
// Floating point / integer conversion instructions. Each rule maps one
// ideal conversion node onto a single AArch64 convert instruction.

// Double to float narrowing (fcvtd).
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Float to double widening (fcvts).
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Float to int: signed convert, round toward zero, 32-bit destination.
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Float to long: signed convert, round toward zero, 64-bit destination.
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Int to float: signed 32-bit source converted to single precision.
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Long to float: signed 64-bit source converted to single precision.
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Double to int: signed convert, round toward zero, 32-bit destination.
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Double to long: signed convert, round toward zero, 64-bit destination.
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Int to double: signed 32-bit source converted to double precision.
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Long to double: signed 64-bit source converted to double precision.
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11841 
// stack <-> reg and reg <-> reg shuffles with no conversion
// These reinterpret the raw bits of a value as another type; loads read
// the stack slot at sp + displacement.

// Load raw float bits from a stack slot into an int register.
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Load raw int bits from a stack slot into a float register.
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Load raw double bits from a stack slot into a long register.
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Load raw long bits from a stack slot into a double register.
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
11915 
// Store raw float bits from a float register into an int stack slot.
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Store raw int bits from an int register into a float stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
11951 
// Store raw double bits from an FP register into a long stack slot.
// Format string fixed to print store operands as "$src, $dst" like the
// sibling reg->stack rules (the encoding always stored src to dst).
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
11969 
// Store raw long bits from a long register into a double stack slot.
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
11987 
// Register-to-register bit shuffles using fmov (no memory traffic).
// NOTE(review): these use ins_pipe(pipe_class_memory) even though no
// memory is touched — presumably to model fmov's GP<->FP transfer cost;
// confirm against the pipeline class definitions.

// Move raw float bits: FP register -> int register.
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_memory);

%}

// Move raw int bits: int register -> FP register.
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(pipe_class_memory);

%}

// Move raw double bits: FP register -> long register.
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_memory);

%}

// Move raw long bits: long register -> FP register.
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(pipe_class_memory);

%}
12059 
12060 // ============================================================================
12061 // clearing of an array
12062 
12063 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
12064 %{
12065   match(Set dummy (ClearArray cnt base));
12066   effect(USE_KILL cnt, USE_KILL base);
12067 
12068   ins_cost(4 * INSN_COST);
12069   format %{ "ClearArray $cnt, $base" %}
12070 
12071   ins_encode(aarch64_enc_clear_array_reg_reg(cnt, base));
12072 
12073   ins_pipe(pipe_class_memory);
12074 %}
12075 
12076 // ============================================================================
12077 // Overflow Math Instructions
12078 
12079 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
12080 %{
12081   match(Set cr (OverflowAddI op1 op2));
12082 
12083   format %{ "cmnw  $op1, $op2\t# overflow check int" %}
12084   ins_cost(INSN_COST);
12085   ins_encode %{
12086     __ cmnw($op1$$Register, $op2$$Register);
12087   %}
12088 
12089   ins_pipe(icmp_reg_reg);
12090 %}
12091 
12092 instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
12093 %{
12094   match(Set cr (OverflowAddI op1 op2));
12095 
12096   format %{ "cmnw  $op1, $op2\t# overflow check int" %}
12097   ins_cost(INSN_COST);
12098   ins_encode %{
12099     __ cmnw($op1$$Register, $op2$$constant);
12100   %}
12101 
12102   ins_pipe(icmp_reg_imm);
12103 %}
12104 
12105 instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
12106 %{
12107   match(Set cr (OverflowAddL op1 op2));
12108 
12109   format %{ "cmn   $op1, $op2\t# overflow check long" %}
12110   ins_cost(INSN_COST);
12111   ins_encode %{
12112     __ cmn($op1$$Register, $op2$$Register);
12113   %}
12114 
12115   ins_pipe(icmp_reg_reg);
12116 %}
12117 
12118 instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
12119 %{
12120   match(Set cr (OverflowAddL op1 op2));
12121 
12122   format %{ "cmn   $op1, $op2\t# overflow check long" %}
12123   ins_cost(INSN_COST);
12124   ins_encode %{
12125     __ cmn($op1$$Register, $op2$$constant);
12126   %}
12127 
12128   ins_pipe(icmp_reg_imm);
12129 %}
12130 
// Int subtract overflow check: cmp/cmpw subtract and set flags only.
instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Int subtract overflow check, register - immediate.
instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long subtract overflow check, register - register.
instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// Long subtract overflow check, register - immediate.
instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Int negate overflow check: (OverflowSubI 0 op1) -> cmpw zr, op1.
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  format %{ "cmpw  zr, $op1\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Long negate overflow check: (OverflowSubL 0 op1) -> cmp zr, op1.
// NOTE(review): the zero operand is declared immI0 rather than a long
// zero immediate — confirm this matches a long constant zero here.
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
12208 
// Int multiply overflow check producing a flags result. The widening
// smull result is compared against its own sign extension (NE iff the
// product does not fit in 32 bits); the movw/cselw/cmpw sequence then
// translates that NE/EQ state into the V flag the consumer expects.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused int multiply-overflow + branch: when the If directly tests
// overflow/no_overflow, skip the flag-translation dance and branch on
// the NE/EQ result of the sign-extension compare.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}

// Long multiply overflow check: compare the high 64 bits (smulh)
// against the sign extension of the low 64 bits (mul); NE iff the
// 128-bit product does not fit in 64 bits, then translate to V flag.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #31\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 31);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused long multiply-overflow + branch (same idea as the int form).
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #31\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 31);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
12298 
12299 // ============================================================================
12300 // Compare Instructions
12301 
12302 instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
12303 %{
12304   match(Set cr (CmpI op1 op2));
12305 
12306   effect(DEF cr, USE op1, USE op2);
12307 
12308   ins_cost(INSN_COST);
12309   format %{ "cmpw  $op1, $op2" %}
12310 
12311   ins_encode(aarch64_enc_cmpw(op1, op2));
12312 
12313   ins_pipe(icmp_reg_reg);
12314 %}
12315 
12316 instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
12317 %{
12318   match(Set cr (CmpI op1 zero));
12319 
12320   effect(DEF cr, USE op1);
12321 
12322   ins_cost(INSN_COST);
12323   format %{ "cmpw $op1, 0" %}
12324 
12325   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
12326 
12327   ins_pipe(icmp_reg_imm);
12328 %}
12329 
12330 instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
12331 %{
12332   match(Set cr (CmpI op1 op2));
12333 
12334   effect(DEF cr, USE op1);
12335 
12336   ins_cost(INSN_COST);
12337   format %{ "cmpw  $op1, $op2" %}
12338 
12339   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
12340 
12341   ins_pipe(icmp_reg_imm);
12342 %}
12343 
12344 instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
12345 %{
12346   match(Set cr (CmpI op1 op2));
12347 
12348   effect(DEF cr, USE op1);
12349 
12350   ins_cost(INSN_COST * 2);
12351   format %{ "cmpw  $op1, $op2" %}
12352 
12353   ins_encode(aarch64_enc_cmpw_imm(op1, op2));
12354 
12355   ins_pipe(icmp_reg_imm);
12356 %}
12357 
// Unsigned compare Instructions; really, same as signed compare
// except it should only be used to feed an If or a CMovI which takes a
// cmpOpU.

// Unsigned int compare, register vs register.
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned int compare against constant zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an arbitrary immediate.
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
12417 
// Long compare, register vs register.
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Long compare against constant zero.
// NOTE(review): the zero operand is declared immI0 rather than a long
// zero immediate — confirm this can match a long constant zero in CmpL.
instruct compL_reg_immI0(rFlagsReg cr, iRegL op1, immI0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Long compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Long compare against an arbitrary immediate (may need the constant
// materialized, hence the doubled cost).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
12473 
// Pointer compare, register vs register (unsigned flags).
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compressed pointer (narrow oop) compare, register vs register.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Pointer null test: compare against constant NULL.
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Compressed pointer null test: compare against narrow NULL.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
12529 
// FP comparisons
//
// n.b. CmpF/CmpD set a normal flags reg which then gets compared
// using normal cmpOp. See declaration of rFlagsReg for details.

// Float compare, register vs register.
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Float compare against constant 0.0 (fcmps immediate-zero form).
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}
// FROM HERE

// Double compare, register vs register.
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Double compare against constant 0.0 (fcmpd immediate-zero form).
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}
12591 
// Three-way float compare (CmpF3): produce -1/0/+1 in an int register.
// fcmps sets flags; csinvw yields 0 on EQ else -1; csnegw keeps -1 on
// LT (less or unordered) else negates to +1. Clobbers flags.
// n.b. the `done` label is bound but no branch targets it.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}

  ins_pipe(pipe_class_default);

%}

// Three-way double compare (CmpD3): same scheme with fcmpd.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);

%}

// Three-way float compare against constant 0.0.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0D);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}

  ins_pipe(pipe_class_default);

%}

// Three-way double compare against constant 0.0.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0D);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);

%}
12699 
// CmpLTMask: dst = (p < q) ? -1 : 0. Set dst to 1 via csetw on LT,
// then negate (0 - 1 = -1). Clobbers flags.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_shift);
%}

// Special case against zero: an arithmetic right shift by 31 smears the
// sign bit, giving -1 for negative src and 0 otherwise in one insn.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
12736 
12737 // ============================================================================
12738 // Max and Min
12739 
// Signed integer minimum: compare, then conditionally select src1 when
// src1 < src2 (signed), otherwise src2.  Flags are clobbered by the cmpw.
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 lt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    // dst = (src1 < src2) ? src1 : src2.
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
12764 // FROM HERE
12765 
// Signed integer maximum: compare, then conditionally select src1 when
// src1 > src2 (signed), otherwise src2.  On equality src2 is selected,
// which yields the same value.  Flags are clobbered by the cmpw.
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 gt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    // dst = (src1 > src2) ? src1 : src2.
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
12790 
12791 // ============================================================================
12792 // Branch Instructions
12793 
12794 // Direct Branch.
// Unconditional PC-relative branch to the target label.
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}
12808 
12809 // Conditional Near Branch
// Conditional branch on signed condition codes; the condition comes from
// the cmpOp operand and is consumed by the aarch64_enc_br_con encoding.
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}

// Conditional Near Branch Unsigned
// Same as branchCon but for unsigned condition codes (cmpOpU/rFlagsRegU).
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
12850 
12851 // Make use of CBZ and CBNZ.  These instructions, as well as being
12852 // shorter than (cmp; branch), have the additional benefit of not
12853 // killing the flags.
12854 
// 32-bit integer compare against zero fused with a branch.  The predicate
// restricts matching to EQ/NE tests, which map directly onto cbzw/cbnzw
// (these do not modify the flags — see the comment above).
instruct cmpI_imm0_branch(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// 64-bit long compare against zero fused with a branch (EQ/NE only),
// using cbz/cbnz.
instruct cmpL_imm0_branch(cmpOp cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Pointer compare against NULL fused with a branch (EQ/NE only),
// using cbz/cbnz on the 64-bit register.
instruct cmpP_imm0_branch(cmpOp cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
12911 
12912 // Conditional Far Branch
12913 // Conditional Far Branch Unsigned
12914 // TODO: fixme
12915 
12916 // counted loop end branch near
// Backedge branch of a counted loop, signed condition codes.  Uses the
// same encoding as branchCon.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}

// counted loop end branch near Unsigned
// Same as branchLoopEnd but for unsigned condition codes; uses the
// branchConU encoding.
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
12949 
12950 // counted loop end branch far
12951 // counted loop end branch far unsigned
12952 // TODO: fixme
12953 
12954 // ============================================================================
12955 // inlined locking and unlocking
12956 
// Inline monitor enter.  The result is the flags register (Set cr); the
// fast-lock encoding uses tmp and tmp2 as scratch registers.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}

// Inline monitor exit.  Mirror image of cmpFastLock; tmp and tmp2 are
// scratch for the fast-unlock encoding.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
12984 
12985 
12986 // ============================================================================
12987 // Safepoint Instructions
12988 
12989 // TODO
12990 // provide a near and far version of this code
12991 
// Safepoint poll: load from the polling page, discarding the value into
// zr.  The access is tagged with relocInfo::poll_type so the runtime can
// identify it.
instruct safePoint(iRegP poll)
%{
  match(SafePoint poll);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
13004 
13005 
13006 // ============================================================================
13007 // Procedure Call/Return Instructions
13008 
13009 // Call Java Static Instruction
13010 
// Direct call to a statically-bound Java method, followed by the shared
// call epilogue encoding.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
13026 
13027 // TO HERE
13028 
13029 // Call Java Dynamic Instruction
// Dynamically-dispatched (inline-cache) Java call, followed by the shared
// call epilogue encoding.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
13045 
13046 // Call Runtime Instruction
13047 
// Call from compiled Java code into the VM runtime.  All three runtime
// call variants below share the aarch64_enc_java_to_runtime encoding and
// differ only in the ideal node they match.
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction
// Leaf runtime call (no safepoint state transition node in the ideal
// graph distinguishes it here; matching is on CallLeaf).
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction
// Leaf runtime call that does not use floating-point registers
// (matches CallLeafNoFP).
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
13096 
13097 // Tail Call; Jump from runtime stub to Java code.
13098 // Also known as an 'interprocedural jump'.
13099 // Target of jump will eventually return to caller.
13100 // TailJump below removes the return address.
// Indirect tail call: register branch to jump_target with the method oop
// pinned in the inline-cache register (see comment block above).
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}

// Indirect tail jump carrying an exception oop pinned in r0; uses the
// tail-jmp encoding, which also removes the return address.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
13126 
13127 // Create exception oop: created by stack-crawling runtime code.
13128 // Created exception is now available to this handler, and is setup
13129 // just prior to jumping to this handler. No code emitted.
13130 // TODO check
13131 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
// Marker node: the exception oop is already in r0 when the handler is
// entered, so this emits no code (size 0, empty encoding).
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
13144 
13145 // Rethrow exception: The exception oop will come in the first
13146 // argument position. Then JUMP (not call) to the rethrow stub code.
// Jump (not call) to the rethrow stub; the exception oop is in the first
// argument register per the comment above.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
13157 
13158 
13159 // Return Instruction
13160 // epilog node loads ret address into lr as part of frame pop
// Method return; the epilog has already restored lr (see comment above),
// so this is just the ret encoding.
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}
13171 
13172 // Die now.
// Halt: emit a breakpoint instruction with an arbitrary immediate (999)
// so execution traps if this point is ever reached.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // TODO
    // implement proper trap call here
    __ brk(999);
  %}

  ins_pipe(pipe_class_default);
%}
13187 
13188 // ============================================================================
13189 // Partial Subtype Check
13190 //
13191 // superklass array for an instance of the superklass.  Set a hidden
13192 // internal cache on a hit (cache is checked with exposed code in
13193 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
13194 // encoding ALSO sets flags.
13195 
// Partial subtype check with the result in a register.  The opcode value
// is a flag consumed by the shared encoding: 0x1 forces the result
// register to zero on a cache hit.
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}

// Variant matched when the check result is only compared against zero;
// the flags are the result (Set cr) and the result register need not be
// zeroed on a hit (opcode 0x0).
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
13225 
// String.compareTo intrinsic.  Operands are pinned to fixed registers
// (r1..r4, result in r0) to match the MacroAssembler::string_compare
// stub calling convention; all inputs and tmp1 are clobbered.
instruct string_compare(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13240 
// String.indexOf intrinsic, variable needle length.  Passing -1 as the
// constant-count argument tells string_indexof that cnt2 is dynamic.
instruct string_indexof(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// String.indexOf intrinsic, needle length a small compile-time constant
// (immI_le_4).  The constant count is passed as icnt2 and zr stands in
// for the (unused) dynamic count register.
instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13278 
// String.equals intrinsic; inputs pinned to the stub's fixed registers,
// result in r0, tmp (r10) used as scratch.
instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, iRegP_R10 tmp, rFlagsReg cr)
%{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(KILL tmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp" %}
  ins_encode %{
    __ string_equals($str1$$Register, $str2$$Register,
                      $cnt$$Register, $result$$Register,
                      $tmp$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Arrays.equals intrinsic for char arrays (delegates to
// MacroAssembler::char_arrays_equals); result in r0.
instruct array_equals(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R10 tmp, rFlagsReg cr)
%{
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ char_arrays_equals($ary1$$Register, $ary2$$Register,
                          $result$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13307 
13308 // encode char[] to byte[] in ISO_8859_1
// encode char[] to byte[] in ISO_8859_1
// Delegates to MacroAssembler::encode_iso_array; uses four vector
// temporaries (v0-v3), which are KILLed along with all inputs.
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
13326 
13327 // ============================================================================
13328 // This name is KNOWN by the ADLC and cannot be changed.
13329 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13330 // for this guy.
// ThreadLocal: the current thread already lives in the dedicated thread
// register (thread_RegP), so no code is emitted (size 0).
instruct tlsLoadP(thread_RegP dst)
%{
  match(Set dst (ThreadLocal));

  ins_cost(0);

  format %{ " -- \t// $dst=Thread::current(), empty" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
13345 
13346 // ====================VECTOR INSTRUCTIONS=====================================
13347 
13348 // Load vector (32 bits)
// 32-bit vector load into the S view of a D register.  The predicate
// selects on the LoadVector's in-memory size; the vmem operand supplies
// the addressing mode.
instruct loadV4(vecD dst, vmem mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load vector (64 bits)
// 64-bit vector load (D register form).
instruct loadV8(vecD dst, vmem mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Vector (128 bits)
// 128-bit vector load (Q register form).
instruct loadV16(vecX dst, vmem mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Store Vector (32 bits)
// 32-bit vector store (S register form); mirror of loadV4.
instruct storeV4(vecD src, vmem mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(pipe_class_memory);
%}

// Store Vector (64 bits)
// 64-bit vector store (D register form); mirror of loadV8.
instruct storeV8(vecD src, vmem mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(pipe_class_memory);
%}

// Store Vector (128 bits)
// 128-bit vector store (Q register form); mirror of loadV16.
instruct storeV16(vecX src, vmem mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(pipe_class_memory);
%}
13413 
// Replicate a byte from a general register into every lane of a D
// register (also covers 4-byte vectors per the predicate).
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// 16-lane byte replicate (Q register form).
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Byte replicate of a constant; only the low 8 bits of the constant are
// used (masked with 0xff).
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(pipe_class_default);
%}

// 16-lane constant byte replicate (Q register form).
instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(pipe_class_default);
%}
13463 
// Replicate a 16-bit value from a general register into the H lanes of a
// D register (covers 2- and 4-lane short vectors per the predicate).
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// 8-lane short replicate (Q register form).
instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Short replicate of a constant; only the low 16 bits are used
// (masked with 0xffff).
instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(pipe_class_default);
%}

// 8-lane constant short replicate (Q register form).
instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(pipe_class_default);
%}
13513 
// Replicate a 32-bit int from a general register into both S lanes of a
// D register.
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// 4-lane int replicate (Q register form).
instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Int replicate of a constant into both S lanes of a D register.
instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// 4-lane constant int replicate (Q register form).
instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
13561 
// Replicate a 64-bit long from a general register into both D lanes of a
// Q register.
instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Zero a 128-bit vector by eor-ing the destination with itself.
// NOTE(review): this matches (ReplicateI zero) with length 2 despite the
// "2L" name, and the format text says "4I"; the eor clears all 128 bits
// either way, but confirm the intended ideal-node mapping.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13587 
// Replicate a float from an FP register into both S lanes of a D
// register.
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// 4-lane float replicate (Q register form).
instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Replicate a double from an FP register into both D lanes of a Q
// register.
instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13626 
13627 // ====================REDUCTION ARITHMETIC====================================
13628 
// Add-reduction of a 2-lane int vector plus a scalar: extract both S
// lanes to general registers with umov, then accumulate into dst.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp, iRegI tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    // Move lanes 0 and 1 out of the vector register.
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    // dst = src1 + lane0 + lane1.
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
13647 
// Add-reduction of a 4-lane int vector plus a scalar: addv sums all four
// S lanes into lane 0 of tmp, which is then extracted and added to src1.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    // Horizontal add across all four lanes.
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
13665 
// Multiply-reduction of a 2-lane int vector times a scalar: extract each
// S lane with umov and multiply into dst.  dst is TEMP because it is
// written before the last input read.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i\n\t"
  %}
  ins_encode %{
    // dst = src1 * lane0 * lane1.
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
13684 
// Multiply-reduction of a 4-lane int vector times a scalar.  The ins
// copies the high 64 bits of src2 (D element 1) into element 0 of tmp;
// the T2S mulv then forms the pairwise products lane0*lane2 and
// lane1*lane3, which are extracted and multiplied with src1.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "ins   $tmp, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i\n\t"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    // Combine the two pairwise products with src1.
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
13709 
// Add-reduction of a 2-lane float vector: dst = src1 + src2[0] + src2[1].
// Lane 1 is extracted into tmp via ins, then folded in with scalar fadds
// (sequential adds preserve strict FP ordering).
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13729 
// Add-reduction of a 4-lane float vector: dst = src1 + src2[0..3].
// Each of lanes 1..3 is extracted into tmp with ins and accumulated with a
// scalar fadds; the strictly sequential order keeps FP results bit-stable.
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13761 
// Multiply-reduction of a 2-lane float vector: dst = src1 * src2[0] * src2[1].
// Lane 1 is extracted into tmp via ins, then folded in with scalar fmuls.
// Fix: format comment previously said "add reduction4f" — this is the
// 2-float MULTIPLY reduction, so it now reads "mul reduction2f".
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13781 
// Multiply-reduction of a 4-lane float vector: dst = src1 * src2[0..3].
// Lanes 1..3 are extracted with ins and folded in sequentially with scalar
// fmuls, preserving strict FP evaluation order.
// Fix: format comment previously said "add reduction4f" — this is the
// multiply reduction, so it now reads "mul reduction4f".
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13813 
// Add-reduction of a 2-lane double vector: dst = src1 + src2[0] + src2[1].
// Lane 1 (D element) is extracted into tmp via ins, then added with scalar
// faddd.
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13833 
// Multiply-reduction of a 2-lane double vector: dst = src1 * src2[0] * src2[1].
// Lane 1 (D element) is extracted into tmp via ins, then multiplied with
// scalar fmuld.
// Fix: format comment previously said "add reduction2d" — this is the
// multiply reduction, so it now reads "mul reduction2d".
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
  %}
  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13853 
13854 // ====================VECTOR ARITHMETIC=======================================
13855 
13856 // --------------------------------- ADD --------------------------------------
13857 
// Element-wise byte vector add (AddVB) for 4- or 8-lane vectors in a 64-bit
// register; three-register addv with T8B arrangement.
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13872 
// Element-wise byte vector add (AddVB) for 16-lane vectors in a 128-bit
// register; T16B arrangement.
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13886 
// Element-wise short vector add (AddVS) for 2- or 4-lane vectors; T4H
// arrangement in a 64-bit register.
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13901 
// Element-wise short vector add (AddVS) for 8-lane vectors; T8H arrangement.
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13915 
// Element-wise int vector add (AddVI) for 2-lane vectors; T2S arrangement.
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13929 
// Element-wise int vector add (AddVI) for 4-lane vectors; T4S arrangement.
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13943 
// Element-wise long vector add (AddVL) for 2-lane vectors; T2D arrangement.
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13957 
// Element-wise float vector add (AddVF) for 2-lane vectors; fadd with T2S.
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13971 
// Element-wise float vector add (AddVF) for 4-lane vectors; fadd with T4S.
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13985 
// Element-wise double vector add (AddVD) for 2-lane vectors; fadd with T2D.
// Fix: added the length()==2 predicate for consistency with vsub2D, vmul2D
// and vdiv2D — every other 2D rule in this file guards on the vector length.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13998 
13999 // --------------------------------- SUB --------------------------------------
14000 
// Element-wise byte vector subtract (SubVB) for 4- or 8-lane vectors; T8B.
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14015 
// Element-wise byte vector subtract (SubVB) for 16-lane vectors; T16B.
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14029 
// Element-wise short vector subtract (SubVS) for 2- or 4-lane vectors; T4H.
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14044 
// Element-wise short vector subtract (SubVS) for 8-lane vectors; T8H.
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14058 
// Element-wise int vector subtract (SubVI) for 2-lane vectors; T2S.
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14072 
// Element-wise int vector subtract (SubVI) for 4-lane vectors; T4S.
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14086 
// Element-wise long vector subtract (SubVL) for 2-lane vectors; T2D.
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14100 
// Element-wise float vector subtract (SubVF) for 2-lane vectors; fsub T2S.
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14114 
// Element-wise float vector subtract (SubVF) for 4-lane vectors; fsub T4S.
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14128 
// Element-wise double vector subtract (SubVD) for 2-lane vectors; fsub T2D.
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14142 
14143 // --------------------------------- MUL --------------------------------------
14144 
// Element-wise short vector multiply (MulVS) for 2- or 4-lane vectors; T4H.
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14159 
// Element-wise short vector multiply (MulVS) for 8-lane vectors; T8H.
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14173 
// Element-wise int vector multiply (MulVI) for 2-lane vectors; T2S.
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14187 
// Element-wise int vector multiply (MulVI) for 4-lane vectors; T4S.
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14201 
// Element-wise float vector multiply (MulVF) for 2-lane vectors; fmul T2S.
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14215 
// Element-wise float vector multiply (MulVF) for 4-lane vectors; fmul T4S.
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14229 
// Element-wise double vector multiply (MulVD) for 2-lane vectors; fmul T2D.
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14243 
14244 // --------------------------------- DIV --------------------------------------
14245 
// Element-wise float vector divide (DivVF) for 2-lane vectors; fdiv T2S.
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14259 
// Element-wise float vector divide (DivVF) for 4-lane vectors; fdiv T4S.
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14273 
// Element-wise double vector divide (DivVD) for 2-lane vectors; fdiv T2D.
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14287 
14288 // --------------------------------- AND --------------------------------------
14289 
// Bitwise AND of vectors up to 8 bytes (AndV); andr with T8B arrangement.
// Predicate is in bytes because AndV is type-agnostic.
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14304 
// Bitwise AND of 16-byte vectors (AndV); andr with T16B arrangement.
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14318 
14319 // --------------------------------- OR ---------------------------------------
14320 
// Bitwise OR of vectors up to 8 bytes (OrV); orr with T8B arrangement.
// Fix: format string said "and" although the rule emits orr (copy/paste
// from vand8B); it now matches vor16B's "orr".
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14335 
// Bitwise OR of 16-byte vectors (OrV); orr with T16B arrangement.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14349 
14350 // --------------------------------- XOR --------------------------------------
14351 
// Bitwise XOR of vectors up to 8 bytes (XorV); AArch64 mnemonic is eor,
// T8B arrangement.
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14366 
// Bitwise XOR of 16-byte vectors (XorV); eor with T16B arrangement.
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14380 
14381 // ------------------------------ Shift ---------------------------------------
14382 
// Materialize a left-shift count: broadcast the GPR count into every byte
// lane of a vector register (dup T16B) for use by the sshl/ushl rules below.
instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (LShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14391 
14392 // Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
// Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
// Materialize a right-shift count: broadcast the GPR count into every byte
// lane, then negate all lanes so sshl/ushl perform a right shift.
instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
    __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14402 
// Variable byte shift for 4/8-lane vectors. Both LShiftVB and RShiftVB map
// to sshl: the shift-count vector is negated by vshiftcntR for right shifts.
instruct vsll8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14417 
// Variable byte shift for 16-lane vectors; sshl T16B handles both left and
// (via negated count) signed right shifts.
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14431 
// Variable unsigned right shift for 4/8-lane byte vectors; ushl with a
// negated count (from vshiftcntR) performs a logical right shift.
instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14445 
// Variable unsigned right shift for 16-lane byte vectors; ushl T16B with a
// negated count.
instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14458 
// Immediate left shift for 4/8-lane byte vectors. Java masks int shift
// counts to 5 bits (& 31); shifting a byte by >= 8 yields 0, emitted here
// as eor of src with itself.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      // Shift count >= element width: result is all zeros.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(pipe_class_default);
%}
14478 
// Immediate left shift for 16-lane byte vectors; same zeroing rule for
// counts >= 8 as vsll8B_imm, with T16B arrangement.
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      // Shift count >= element width: result is all zeros.
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(pipe_class_default);
%}
14497 
// Immediate arithmetic right shift for 4/8-lane byte vectors. Counts >= 8
// are clamped to 7 (sign-fill, matching Java semantics for bytes).
// NOTE(review): the final "sh = -sh & 7" suggests the __ sshr helper expects
// a negated/encoded amount rather than the literal shift — confirm against
// the assembler's sshr definition.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(pipe_class_default);
%}
14513 
// Immediate arithmetic right shift for 16-lane byte vectors; counts >= 8
// clamped to 7, same encoded-amount scheme as vsra8B_imm (see note there).
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(pipe_class_default);
%}
14528 
// Immediate logical right shift for 4/8-lane byte vectors. Counts >= 8
// zero the result (eor src,src); otherwise ushr is emitted with "-sh & 7",
// presumably the encoded amount expected by the assembler helper — confirm
// against the assembler's ushr definition.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      // Shift count >= element width: result is all zeros.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(pipe_class_default);
%}
14548 
// Immediate logical right shift for 16-lane byte vectors; same zeroing and
// encoded-amount scheme as vsrl8B_imm (see note there), T16B arrangement.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      // Shift count >= element width: result is all zeros.
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(pipe_class_default);
%}
14567 
// Variable shift of 2 or 4 short lanes by a per-lane count held in a vector register.
// SSHL shifts left for positive counts and right for negative ones, so this one
// rule serves both LShiftVS and RShiftVS — presumably the shift operand is
// pre-negated for the right-shift match by a separate count-setup rule (confirm).
instruct vsll4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14582 
// Variable shift of 8 short lanes by a per-lane count in a vector register.
// Covers both left and (via negated counts) arithmetic right shift — see vsll4S.
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14596 
// Variable logical right shift of 2 or 4 short lanes. USHL shifts right for
// negative per-lane counts — presumably the shift operand holds negated counts
// prepared by a separate rule (confirm against the shift-count setup instruct).
instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14610 
// Variable logical right shift of 8 short lanes — see vsrl4S for the
// negated-count convention of USHL.
instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14623 
// Left shift of 2 or 4 short lanes by an immediate (64-bit vector).
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;  // Java masks int shift counts to 5 bits
    if (sh >= 16) {
      // Left shift by >= 16 zeroes every 16-bit lane: clear dst with eor(src, src).
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(pipe_class_default);
%}
14643 
// Left shift of 8 short lanes by an immediate (128-bit vector).
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;  // Java masks int shift counts to 5 bits
    if (sh >= 16) {
      // Left shift by >= 16 zeroes every 16-bit lane: clear dst with eor(src, src).
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(pipe_class_default);
%}
14662 
// Arithmetic right shift of 2 or 4 short lanes by an immediate (64-bit vector).
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;  // Java masks int shift counts to 5 bits
    if (sh >= 16) sh = 15;                // arithmetic shift saturates at lane width - 1 (sign fill)
    sh = -sh & 15;                        // NOTE(review): sshr appears to take (-n & (esize-1)) — confirm against the assembler
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(pipe_class_default);
%}
14678 
// Arithmetic right shift of 8 short lanes by an immediate (128-bit vector).
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;  // Java masks int shift counts to 5 bits
    if (sh >= 16) sh = 15;                // arithmetic shift saturates at lane width - 1 (sign fill)
    sh = -sh & 15;                        // NOTE(review): negated-and-masked immediate form — confirm against the assembler
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(pipe_class_default);
%}
14693 
// Logical right shift of 2 or 4 short lanes by an immediate (64-bit vector).
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;  // Java masks int shift counts to 5 bits
    if (sh >= 16) {
      // Logical shift by >= lane width yields zero: clear dst with eor(src, src).
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      // NOTE(review): ushr appears to take (-n & (esize-1)) — confirm against the assembler.
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(pipe_class_default);
%}
14713 
// Logical right shift of 8 short lanes by an immediate (128-bit vector).
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;  // Java masks int shift counts to 5 bits
    if (sh >= 16) {
      // Logical shift by >= lane width yields zero: clear dst with eor(src, src).
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      // NOTE(review): negated-and-masked immediate form — confirm against the assembler.
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(pipe_class_default);
%}
14732 
// Variable shift of 2 int lanes by a per-lane count in a vector register.
// SSHL handles both directions (negative counts shift right), so both
// LShiftVI and RShiftVI map here — see vsll4S for the count convention.
instruct vsll2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14746 
// Variable shift of 4 int lanes by a per-lane count in a vector register
// (left and arithmetic right via SSHL's sign convention).
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14760 
// Variable logical right shift of 2 int lanes — USHL with negated counts
// (see vsrl4S for the convention).
instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14773 
// Variable logical right shift of 4 int lanes — USHL with negated counts.
instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14786 
// Left shift of 2 int lanes by an immediate. No overflow branch is needed:
// the &31 mask matches Java int shift semantics and 31 < the 32-bit lane width.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}
14799 
// Left shift of 4 int lanes by an immediate; &31 mask matches Java semantics.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}
14812 
// Arithmetic right shift of 2 int lanes by an immediate.
// NOTE(review): the immediate is passed as (-n & 31), the same
// negated-and-masked form the byte/short rules build explicitly — confirm
// this matches the assembler's sshr immediate convention.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}
14825 
// Arithmetic right shift of 4 int lanes by an immediate (negated-and-masked
// immediate form — see vsra2I_imm).
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}
14838 
// Logical right shift of 2 int lanes by an immediate (negated-and-masked
// immediate form — see vsra2I_imm).
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}
14851 
// Logical right shift of 4 int lanes by an immediate (negated-and-masked
// immediate form — see vsra2I_imm).
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}
14864 
// Variable shift of 2 long lanes by a per-lane count in a vector register
// (left and arithmetic right via SSHL's sign convention — see vsll4S).
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14878 
// Variable logical right shift of 2 long lanes — USHL with negated counts.
instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14891 
// Left shift of 2 long lanes by an immediate; the &63 mask matches Java
// long shift semantics and 63 < the 64-bit lane width, so no overflow branch.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 63);
  %}
  ins_pipe(pipe_class_default);
%}
14904 
// Arithmetic right shift of 2 long lanes by an immediate (negated-and-masked
// immediate form, 6-bit mask for 64-bit lanes — see vsra2I_imm).
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(pipe_class_default);
%}
14917 
// Logical right shift of 2 long lanes by an immediate (negated-and-masked
// immediate form, 6-bit mask for 64-bit lanes — see vsra2I_imm).
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(pipe_class_default);
%}
14930 
14931 //----------PEEPHOLE RULES-----------------------------------------------------
14932 // These must follow all instruction definitions as they use the names
14933 // defined in the instructions definitions.
14934 //
14935 // peepmatch ( root_instr_name [preceding_instruction]* );
14936 //
14937 // peepconstraint %{
14938 // (instruction_number.operand_name relational_op instruction_number.operand_name
14939 //  [, ...] );
14940 // // instruction numbers are zero-based using left to right order in peepmatch
14941 //
14942 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
14943 // // provide an instruction_number.operand_name for each operand that appears
14944 // // in the replacement instruction's match rule
14945 //
14946 // ---------VM FLAGS---------------------------------------------------------
14947 //
14948 // All peephole optimizations can be turned off using -XX:-OptoPeephole
14949 //
14950 // Each peephole rule is given an identifying number starting with zero and
14951 // increasing by one in the order seen by the parser.  An individual peephole
14952 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
14953 // on the command-line.
14954 //
14955 // ---------CURRENT LIMITATIONS----------------------------------------------
14956 //
14957 // Only match adjacent instructions in same basic block
14958 // Only equality constraints
14959 // Only constraints between operands, not (0.dest_reg == RAX_enc)
14960 // Only one replacement instruction
14961 //
14962 // ---------EXAMPLE----------------------------------------------------------
14963 //
14964 // // pertinent parts of existing instructions in architecture description
14965 // instruct movI(iRegINoSp dst, iRegI src)
14966 // %{
14967 //   match(Set dst (CopyI src));
14968 // %}
14969 //
14970 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
14971 // %{
14972 //   match(Set dst (AddI dst src));
14973 //   effect(KILL cr);
14974 // %}
14975 //
14976 // // Change (inc mov) to lea
14977 // peephole %{
//   // increment preceded by register-register move
14979 //   peepmatch ( incI_iReg movI );
14980 //   // require that the destination register of the increment
14981 //   // match the destination register of the move
14982 //   peepconstraint ( 0.dst == 1.dst );
14983 //   // construct a replacement instruction that sets
14984 //   // the destination to ( move's source register + one )
14985 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
14986 // %}
14987 //
14988 
14989 // Implementation no longer uses movX instructions since
14990 // machine-independent system no longer uses CopyX nodes.
14991 //
14992 // peephole
14993 // %{
14994 //   peepmatch (incI_iReg movI);
14995 //   peepconstraint (0.dst == 1.dst);
14996 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
14997 // %}
14998 
14999 // peephole
15000 // %{
15001 //   peepmatch (decI_iReg movI);
15002 //   peepconstraint (0.dst == 1.dst);
15003 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15004 // %}
15005 
15006 // peephole
15007 // %{
15008 //   peepmatch (addI_iReg_imm movI);
15009 //   peepconstraint (0.dst == 1.dst);
15010 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15011 // %}
15012 
15013 // peephole
15014 // %{
15015 //   peepmatch (incL_iReg movL);
15016 //   peepconstraint (0.dst == 1.dst);
15017 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15018 // %}
15019 
15020 // peephole
15021 // %{
15022 //   peepmatch (decL_iReg movL);
15023 //   peepconstraint (0.dst == 1.dst);
15024 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15025 // %}
15026 
15027 // peephole
15028 // %{
15029 //   peepmatch (addL_iReg_imm movL);
15030 //   peepconstraint (0.dst == 1.dst);
15031 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15032 // %}
15033 
15034 // peephole
15035 // %{
15036 //   peepmatch (addP_iReg_imm movP);
15037 //   peepconstraint (0.dst == 1.dst);
15038 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
15039 // %}
15040 
15041 // // Change load of spilled value to only a spill
15042 // instruct storeI(memory mem, iRegI src)
15043 // %{
15044 //   match(Set mem (StoreI mem src));
15045 // %}
15046 //
15047 // instruct loadI(iRegINoSp dst, memory mem)
15048 // %{
15049 //   match(Set dst (LoadI mem));
15050 // %}
15051 //
15052 
15053 //----------SMARTSPILL RULES---------------------------------------------------
15054 // These must follow all instruction definitions as they use the names
15055 // defined in the instructions definitions.
15056 
15057 // Local Variables:
15058 // mode: c++
15059 // End: