1 //
   2 // Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, Red Hat Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
// architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
//   r27-r31 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
  74 // as regards Java usage. we don't use any callee save registers
  75 // because this makes it difficult to de-optimise a frame (see comment
  76 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers
  80 
// Each 64-bit integer register is described as a real lower half (Rn)
// plus a virtual upper half (Rn_H), per the note above; the upper
// halves exist only for the register allocator and are never supplied
// as operands to memory ops.
reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
// r8 and r9 are deliberately not defined: they are reserved as scratch
// registers, invisible to the allocator (see note above).
reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
// r19-r26: callee-saved under the C convention (second column SOE) but
// volatile (SOC) for Java code -- no Java callee-saves, to simplify
// frame deoptimization (see note above).
reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
// r27-r31: system registers, never allocated for Java values (NS).
reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values up to 8 * 32
 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
// For Java use, float registers v0-v15 are always save-on-call (even
// though the platform ABI treats v8-v15 as callee-save). Float
// registers v16-v31 are SOC as per the platform spec.
 163 
  // Each FP/SIMD register Vn is described as four 32-bit slices so the
  // allocator can track up to 128 bits: Vn (bits 0-31), Vn_H (32-63),
  // Vn_J (64-95) and Vn_K (96-127). All are SOC for Java use (see the
  // note above about v8-v15 and the platform ABI).
  reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
  reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
  reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
  reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );

  reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
  reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
  reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
  reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );

  reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
  reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
  reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
  reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );

  reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
  reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
  reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
  reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );

  reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
  reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
  reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
  reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );

  reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
  reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
  reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
  reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );

  reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
  reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
  reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
  reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );

  reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
  reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
  reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
  reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );

  reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
  reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
  reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
  reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );

  reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
  reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
  reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
  reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );

  reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
  reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
  reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
  reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));

  reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
  reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
  reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
  reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));

  reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
  reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
  reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
  reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));

  reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
  reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
  reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
  reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));

  reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
  reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
  reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
  reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));

  reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
  reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
  reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
  reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));

  reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
  reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
  reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
  reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));

  reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
  reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
  reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
  reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));

  reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
  reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
  reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
  reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));

  reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
  reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
  reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
  reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));

  reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
  reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
  reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
  reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));

  reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
  reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
  reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
  reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));

  reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
  reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
  reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
  reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));

  reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
  reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
  reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
  reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));

  reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
  reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
  reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
  reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));

  reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
  reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
  reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
  reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));

  reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
  reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
  reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
  reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));

  reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
  reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
  reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
  reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));

  reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
  reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
  reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
  reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));

  reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
  reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
  reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
  reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));

  reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
  reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
  reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
  reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));

  reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
  reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
  reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
  reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
// The AArch64 CPSR status flag register is not directly accessible as
// an instruction operand. The FPSR status flag register is a system
// register which can be written/read using MSR/MRS but again does not
// appear as an operand (a code identifying the FPSR occurs as an
// immediate value in the instruction).
 333 
 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
// Allocation order for the general registers: scratch volatiles first,
// then the argument registers (last so they stay free for fixed
// calling sequences), then the SOE registers, then the non-allocatable
// system registers.
alloc_class chunk0(
    // volatiles
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,

    // arg registers
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,

    // non-volatiles
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers

    R27, R27_H, // heapbase
    R28, R28_H, // thread
    R29, R29_H, // fp
    R30, R30_H, // lr
    R31, R31_H, // sp
);

// Allocation order for the FP/SIMD registers: prefer v16-v31 (no C ABI
// save obligation), then the FP argument registers, then v8-v15.
alloc_class chunk1(

    // no save
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,

    // arg registers
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,

    // non-volatiles
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
);

// The flags pseudo register gets its own chunk.
alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
// Class for all 32 bit integer registers -- excludes SP which will
// never be used as an integer register
reg_class any_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26,
    R27,
    R28,
    R29,
    R30
);

// Singleton classes pin a value into one specific register, as
// required by fixed calling or runtime conventions.

// Singleton class for R0 int register
reg_class int_r0_reg(R0);

// Singleton class for R2 int register
reg_class int_r2_reg(R2);

// Singleton class for R3 int register
reg_class int_r3_reg(R3);

// Singleton class for R4 int register
reg_class int_r4_reg(R4);

// Class for all long integer registers (including SP)
reg_class any_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 517 
// Class for all non-special integer registers
reg_class no_special_reg32_no_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
 /* R29, */                     // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);

// Same as above but additionally allowing fp (r29) to be allocated;
// usable only when the frame pointer is not being preserved.
reg_class no_special_reg32_with_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
    R29,                        // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);

// Select between the two classes above at runtime: when
// PreserveFramePointer is set, r29 must stay out of the allocatable set.
reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
// Class for all non-special long integer registers
reg_class no_special_reg_no_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

// Same as above but additionally allowing fp (r29) to be allocated;
// usable only when the frame pointer is not being preserved.
reg_class no_special_reg_with_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
    R29, R29_H,                 // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

// Select between the two classes above at runtime: when
// PreserveFramePointer is set, r29 must stay out of the allocatable set.
reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
// Singleton 64-bit classes: each pins a long/pointer value into one
// specific register pair, as required by fixed calling or runtime
// conventions (e.g. return value in r0, rmethod in r12).

// Class for 64 bit register r0
reg_class r0_reg(
    R0, R0_H
);

// Class for 64 bit register r1
reg_class r1_reg(
    R1, R1_H
);

// Class for 64 bit register r2
reg_class r2_reg(
    R2, R2_H
);

// Class for 64 bit register r3
reg_class r3_reg(
    R3, R3_H
);

// Class for 64 bit register r4
reg_class r4_reg(
    R4, R4_H
);

// Class for 64 bit register r5
reg_class r5_reg(
    R5, R5_H
);

// Class for 64 bit register r10
reg_class r10_reg(
    R10, R10_H
);

// Class for 64 bit register r11
reg_class r11_reg(
    R11, R11_H
);

// Class for method register
reg_class method_reg(
    R12, R12_H
);

// Class for heapbase register
reg_class heapbase_reg(
    R27, R27_H
);

// Class for thread register
reg_class thread_reg(
    R28, R28_H
);

// Class for frame pointer register
reg_class fp_reg(
    R29, R29_H
);

// Class for link register
reg_class lr_reg(
    R30, R30_H
);

// Class for long sp register
reg_class sp_reg(
  R31, R31_H
);
 725 
// Class for all pointer registers (including the system registers;
// contrast with no_special_ptr_reg below)
reg_class ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);

// Class for all non_special pointer registers
reg_class no_special_ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 793 
// Class for all float registers (single precision: only the low
// 32-bit slice of each vector register)
reg_class float_reg(
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
    V17,
    V18,
    V19,
    V20,
    V21,
    V22,
    V23,
    V24,
    V25,
    V26,
    V27,
    V28,
    V29,
    V30,
    V31
);

// Double precision float registers have virtual `high halves' that
// are needed by the allocator.
// Class for all double registers
reg_class double_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 867 
// Class for all 64bit vector registers
// n.b. each physical vector register Vn is modelled as a pair of
// 32-bit allocator slots, Vn and Vn_H, covering its low 64 bits
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 903 
// Class for all 128bit vector registers
// n.b. each physical vector register Vn is modelled as four 32-bit
// allocator slots, Vn/Vn_H/Vn_J/Vn_K, covering the full 128 bits
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
// Singleton classes pinning an operand to one specific vector register.
// NOTE(review): the comments say "128 bit" but only the low 64-bit
// slots (Vn, Vn_H) are listed, mirroring vectord_reg rather than
// vectorx_reg -- confirm omission of the _J/_K slots is intentional.

// Class for 128 bit register v0
reg_class v0_reg(
    V0, V0_H
);

// Class for 128 bit register v1
reg_class v1_reg(
    V1, V1_H
);

// Class for 128 bit register v2
reg_class v2_reg(
    V2, V2_H
);

// Class for 128 bit register v3
reg_class v3_reg(
    V3, V3_H
);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls cost twice a register op.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile memory references are by far the most expensive.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "gc/shared/cardTableModRefBS.hpp"
 999 
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  // Always zero: this platform emits no call trampolines.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  // Always zero: this platform emits no call trampolines.
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1017 
class HandlerImpl {

 public:

  // emitters for the exception and deopt handler stubs; the
  // definitions are supplied out of line (not in this header section)
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // worst-case size of the exception handler: a single far branch
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // NOTE(review): 4 instruction words are reserved -- presumably
    // 1 adr + up to 3 for the far branch; confirm this still covers
    // MacroAssembler::far_branch_size()
    return 4 * NativeInstruction::instruction_size;
  }
};
1034 
  // graph traversal helpers
  // locate a membar feeding (parent) or fed by (child) node n via
  // intervening Ctl/Mem projections; the ProjNode*& out-params return
  // the projections that were traversed
  MemBarNode *has_parent_membar(const Node *n,
                                ProjNode *&ctl, ProjNode *&mem);
  MemBarNode *has_child_membar(const MemBarNode *n,
                               ProjNode *&ctl, ProjNode *&mem);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs
  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // Use barrier instructions for unsafe volatile gets rather than
  // trying to identify an exact signature for them
  const bool UseBarriersForUnsafeVolatileGet = false;
1053 %}
1054 
1055 source %{
1056 
1057   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1058   // use to implement volatile reads and writes. For a volatile read
1059   // we simply need
1060   //
1061   //   ldar<x>
1062   //
1063   // and for a volatile write we need
1064   //
1065   //   stlr<x>
1066   // 
1067   // Alternatively, we can implement them by pairing a normal
1068   // load/store with a memory barrier. For a volatile read we need
1069   // 
1070   //   ldr<x>
1071   //   dmb ishld
1072   //
1073   // for a volatile write
1074   //
1075   //   dmb ish
1076   //   str<x>
1077   //   dmb ish
1078   //
1079   // In order to generate the desired instruction sequence we need to
1080   // be able to identify specific 'signature' ideal graph node
1081   // sequences which i) occur as a translation of a volatile reads or
1082   // writes and ii) do not occur through any other translation or
  // graph transformation. We can then provide alternative adlc
1084   // matching rules which translate these node sequences to the
1085   // desired machine code sequences. Selection of the alternative
1086   // rules can be implemented by predicates which identify the
1087   // relevant node sequences.
1088   //
1089   // The ideal graph generator translates a volatile read to the node
1090   // sequence
1091   //
1092   //   LoadX[mo_acquire]
1093   //   MemBarAcquire
1094   //
1095   // As a special case when using the compressed oops optimization we
1096   // may also see this variant
1097   //
1098   //   LoadN[mo_acquire]
1099   //   DecodeN
1100   //   MemBarAcquire
1101   //
1102   // A volatile write is translated to the node sequence
1103   //
1104   //   MemBarRelease
1105   //   StoreX[mo_release]
1106   //   MemBarVolatile
1107   //
1108   // n.b. the above node patterns are generated with a strict
1109   // 'signature' configuration of input and output dependencies (see
1110   // the predicates below for exact details). The two signatures are
1111   // unique to translated volatile reads/stores -- they will not
1112   // appear as a result of any other bytecode translation or inlining
1113   // nor as a consequence of optimizing transforms.
1114   //
1115   // We also want to catch inlined unsafe volatile gets and puts and
1116   // be able to implement them using either ldar<x>/stlr<x> or some
1117   // combination of ldr<x>/stlr<x> and dmb instructions.
1118   //
1119   // Inlined unsafe volatiles puts manifest as a minor variant of the
1120   // normal volatile put node sequence containing an extra cpuorder
1121   // membar
1122   //
1123   //   MemBarRelease
1124   //   MemBarCPUOrder
1125   //   StoreX[mo_release]
1126   //   MemBarVolatile
1127   //
1128   // n.b. as an aside, the cpuorder membar is not itself subject to
1129   // matching and translation by adlc rules.  However, the rule
1130   // predicates need to detect its presence in order to correctly
1131   // select the desired adlc rules.
1132   //
1133   // Inlined unsafe volatiles gets manifest as a somewhat different
1134   // node sequence to a normal volatile get
1135   //
1136   //   MemBarCPUOrder
1137   //        ||       \\
1138   //   MemBarAcquire LoadX[mo_acquire]
1139   //        ||
1140   //   MemBarCPUOrder
1141   //
1142   // In this case the acquire membar does not directly depend on the
1143   // load. However, we can be sure that the load is generated from an
1144   // inlined unsafe volatile get if we see it dependent on this unique
1145   // sequence of membar nodes. Similarly, given an acquire membar we
1146   // can know that it was added because of an inlined unsafe volatile
1147   // get if it is fed and feeds a cpuorder membar and if its feed
1148   // membar also feeds an acquiring load.
1149   //
1150   // So, where we can identify these volatile read and write
1151   // signatures we can choose to plant either of the above two code
1152   // sequences. For a volatile read we can simply plant a normal
1153   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1154   // also choose to inhibit translation of the MemBarAcquire and
1155   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1156   //
1157   // When we recognise a volatile store signature we can choose to
  // plant a dmb ish as a translation for the MemBarRelease, a
1159   // normal str<x> and then a dmb ish for the MemBarVolatile.
1160   // Alternatively, we can inhibit translation of the MemBarRelease
1161   // and MemBarVolatile and instead plant a simple stlr<x>
1162   // instruction.
1163   //
1164   // Of course, the above only applies when we see these signature
1165   // configurations. We still want to plant dmb instructions in any
1166   // other cases where we may see a MemBarAcquire, MemBarRelease or
1167   // MemBarVolatile. For example, at the end of a constructor which
1168   // writes final/volatile fields we will see a MemBarRelease
1169   // instruction and this needs a 'dmb ish' lest we risk the
1170   // constructed object being visible without making the
1171   // final/volatile field writes visible.
1172   //
1173   // n.b. the translation rules below which rely on detection of the
1174   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1175   // If we see anything other than the signature configurations we
  // always just translate the loads and stores to ldr<x> and str<x>
1177   // and translate acquire, release and volatile membars to the
1178   // relevant dmb instructions.
1179   //
1180   // n.b.b as a case in point for the above comment, the current
1181   // predicates don't detect the precise signature for certain types
1182   // of volatile object stores (where the heap_base input type is not
1183   // known at compile-time to be non-NULL). In those cases the
1184   // MemBarRelease and MemBarVolatile bracket an if-then-else sequence
1185   // with a store in each branch (we need a different store depending
1186   // on whether heap_base is actually NULL). In such a case we will
1187   // just plant a dmb both before and after the branch/merge. The
1188   // predicate could (and probably should) be fixed later to also
1189   // detect this case.
1190 
1191   // graph traversal helpers
1192 
1193   // if node n is linked to a parent MemBarNode by an intervening
1194   // Control or Memory ProjNode return the MemBarNode otherwise return
1195   // NULL.
1196   //
1197   // n may only be a Load or a MemBar.
1198   //
1199   // The ProjNode* references c and m are used to return the relevant
1200   // nodes.
1201 
1202   MemBarNode *has_parent_membar(const Node *n, ProjNode *&c, ProjNode *&m)
1203   {
1204     Node *ctl = NULL;
1205     Node *mem = NULL;
1206     Node *membar = NULL;
1207 
1208     if (n->is_Load()) {
1209       ctl = n->lookup(LoadNode::Control);
1210       mem = n->lookup(LoadNode::Memory);
1211     } else if (n->is_MemBar()) {
1212       ctl = n->lookup(TypeFunc::Control);
1213       mem = n->lookup(TypeFunc::Memory);
1214     } else {
1215         return NULL;
1216     }
1217 
1218     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj())
1219       return NULL;
1220 
1221     c = ctl->as_Proj();
1222 
1223     membar = ctl->lookup(0);
1224 
1225     if (!membar || !membar->is_MemBar())
1226       return NULL;
1227 
1228     m = mem->as_Proj();
1229 
1230     if (mem->lookup(0) != membar)
1231       return NULL;
1232 
1233     return membar->as_MemBar();
1234   }
1235 
  // if n is linked to a child MemBarNode by intervening Control and
  // Memory ProjNodes return the MemBarNode otherwise return NULL.
  //
  // The ProjNode *& references c and m are used to return the
  // relevant projections. n.b. they are written whenever the membar
  // has both projections, even if the final result is NULL.

  MemBarNode *has_child_membar(const MemBarNode *n, ProjNode *&c, ProjNode *&m)
  {
    ProjNode *ctl = n->proj_out(TypeFunc::Control);
    ProjNode *mem = n->proj_out(TypeFunc::Memory);

    // MemBar needs to have both a Ctl and Mem projection
    if (! ctl || ! mem)
      return NULL;

    c = ctl;
    m = mem;

    MemBarNode *child = NULL;
    Node *x;

    // search the Ctl projection's users for a membar
    for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
      x = ctl->fast_out(i);
      // if we see a membar we keep hold of it. we may also see a new
      // arena copy of the original but it will appear later
      if (x->is_MemBar()) {
          child = x->as_MemBar();
          break;
      }
    }

    if (child == NULL)
      return NULL;

    // the same membar must also consume the Mem projection
    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
      x = mem->fast_out(i);
      // if we see a membar we keep hold of it. we may also see a new
      // arena copy of the original but it will appear later
      if (x == child) {
        return child;
      }
    }
    return NULL;
  }
1281 
1282   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1283 
// Returns true when the MemBarAcquire passed as barrier belongs to
// one of the recognised volatile-read signatures and can therefore
// be elided (the associated load will be emitted as ldar<x>).
bool unnecessary_acquire(const Node *barrier) {
  // assert barrier->is_MemBar();
  if (UseBarriersForVolatile)
    // we need to plant a dmb
    return false;

  // a volatile read derived from bytecode (or also from an inlined
  // SHA field read via LibraryCallKit::load_field_from_object)
  // manifests as a LoadX[mo_acquire] followed by an acquire membar
  // with a bogus read dependency on its preceding load. so in those
  // cases we will find the load node at the PARMS offset of the
  // acquire membar.  n.b. there may be an intervening DecodeN node.
  //
  // a volatile load derived from an inlined unsafe field access
  // manifests as a cpuorder membar with Ctl and Mem projections
  // feeding both an acquire membar and a LoadX[mo_acquire]. The
  // acquire then feeds another cpuorder membar via Ctl and Mem
  // projections. The load has no output dependency on these trailing
  // membars because subsequent nodes inserted into the graph take
  // their control feed from the final membar cpuorder meaning they
  // are all ordered after the load.

  Node *x = barrier->lookup(TypeFunc::Parms);
  if (x) {
    // we are starting from an acquire and it has a fake dependency
    //
    // need to check for
    //
    //   LoadX[mo_acquire]
    //   {  |1   }
    //   {DecodeN}
    //      |Parms
    //   MemBarAcquire*
    //
    // where * tags node we were passed
    // and |k means input k
    if (x->is_DecodeNarrowPtr())
      x = x->in(1);

    return (x->is_Load() && x->as_Load()->is_acquire());
  }

  // only continue if we want to try to match unsafe volatile gets
  if (UseBarriersForUnsafeVolatileGet)
    return false;

  // need to check for
  //
  //     MemBarCPUOrder
  //        ||       \\
  //   MemBarAcquire* LoadX[mo_acquire]
  //        ||
  //   MemBarCPUOrder
  //
  // where * tags node we were passed
  // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes

  // check for a parent MemBarCPUOrder
  ProjNode *ctl;
  ProjNode *mem;
  MemBarNode *parent = has_parent_membar(barrier, ctl, mem);
  if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
    return false;
  // ensure the proj nodes both feed a LoadX[mo_acquire]
  LoadNode *ld = NULL;
  for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
    x = ctl->fast_out(i);
    // if we see a load we keep hold of it and stop searching
    if (x->is_Load()) {
      ld = x->as_Load();
      break;
    }
  }
  // it must be an acquiring load
  if (! ld || ! ld->is_acquire())
    return false;
  for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
    x = mem->fast_out(i);
    // if we see the same load we drop it and stop searching
    if (x == ld) {
      ld = NULL;
      break;
    }
  }
  // we must have dropped the load
  if (ld)
    return false;
  // check for a child cpuorder membar
  MemBarNode *child  = has_child_membar(barrier->as_MemBar(), ctl, mem);
  if (!child || child->Opcode() != Op_MemBarCPUOrder)
    return false;

  return true;
}
1378 
1379 bool needs_acquiring_load(const Node *n)
1380 {
1381   // assert n->is_Load();
1382   if (UseBarriersForVolatile)
1383     // we use a normal load and a dmb
1384     return false;
1385 
1386   LoadNode *ld = n->as_Load();
1387 
1388   if (!ld->is_acquire())
1389     return false;
1390 
1391   // check if this load is feeding an acquire membar
1392   //
1393   //   LoadX[mo_acquire]
1394   //   {  |1   }
1395   //   {DecodeN}
1396   //      |Parms
1397   //   MemBarAcquire*
1398   //
1399   // where * tags node we were passed
1400   // and |k means input k
1401 
1402   Node *start = ld;
1403   Node *mbacq = NULL;
1404 
1405   // if we hit a DecodeNarrowPtr we reset the start node and restart
1406   // the search through the outputs
1407  restart:
1408 
1409   for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
1410     Node *x = start->fast_out(i);
1411     if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
1412       mbacq = x;
1413     } else if (!mbacq &&
1414                (x->is_DecodeNarrowPtr() ||
1415                 (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
1416       start = x;
1417       goto restart;
1418     }
1419   }
1420 
1421   if (mbacq) {
1422     return true;
1423   }
1424 
1425   // only continue if we want to try to match unsafe volatile gets
1426   if (UseBarriersForUnsafeVolatileGet)
1427     return false;
1428 
1429   // check if Ctl and Proj feed comes from a MemBarCPUOrder
1430   //
1431   //     MemBarCPUOrder
1432   //        ||       \\
1433   //   MemBarAcquire* LoadX[mo_acquire]
1434   //        ||
1435   //   MemBarCPUOrder
1436 
1437   MemBarNode *membar;
1438   ProjNode *ctl;
1439   ProjNode *mem;
1440 
1441   membar = has_parent_membar(ld, ctl, mem);
1442 
1443   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder)
1444     return false;
1445 
1446   // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
1447 
1448   membar = has_child_membar(membar, ctl, mem);
1449 
1450   if (!membar || !membar->Opcode() == Op_MemBarAcquire)
1451     return false;
1452 
1453   membar = has_child_membar(membar, ctl, mem);
1454   
1455   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder)
1456     return false;
1457 
1458   return true;
1459 }
1460 
// Returns true when the MemBarRelease passed as n belongs to the
// recognised volatile-store signature and can therefore be elided
// (the store will be emitted as stlr<x>).
bool unnecessary_release(const Node *n) {
  // assert n->is_MemBar();
  if (UseBarriersForVolatile)
    // we need to plant a dmb
    return false;

  // ok, so we can omit this release barrier if it has been inserted
  // as part of a volatile store sequence
  //
  //   MemBarRelease
  //  {      ||      }
  //  {MemBarCPUOrder} -- optional
  //         ||     \\
  //         ||     StoreX[mo_release]
  //         | \     /
  //         | MergeMem
  //         | /
  //   MemBarVolatile
  //
  // where
  //  || and \\ represent Ctl and Mem feeds via Proj nodes
  //  | \ and / indicate further routing of the Ctl and Mem feeds
  //
  // so we need to check that
  //
  // i) the release membar (or its dependent cpuorder membar) feeds
  // control to a store node (via a Control project node)
  //
  // ii) the store is ordered release
  //
  // iii) the release membar (or its dependent cpuorder membar) feeds
  // control to a volatile membar (via the same Control project node)
  //
  // iv) the release membar feeds memory to a merge mem and to the
  // same store (both via a single Memory proj node)
  //
  // v) the store outputs to the merge mem
  //
  // vi) the merge mem outputs to the same volatile membar
  //
  // n.b. if this is an inlined unsafe node then the release membar
  // may feed its control and memory links via an intervening cpuorder
  // membar. this case can be dealt with when we check the release
  // membar projections. if they both feed a single cpuorder membar
  // node continue to make the same checks as above but with the
  // cpuorder membar substituted for the release membar. if they don't
  // both feed a cpuorder membar then the check fails.
  //
  // n.b.b. for an inlined unsafe store of an object in the case where
  // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see
  // an embedded if then else where we expect the store. this is
  // needed to do the right type of store depending on whether
  // heap_base is NULL. We could check for that but for now we can
  // just take the hit of inserting a redundant dmb for this
  // redundant volatile membar

  MemBarNode *barrier = n->as_MemBar();
  ProjNode *ctl;
  ProjNode *mem;
  // check for an intervening cpuorder membar
  MemBarNode *b = has_child_membar(barrier, ctl, mem);
  if (b && b->Opcode() == Op_MemBarCPUOrder) {
    // ok, so start from the dependent cpuorder barrier
    barrier = b;
  }
  // check the ctl and mem flow
  ctl = barrier->proj_out(TypeFunc::Control);
  mem = barrier->proj_out(TypeFunc::Memory);

  // the barrier needs to have both a Ctl and Mem projection
  if (! ctl || ! mem)
    return false;

  Node *x = NULL;
  Node *mbvol = NULL;
  StoreNode * st = NULL;

  // For a normal volatile write the Ctl ProjNode should have output
  // to a MemBarVolatile and a Store marked as releasing
  //
  // n.b. for an inlined unsafe store of an object in the case where
  // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see
  // an embedded if then else where we expect the store. this is
  // needed to do the right type of store depending on whether
  // heap_base is NULL. We could check for that case too but for now
  // we can just take the hit of inserting a dmb and a non-volatile
  // store to implement the volatile store

  for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
    x = ctl->fast_out(i);
    if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
      // at most one volatile membar may follow
      if (mbvol) {
        return false;
      }
      mbvol = x;
    } else if (x->is_Store()) {
      st = x->as_Store();
      if (! st->is_release()) {
        return false;
      }
    } else if (!x->is_Mach()) {
      // we may see mach nodes added during matching but nothing else
      return false;
    }
  }

  if (!mbvol || !st)
    return false;

  // the Mem ProjNode should output to a MergeMem and the same Store
  Node *mm = NULL;
  for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
    x = mem->fast_out(i);
    if (!mm && x->is_MergeMem()) {
      mm = x;
    } else if (x != st && !x->is_Mach()) {
      // we may see mach nodes added during matching but nothing else
      return false;
    }
  }

  if (!mm)
    return false;

  // the MergeMem should output to the MemBarVolatile
  for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
    x = mm->fast_out(i);
    if (x != mbvol && !x->is_Mach()) {
      // we may see mach nodes added during matching but nothing else
      return false;
    }
  }

  return true;
}
1596 
1597 bool unnecessary_volatile(const Node *n) {
1598   // assert n->is_MemBar();
1599   if (UseBarriersForVolatile)
1600     // we need to plant a dmb
1601     return false;
1602 
1603   // ok, so we can omit this volatile barrier if it has been inserted
1604   // as part of a volatile store sequence
1605   //
1606   //   MemBarRelease
1607   //  {      ||      }
1608   //  {MemBarCPUOrder} -- optional
1609   //         ||     \\
1610   //         ||     StoreX[mo_release]
1611   //         | \     /
1612   //         | MergeMem
1613   //         | /
1614   //   MemBarVolatile
1615   //
1616   // where
1617   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1618   //  | \ and / indicate further routing of the Ctl and Mem feeds
1619   // 
1620   // we need to check that
1621   //
1622   // i) the volatile membar gets its control feed from a release
1623   // membar (or its dependent cpuorder membar) via a Control project
1624   // node
1625   //
1626   // ii) the release membar (or its dependent cpuorder membar) also
1627   // feeds control to a store node via the same proj node
1628   //
1629   // iii) the store is ordered release
1630   //
1631   // iv) the release membar (or its dependent cpuorder membar) feeds
1632   // memory to a merge mem and to the same store (both via a single
1633   // Memory proj node)
1634   //
1635   // v) the store outputs to the merge mem
1636   //
1637   // vi) the merge mem outputs to the volatile membar
1638   //
1639   // n.b. for an inlined unsafe store of an object in the case where
1640   // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see
1641   // an embedded if then else where we expect the store. this is
1642   // needed to do the right type of store depending on whether
1643   // heap_base is NULL. We could check for that but for now we can
1644   // just take the hit of on inserting a redundant dmb for this
1645   // redundant volatile membar
1646 
1647   MemBarNode *mbvol = n->as_MemBar();
1648   Node *x = n->lookup(TypeFunc::Control);
1649 
1650   if (! x || !x->is_Proj())
1651     return false;
1652 
1653   ProjNode *proj = x->as_Proj();
1654 
1655   x = proj->lookup(0);
1656 
1657   if (!x || !x->is_MemBar())
1658     return false;
1659 
1660   MemBarNode *barrier = x->as_MemBar();
1661 
1662   // if the barrier is a release membar we have what we want. if it is
1663   // a cpuorder membar then we need to ensure that it is fed by a
1664   // release membar in which case we proceed to check the graph below
1665   // this cpuorder membar as the feed
1666 
1667   if (x->Opcode() != Op_MemBarRelease) {
1668     if (x->Opcode() != Op_MemBarCPUOrder)
1669       return false;
1670     ProjNode *ctl;
1671     ProjNode *mem;
1672     MemBarNode *b = has_parent_membar(x, ctl, mem);
1673     if (!b || !b->Opcode() == Op_MemBarRelease)
1674       return false;
1675   }
1676 
1677   ProjNode *ctl = barrier->proj_out(TypeFunc::Control);
1678   ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1679 
1680   // barrier needs to have both a Ctl and Mem projection
1681   // and we need to have reached it via the Ctl projection
1682   if (! ctl || ! mem || ctl != proj)
1683     return false;
1684 
1685   StoreNode * st = NULL;
1686 
1687   // The Ctl ProjNode should have output to a MemBarVolatile and
1688   // a Store marked as releasing
1689   for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1690     x = ctl->fast_out(i);
1691     if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
1692       if (x != mbvol) {
1693         return false;
1694       }
1695     } else if (x->is_Store()) {
1696       st = x->as_Store();
1697       if (! st->is_release()) {
1698         return false;
1699       }
1700     } else if (!x->is_Mach()){
1701       // we may see mach nodes added during matching but nothing else
1702       return false;
1703     }
1704   }
1705 
1706   if (!st)
1707     return false;
1708 
1709   // the Mem ProjNode should output to a MergeMem and the same Store
1710   Node *mm = NULL;
1711   for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1712     x = mem->fast_out(i);
1713     if (!mm && x->is_MergeMem()) {
1714       mm = x;
1715     } else if (x != st && !x->is_Mach()) {
1716       // we may see mach nodes added during matching but nothing else
1717       return false;
1718     }
1719   }
1720 
1721   if (!mm)
1722     return false;
1723 
1724   // the MergeMem should output to the MemBarVolatile
1725   for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
1726     x = mm->fast_out(i);
1727     if (x != mbvol && !x->is_Mach()) {
1728       // we may see mach nodes added during matching but nothing else
1729       return false;
1730     }
1731   }
1732 
1733   return true;
1734 }
1735 
1736 
1737 
1738 bool needs_releasing_store(const Node *n)
1739 {
1740   // assert n->is_Store();
1741   if (UseBarriersForVolatile)
1742     // we use a normal store and dmb combination
1743     return false;
1744 
1745   StoreNode *st = n->as_Store();
1746 
1747   if (!st->is_release())
1748     return false;
1749 
1750   // check if this store is bracketed by a release (or its dependent
1751   // cpuorder membar) and a volatile membar
1752   //
1753   //   MemBarRelease
1754   //  {      ||      }
1755   //  {MemBarCPUOrder} -- optional
1756   //         ||     \\
1757   //         ||     StoreX[mo_release]
1758   //         | \     /
1759   //         | MergeMem
1760   //         | /
1761   //   MemBarVolatile
1762   //
1763   // where
1764   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1765   //  | \ and / indicate further routing of the Ctl and Mem feeds
1766   // 
1767 
1768 
1769   Node *x = st->lookup(TypeFunc::Control);
1770 
1771   if (! x || !x->is_Proj())
1772     return false;
1773 
1774   ProjNode *proj = x->as_Proj();
1775 
1776   x = proj->lookup(0);
1777 
1778   if (!x || !x->is_MemBar())
1779     return false;
1780 
1781   MemBarNode *barrier = x->as_MemBar();
1782 
1783   // if the barrier is a release membar we have what we want. if it is
1784   // a cpuorder membar then we need to ensure that it is fed by a
1785   // release membar in which case we proceed to check the graph below
1786   // this cpuorder membar as the feed
1787 
1788   if (x->Opcode() != Op_MemBarRelease) {
1789     if (x->Opcode() != Op_MemBarCPUOrder)
1790       return false;
1791     Node *ctl = x->lookup(TypeFunc::Control);
1792     Node *mem = x->lookup(TypeFunc::Memory);
1793     if (!ctl || !ctl->is_Proj() || !mem || !mem->is_Proj())
1794       return false;
1795     x = ctl->lookup(0);
1796     if (!x || !x->is_MemBar() || !x->Opcode() == Op_MemBarRelease)
1797       return false;
1798     Node *y = mem->lookup(0);
1799     if (!y || y != x)
1800       return false;
1801   }
1802 
1803   ProjNode *ctl = barrier->proj_out(TypeFunc::Control);
1804   ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1805 
1806   // MemBarRelease needs to have both a Ctl and Mem projection
1807   // and we need to have reached it via the Ctl projection
1808   if (! ctl || ! mem || ctl != proj)
1809     return false;
1810 
1811   MemBarNode *mbvol = NULL;
1812 
1813   // The Ctl ProjNode should have output to a MemBarVolatile and
1814   // a Store marked as releasing
1815   for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1816     x = ctl->fast_out(i);
1817     if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
1818       mbvol = x->as_MemBar();
1819     } else if (x->is_Store()) {
1820       if (x != st) {
1821         return false;
1822       }
1823     } else if (!x->is_Mach()){
1824       return false;
1825     }
1826   }
1827 
1828   if (!mbvol)
1829     return false;
1830 
1831   // the Mem ProjNode should output to a MergeMem and the same Store
1832   Node *mm = NULL;
1833   for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1834     x = mem->fast_out(i);
1835     if (!mm && x->is_MergeMem()) {
1836       mm = x;
1837     } else if (x != st && !x->is_Mach()) {
1838       return false;
1839     }
1840   }
1841 
1842   if (!mm)
1843     return false;
1844 
1845   // the MergeMem should output to the MemBarVolatile
1846   for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
1847     x = mm->fast_out(i);
1848     if (x != mbvol && !x->is_Mach()) {
1849       return false;
1850     }
1851   }
1852 
1853   return true;
1854 }
1855 
1856 
1857 
1858 #define __ _masm.
1859 
1860 // advance declarations for helper functions to convert register
1861 // indices to register objects
1862 
1863 // the ad file has to provide implementations of certain methods
1864 // expected by the generic code
1865 //
1866 // REQUIRED FUNCTIONALITY
1867 
1868 //=============================================================================
1869 
1870 // !!!!! Special hack to get all types of calls to specify the byte offset
1871 //       from the start of the call to the point where the return address
1872 //       will point.
1873 
1874 int MachCallStaticJavaNode::ret_addr_offset()
1875 {
1876   // call should be a simple bl
1877   int off = 4;
1878   return off;
1879 }
1880 
// Offset from the start of a dynamic Java call to its return address:
// the inline-cache value is materialized with a movz/movk/movk
// sequence before the bl, four 4-byte instructions in total.
int MachCallDynamicJavaNode::ret_addr_offset()
{
  return 16; // movz, movk, movk, bl
}
1885 
1886 int MachCallRuntimeNode::ret_addr_offset() {
1887   // for generated stubs the call will be
1888   //   far_call(addr)
1889   // for real runtime callouts it will be six instructions
1890   // see aarch64_enc_java_to_runtime
1891   //   adr(rscratch2, retaddr)
1892   //   lea(rscratch1, RuntimeAddress(addr)
1893   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1894   //   blrt rscratch1
1895   CodeBlob *cb = CodeCache::find_blob(_entry_point);
1896   if (cb) {
1897     return MacroAssembler::far_branch_size();
1898   } else {
1899     return 6 * NativeInstruction::instruction_size;
1900   }
1901 }
1902 
1903 // Indicate if the safepoint node needs the polling page as an input
1904 
1905 // the shared code plants the oop data at the start of the generated
1906 // code for the safepoint node and that needs ot be at the load
1907 // instruction itself. so we cannot plant a mov of the safepoint poll
1908 // address followed by a load. setting this to true means the mov is
1909 // scheduled as a prior instruction. that's better for scheduling
1910 // anyway.
1911 
// See the block comment above: the poll address is passed as an
// explicit input so the address materialization can be scheduled ahead
// of the load that performs the poll.
bool SafePointNode::needs_polling_address_input()
{
  return true;
}
1916 
1917 //=============================================================================
1918 
1919 #ifndef PRODUCT
// Debug listing for a breakpoint node.
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
1923 #endif
1924 
// A breakpoint is emitted as a brk #0 trap instruction.
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  __ brk(0);
}
1929 
uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  // let the generic code measure the emitted instructions
  return MachNode::size(ra_);
}
1933 
1934 //=============================================================================
1935 
1936 #ifndef PRODUCT
  // Debug listing for a nop padding node.
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
1940 #endif
1941 
1942   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
1943     MacroAssembler _masm(&cbuf);
1944     for (int i = 0; i < _count; i++) {
1945       __ nop();
1946     }
1947   }
1948 
  uint MachNopNode::size(PhaseRegAlloc*) const {
    // each nop is one fixed-width instruction
    return _count * NativeInstruction::instruction_size;
  }
1952 
1953 //=============================================================================
// The constant base produces no value in a register, so its output
// register mask is empty.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
1955 
// The constant table is addressed absolutely on aarch64, so there is
// no bias between the table base register and the table start.
int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}
1959 
// No post-register-allocation expansion is needed for the constant base.
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
// Never called: requires_postalloc_expand() returns false above.
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}
1964 
// With absolute addressing the constant base emits no code at all.
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}
1968 
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  // empty encoding, so zero bytes
  return 0;
}
1972 
1973 #ifndef PRODUCT
// Debug listing for the (empty) constant base node.
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
1977 #endif
1978 
1979 #ifndef PRODUCT
// Debug listing mirroring the instruction sequence emitted by
// MachPrologNode::emit: stack bang, then either a small-frame
// sub/stp sequence or a large-frame pre-indexed stp plus explicit sub.
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  // small frames fit the 9-bit scaled immediate of sub/stp
  if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
1999 #endif
2000 
// Emit the method prolog: patchable nop, optional stack bang, frame
// build, simulator notification and constant table base setup.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
2036 
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
2042 
int MachPrologNode::reloc() const
{
  // the prolog emits no relocatable values
  return 0;
}
2047 
2048 //=============================================================================
2049 
2050 #ifndef PRODUCT
// Debug listing mirroring MachEpilogNode::emit: frame teardown
// followed by an optional return-poll of the safepoint page.
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    // frame too large for an immediate; stage the size in rscratch1
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
2074 #endif
2075 
// Emit the method epilog: remove the frame, notify the simulator and,
// for method compilations, read the safepoint polling page.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_reentry);
  }

  if (do_polling() && C->is_method_compilation()) {
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
2091 
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}
2096 
int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}
2101 
// Use the default pipeline description for the epilog.
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}
2105 
2106 // This method seems to be obsolete. It is declared in machnode.hpp
2107 // and defined in all *.ad files, but it is never called. Should we
2108 // get rid of it?
// See the note above: apparently obsolete, kept to satisfy machnode.hpp.
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  return 4;
}
2113 
2114 //=============================================================================
2115 
2116 // Figure out which register class each belongs in: rc_int, rc_float or
2117 // rc_stack.
enum RC { rc_bad, rc_int, rc_float, rc_stack };  // register class of a spill operand
2119 
// Map an allocator register number to its register class.
static enum RC rc_class(OptoReg::Name reg) {

  if (reg == OptoReg::Bad) {
    return rc_bad;
  }

  // we have 30 int registers * 2 halves
  // (rscratch1 and rscratch2 are omitted)

  if (reg < 60) {
    return rc_int;
  }

  // we have 32 float register * 2 halves
  if (reg < 60 + 128) {
    return rc_float;
  }

  // Between float regs & stack is the flags regs.
  assert(OptoReg::is_stack(reg), "blow up if spilling flags");

  return rc_stack;
}
2143 
2144 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
2145   Compile* C = ra_->C;
2146 
2147   // Get registers to move.
2148   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
2149   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
2150   OptoReg::Name dst_hi = ra_->get_reg_second(this);
2151   OptoReg::Name dst_lo = ra_->get_reg_first(this);
2152 
2153   enum RC src_hi_rc = rc_class(src_hi);
2154   enum RC src_lo_rc = rc_class(src_lo);
2155   enum RC dst_hi_rc = rc_class(dst_hi);
2156   enum RC dst_lo_rc = rc_class(dst_lo);
2157 
2158   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
2159 
2160   if (src_hi != OptoReg::Bad) {
2161     assert((src_lo&1)==0 && src_lo+1==src_hi &&
2162            (dst_lo&1)==0 && dst_lo+1==dst_hi,
2163            "expected aligned-adjacent pairs");
2164   }
2165 
2166   if (src_lo == dst_lo && src_hi == dst_hi) {
2167     return 0;            // Self copy, no move.
2168   }
2169 
2170   if (bottom_type()->isa_vect() != NULL) {
2171     uint len = 4;
2172     uint ireg = ideal_reg();
2173     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
2174     if (cbuf) {
2175       MacroAssembler _masm(cbuf);
2176       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
2177       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
2178         // stack->stack
2179         int src_offset = ra_->reg2offset(src_lo);
2180         int dst_offset = ra_->reg2offset(dst_lo);
2181         assert((src_offset & 7) && (dst_offset & 7), "unaligned stack offset");
2182         len = 8;
2183         if (ireg == Op_VecD) {
2184           __ ldr(rscratch1, Address(sp, src_offset));
2185           __ str(rscratch1, Address(sp, dst_offset));
2186         } else {
2187           if (src_offset < 512) {
2188             __ ldp(rscratch1, rscratch2, Address(sp, src_offset));
2189           } else {
2190             __ ldr(rscratch1, Address(sp, src_offset));
2191             __ ldr(rscratch2, Address(sp, src_offset+4));
2192             len += 4;
2193           }
2194           if (dst_offset < 512) {
2195             __ stp(rscratch1, rscratch2, Address(sp, dst_offset));
2196           } else {
2197             __ str(rscratch1, Address(sp, dst_offset));
2198             __ str(rscratch2, Address(sp, dst_offset+4));
2199             len += 4;
2200           }
2201         }
2202       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
2203         __ orr(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2204                ireg == Op_VecD ? __ T8B : __ T16B,
2205                as_FloatRegister(Matcher::_regEncode[src_lo]),
2206                as_FloatRegister(Matcher::_regEncode[src_lo]));
2207       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
2208         __ str(as_FloatRegister(Matcher::_regEncode[src_lo]),
2209                ireg == Op_VecD ? __ D : __ Q,
2210                Address(sp, ra_->reg2offset(dst_lo)));
2211       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
2212         __ ldr(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2213                ireg == Op_VecD ? __ D : __ Q,
2214                Address(sp, ra_->reg2offset(src_lo)));
2215       } else {
2216         ShouldNotReachHere();
2217       }
2218     } else if (st) {
2219       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
2220         // stack->stack
2221         int src_offset = ra_->reg2offset(src_lo);
2222         int dst_offset = ra_->reg2offset(dst_lo);
2223         if (ireg == Op_VecD) {
2224           st->print("ldr  rscratch1, [sp, #%d]", src_offset);
2225           st->print("str  rscratch1, [sp, #%d]", dst_offset);
2226         } else {
2227           if (src_offset < 512) {
2228             st->print("ldp  rscratch1, rscratch2, [sp, #%d]", src_offset);
2229           } else {
2230             st->print("ldr  rscratch1, [sp, #%d]", src_offset);
2231             st->print("\nldr  rscratch2, [sp, #%d]", src_offset+4);
2232           }
2233           if (dst_offset < 512) {
2234             st->print("\nstp  rscratch1, rscratch2, [sp, #%d]", dst_offset);
2235           } else {
2236             st->print("\nstr  rscratch1, [sp, #%d]", dst_offset);
2237             st->print("\nstr  rscratch2, [sp, #%d]", dst_offset+4);
2238           }
2239         }
2240         st->print("\t# vector spill, stack to stack");
2241       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
2242         st->print("mov  %s, %s\t# vector spill, reg to reg",
2243                    Matcher::regName[dst_lo], Matcher::regName[src_lo]);
2244       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
2245         st->print("str  %s, [sp, #%d]\t# vector spill, reg to stack",
2246                    Matcher::regName[src_lo], ra_->reg2offset(dst_lo));
2247       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
2248         st->print("ldr  %s, [sp, #%d]\t# vector spill, stack to reg",
2249                    Matcher::regName[dst_lo], ra_->reg2offset(src_lo));
2250       }
2251     }
2252     return len;
2253   }
2254 
2255   switch (src_lo_rc) {
2256   case rc_int:
2257     if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
2258       if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2259           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
2260           // 64 bit
2261         if (cbuf) {
2262           MacroAssembler _masm(cbuf);
2263           __ mov(as_Register(Matcher::_regEncode[dst_lo]),
2264                  as_Register(Matcher::_regEncode[src_lo]));
2265         } else if (st) {
2266           st->print("mov  %s, %s\t# shuffle",
2267                     Matcher::regName[dst_lo],
2268                     Matcher::regName[src_lo]);
2269         }
2270       } else {
2271         // 32 bit
2272         if (cbuf) {
2273           MacroAssembler _masm(cbuf);
2274           __ movw(as_Register(Matcher::_regEncode[dst_lo]),
2275                   as_Register(Matcher::_regEncode[src_lo]));
2276         } else if (st) {
2277           st->print("movw  %s, %s\t# shuffle",
2278                     Matcher::regName[dst_lo],
2279                     Matcher::regName[src_lo]);
2280         }
2281       }
2282     } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
2283       if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2284           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
2285           // 64 bit
2286         if (cbuf) {
2287           MacroAssembler _masm(cbuf);
2288           __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2289                    as_Register(Matcher::_regEncode[src_lo]));
2290         } else if (st) {
2291           st->print("fmovd  %s, %s\t# shuffle",
2292                     Matcher::regName[dst_lo],
2293                     Matcher::regName[src_lo]);
2294         }
2295       } else {
2296         // 32 bit
2297         if (cbuf) {
2298           MacroAssembler _masm(cbuf);
2299           __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2300                    as_Register(Matcher::_regEncode[src_lo]));
2301         } else if (st) {
2302           st->print("fmovs  %s, %s\t# shuffle",
2303                     Matcher::regName[dst_lo],
2304                     Matcher::regName[src_lo]);
2305         }
2306       }
2307     } else {                    // gpr --> stack spill
2308       assert(dst_lo_rc == rc_stack, "spill to bad register class");
2309       int dst_offset = ra_->reg2offset(dst_lo);
2310       if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2311           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
2312           // 64 bit
2313         if (cbuf) {
2314           MacroAssembler _masm(cbuf);
2315           __ str(as_Register(Matcher::_regEncode[src_lo]),
2316                  Address(sp, dst_offset));
2317         } else if (st) {
2318           st->print("str  %s, [sp, #%d]\t# spill",
2319                     Matcher::regName[src_lo],
2320                     dst_offset);
2321         }
2322       } else {
2323         // 32 bit
2324         if (cbuf) {
2325           MacroAssembler _masm(cbuf);
2326           __ strw(as_Register(Matcher::_regEncode[src_lo]),
2327                  Address(sp, dst_offset));
2328         } else if (st) {
2329           st->print("strw  %s, [sp, #%d]\t# spill",
2330                     Matcher::regName[src_lo],
2331                     dst_offset);
2332         }
2333       }
2334     }
2335     return 4;
2336   case rc_float:
2337     if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
2338       if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2339           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
2340           // 64 bit
2341         if (cbuf) {
2342           MacroAssembler _masm(cbuf);
2343           __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
2344                    as_FloatRegister(Matcher::_regEncode[src_lo]));
2345         } else if (st) {
2346           st->print("fmovd  %s, %s\t# shuffle",
2347                     Matcher::regName[dst_lo],
2348                     Matcher::regName[src_lo]);
2349         }
2350       } else {
2351         // 32 bit
2352         if (cbuf) {
2353           MacroAssembler _masm(cbuf);
2354           __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
2355                    as_FloatRegister(Matcher::_regEncode[src_lo]));
2356         } else if (st) {
2357           st->print("fmovs  %s, %s\t# shuffle",
2358                     Matcher::regName[dst_lo],
2359                     Matcher::regName[src_lo]);
2360         }
2361       }
2362     } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
2363       if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2364           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
2365           // 64 bit
2366         if (cbuf) {
2367           MacroAssembler _masm(cbuf);
2368           __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2369                    as_FloatRegister(Matcher::_regEncode[src_lo]));
2370         } else if (st) {
2371           st->print("fmovd  %s, %s\t# shuffle",
2372                     Matcher::regName[dst_lo],
2373                     Matcher::regName[src_lo]);
2374         }
2375       } else {
2376         // 32 bit
2377         if (cbuf) {
2378           MacroAssembler _masm(cbuf);
2379           __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2380                    as_FloatRegister(Matcher::_regEncode[src_lo]));
2381         } else if (st) {
2382           st->print("fmovs  %s, %s\t# shuffle",
2383                     Matcher::regName[dst_lo],
2384                     Matcher::regName[src_lo]);
2385         }
2386       }
2387     } else {                    // fpr --> stack spill
2388       assert(dst_lo_rc == rc_stack, "spill to bad register class");
2389       int dst_offset = ra_->reg2offset(dst_lo);
2390       if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2391           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
2392           // 64 bit
2393         if (cbuf) {
2394           MacroAssembler _masm(cbuf);
2395           __ strd(as_FloatRegister(Matcher::_regEncode[src_lo]),
2396                  Address(sp, dst_offset));
2397         } else if (st) {
2398           st->print("strd  %s, [sp, #%d]\t# spill",
2399                     Matcher::regName[src_lo],
2400                     dst_offset);
2401         }
2402       } else {
2403         // 32 bit
2404         if (cbuf) {
2405           MacroAssembler _masm(cbuf);
2406           __ strs(as_FloatRegister(Matcher::_regEncode[src_lo]),
2407                  Address(sp, dst_offset));
2408         } else if (st) {
2409           st->print("strs  %s, [sp, #%d]\t# spill",
2410                     Matcher::regName[src_lo],
2411                     dst_offset);
2412         }
2413       }
2414     }
2415     return 4;
2416   case rc_stack:
2417     int src_offset = ra_->reg2offset(src_lo);
2418     if (dst_lo_rc == rc_int) {  // stack --> gpr load
2419       if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2420           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
2421           // 64 bit
2422         if (cbuf) {
2423           MacroAssembler _masm(cbuf);
2424           __ ldr(as_Register(Matcher::_regEncode[dst_lo]),
2425                  Address(sp, src_offset));
2426         } else if (st) {
2427           st->print("ldr  %s, [sp, %d]\t# restore",
2428                     Matcher::regName[dst_lo],
2429                     src_offset);
2430         }
2431       } else {
2432         // 32 bit
2433         if (cbuf) {
2434           MacroAssembler _masm(cbuf);
2435           __ ldrw(as_Register(Matcher::_regEncode[dst_lo]),
2436                   Address(sp, src_offset));
2437         } else if (st) {
2438           st->print("ldr  %s, [sp, %d]\t# restore",
2439                     Matcher::regName[dst_lo],
2440                    src_offset);
2441         }
2442       }
2443       return 4;
2444     } else if (dst_lo_rc == rc_float) { // stack --> fpr load
2445       if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2446           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
2447           // 64 bit
2448         if (cbuf) {
2449           MacroAssembler _masm(cbuf);
2450           __ ldrd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2451                  Address(sp, src_offset));
2452         } else if (st) {
2453           st->print("ldrd  %s, [sp, %d]\t# restore",
2454                     Matcher::regName[dst_lo],
2455                     src_offset);
2456         }
2457       } else {
2458         // 32 bit
2459         if (cbuf) {
2460           MacroAssembler _masm(cbuf);
2461           __ ldrs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2462                   Address(sp, src_offset));
2463         } else if (st) {
2464           st->print("ldrs  %s, [sp, %d]\t# restore",
2465                     Matcher::regName[dst_lo],
2466                    src_offset);
2467         }
2468       }
2469       return 4;
2470     } else {                    // stack --> stack copy
2471       assert(dst_lo_rc == rc_stack, "spill to bad register class");
2472       int dst_offset = ra_->reg2offset(dst_lo);
2473       if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
2474           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
2475           // 64 bit
2476         if (cbuf) {
2477           MacroAssembler _masm(cbuf);
2478           __ ldr(rscratch1, Address(sp, src_offset));
2479           __ str(rscratch1, Address(sp, dst_offset));
2480         } else if (st) {
2481           st->print("ldr  rscratch1, [sp, %d]\t# mem-mem spill",
2482                     src_offset);
2483           st->print("\n\t");
2484           st->print("str  rscratch1, [sp, %d]",
2485                     dst_offset);
2486         }
2487       } else {
2488         // 32 bit
2489         if (cbuf) {
2490           MacroAssembler _masm(cbuf);
2491           __ ldrw(rscratch1, Address(sp, src_offset));
2492           __ strw(rscratch1, Address(sp, dst_offset));
2493         } else if (st) {
2494           st->print("ldrw  rscratch1, [sp, %d]\t# mem-mem spill",
2495                     src_offset);
2496           st->print("\n\t");
2497           st->print("strw  rscratch1, [sp, %d]",
2498                     dst_offset);
2499         }
2500       }
2501       return 8;
2502     }
2503   }
2504 
2505   assert(false," bad rc_class for spill ");
2506   Unimplemented();
2507   return 0;
2508 
2509 }
2510 
2511 #ifndef PRODUCT
// Debug listing: delegate to implementation() in print-only mode.
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  if (!ra_)
    st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
  else
    implementation(NULL, ra_, false, st);
}
2518 #endif
2519 
// Emit the spill copy into the code buffer.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}
2523 
// Measure the spill copy without emitting or printing anything.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation(NULL, ra_, true, NULL);
}
2527 
2528 //=============================================================================
2529 
2530 #ifndef PRODUCT
2531 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2532   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2533   int reg = ra_->get_reg_first(this);
2534   st->print("add %s, rsp, #%d]\t# box lock",
2535             Matcher::regName[reg], offset);
2536 }
2537 #endif
2538 
// Compute the address of the lock's stack slot into the output
// register with a single add instruction.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);

  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg    = ra_->get_encode(this);

  // offsets outside the add/sub immediate range are not handled;
  // size() below assumes a single 4-byte instruction
  if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
    __ add(as_Register(reg), sp, offset);
  } else {
    ShouldNotReachHere();
  }
}
2551 
// Always a single 4-byte add instruction.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  return 4;
}
2556 
2557 //=============================================================================
2558 
2559 #ifndef PRODUCT
2560 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2561 {
2562   st->print_cr("# MachUEPNode");
2563   if (UseCompressedClassPointers) {
2564     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2565     if (Universe::narrow_klass_shift() != 0) {
2566       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
2567     }
2568   } else {
2569    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2570   }
2571   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
2572   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
2573 }
2574 #endif
2575 
// Emit the unverified entry point: compare the receiver's klass with
// the inline-cache klass and jump to the IC miss stub on mismatch.
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}
2589 
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  // variable size: measure the emitted instructions
  return MachNode::size(ra_);
}
2594 
2595 // REQUIRED EMIT CODE
2596 
2597 //=============================================================================
2598 
2599 // Emit exception handler code.
// Emit exception handler code: a far jump to the exception blob.
// Returns the handler's offset in the stub section, or 0 on failure.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // mov rscratch1 #exception_blob_entry_point
  // br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base =
  __ start_a_stub(size_exception_handler());
  if (base == NULL)  return 0;  // CodeBuffer::expand failed
  int offset = __ offset();
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
2616 
2617 // Emit deopt handler code.
// Emit deopt handler code: capture the return address in lr, then far
// jump to the deopt blob's unpack entry. Returns the handler's offset
// in the stub section, or 0 on failure.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base =
  __ start_a_stub(size_deopt_handler());
  if (base == NULL)  return 0;  // CodeBuffer::expand failed
  int offset = __ offset();

  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
2635 
2636 // REQUIRED MATCHER CODE
2637 
2638 //=============================================================================
2639 
2640 const bool Matcher::match_rule_supported(int opcode) {
2641 
2642   // TODO
2643   // identify extra cases that we might want to provide match rules for
2644   // e.g. Op_StrEquals and other intrinsics
2645   if (!has_match_rule(opcode)) {
2646     return false;
2647   }
2648 
2649   return true;  // Per default match rules are supported.
2650 }
2651 
// Never called on AArch64: aborts via Unimplemented() if it ever is.
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;  // unreachable; placates the compiler
}
2657 
// Short-branch selection is not used on this port: aborts via
// Unimplemented() if ever called.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset)
{
  Unimplemented();
  return false;  // unreachable; placates the compiler
}
2663 
// Is a 64-bit constant cheap enough to store as one long store?
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}
2669 
// Report whether long-to-float conversion is fast on this platform.
// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
2674 
2675 // Vector width in bytes.
2676 const int Matcher::vector_width_in_bytes(BasicType bt) {
2677   int size = MIN2(16,(int)MaxVectorSize);
2678   // Minimum 2 values in vector
2679   if (size < 2*type2aelembytes(bt)) size = 0;
2680   // But never < 4
2681   if (size < 4) size = 0;
2682   return size;
2683 }
2684 
2685 // Limits on vector size (number of elements) loaded into vector.
2686 const int Matcher::max_vector_size(const BasicType bt) {
2687   return vector_width_in_bytes(bt)/type2aelembytes(bt);
2688 }
2689 const int Matcher::min_vector_size(const BasicType bt) {
2690   return (type2aelembytes(bt) == 1) ? 4 : 2;
2691 }
2692 
2693 // Vector ideal reg.
2694 const int Matcher::vector_ideal_reg(int len) {
2695   switch(len) {
2696     case  4:
2697     case  8: return Op_VecD;
2698     case 16: return Op_VecX;
2699   }
2700   ShouldNotReachHere();
2701   return 0;
2702 }
2703 
// Vector shift counts always use a VecX register, regardless of the
// width of the vector being shifted.
const int Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecX;
}
2707 
// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;  // no AES intrinsics, so no original-key handling either
}
2712 
// Are misaligned vector stores/loads permitted?  (The "x86" wording
// here was inherited from the x86 port.)  Allowed unless the
// -XX:+AlignVector flag forces alignment.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}
2717 
// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray: presumably initializations at or
// below this size are expanded inline by users of this constant
// elsewhere — confirm against ClearArray expansion.
const int Matcher::init_array_short_size = 18 * BytesPerLong;
2723 
// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;  // 0 = no extra cost
}
2729 
const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;  // 0 = no extra cost
}
2734 
// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
// false: no explicit masking is required on this port.
const bool Matcher::need_masked_shift_count = false;
2746 
// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Only profitable when decoding needs no shift.
  return Universe::narrow_oop_shift() == 0;
}
2760 
// As above, but for narrow klass decodes; conservatively disabled.
bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}
2766 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
// true: no split/fixup is performed on this port.
const bool Matcher::misaligned_doubles_ok = true;
2779 
// (The old "No-op on amd64" comment was inherited from the x86 port.)
// On AArch64 this hook is never expected to run and aborts if it does.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}
2784 
// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
// false: no explicit rounding is needed on this port.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
// true: deopt expects stack floats widened to double.
bool Matcher::float_in_double() { return true; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
2798 
2799 // Return whether or not this register is ever used as an argument.
2800 // This function is used on startup to build the trampoline stubs in
2801 // generateOptoStub.  Registers not mentioned will be killed by the VM
2802 // call in the trampoline, and arguments in those registers not be
2803 // available to the callee.
2804 bool Matcher::can_be_java_arg(int reg)
2805 {
2806   return
2807     reg ==  R0_num || reg == R0_H_num ||
2808     reg ==  R1_num || reg == R1_H_num ||
2809     reg ==  R2_num || reg == R2_H_num ||
2810     reg ==  R3_num || reg == R3_H_num ||
2811     reg ==  R4_num || reg == R4_H_num ||
2812     reg ==  R5_num || reg == R5_H_num ||
2813     reg ==  R6_num || reg == R6_H_num ||
2814     reg ==  R7_num || reg == R7_H_num ||
2815     reg ==  V0_num || reg == V0_H_num ||
2816     reg ==  V1_num || reg == V1_H_num ||
2817     reg ==  V2_num || reg == V2_H_num ||
2818     reg ==  V3_num || reg == V3_H_num ||
2819     reg ==  V4_num || reg == V4_H_num ||
2820     reg ==  V5_num || reg == V5_H_num ||
2821     reg ==  V6_num || reg == V6_H_num ||
2822     reg ==  V7_num || reg == V7_H_num;
2823 }
2824 
// An argument register can also serve as a spill location.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}
2829 
// Use a hand-written assembler sequence for long division by a
// constant?  No — rely on the generic handling instead.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}
2833 
// Register for DIVI projection of divmodI.
// None of these projection masks should ever be requested on this
// port (ShouldNotReachHere) — presumably fused div/mod nodes are not
// formed here; confirm against the platform's DivMod matching.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}
2856 
// SP-save mask for method handle invokes: the frame pointer register.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
2860 
2861 // helper for encoding java_to_runtime calls on sim
2862 //
2863 // this is needed to compute the extra arguments required when
2864 // planting a call to the simulator blrt instruction. the TypeFunc
2865 // can be queried to identify the counts for integral, and floating
2866 // arguments and the return type
2867 
2868 static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
2869 {
2870   int gps = 0;
2871   int fps = 0;
2872   const TypeTuple *domain = tf->domain();
2873   int max = domain->cnt();
2874   for (int i = TypeFunc::Parms; i < max; i++) {
2875     const Type *t = domain->field_at(i);
2876     switch(t->basic_type()) {
2877     case T_FLOAT:
2878     case T_DOUBLE:
2879       fps++;
2880     default:
2881       gps++;
2882     }
2883   }
2884   gpcnt = gps;
2885   fpcnt = fps;
2886   BasicType rt = tf->return_type();
2887   switch (rt) {
2888   case T_VOID:
2889     rtype = MacroAssembler::ret_type_void;
2890     break;
2891   default:
2892     rtype = MacroAssembler::ret_type_integral;
2893     break;
2894   case T_FLOAT:
2895     rtype = MacroAssembler::ret_type_float;
2896     break;
2897   case T_DOUBLE:
2898     rtype = MacroAssembler::ret_type_double;
2899     break;
2900   }
2901 }
2902 
// Emit a volatile access: apply INSN (an acquire/release instruction
// such as ldar/stlr, which only accepts a bare base register) to REG.
// The guarantees enforce that the memory operand really is a plain
// [base] — no index, displacement or scale.  SCRATCH is currently
// unused but kept in the signature for uniformity at the call sites.
// NOTE: the expansion declares _masm in the enclosing scope, and code
// following a use of this macro may rely on that.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }
2911 
// Member-function-pointer types for the loadStore helpers below:
// integer loads/stores, FP loads/stores, and SIMD loads/stores.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
2916 
2917   // Used for all non-volatile memory accesses.  The use of
2918   // $mem->opcode() to discover whether this pattern uses sign-extended
2919   // offsets is something of a kludge.
2920   static void loadStore(MacroAssembler masm, mem_insn insn,
2921                          Register reg, int opcode,
2922                          Register base, int index, int size, int disp)
2923   {
2924     Address::extend scale;
2925 
2926     // Hooboy, this is fugly.  We need a way to communicate to the
2927     // encoder that the index needs to be sign extended, so we have to
2928     // enumerate all the cases.
2929     switch (opcode) {
2930     case INDINDEXSCALEDOFFSETI2L:
2931     case INDINDEXSCALEDI2L:
2932     case INDINDEXSCALEDOFFSETI2LN:
2933     case INDINDEXSCALEDI2LN:
2934     case INDINDEXOFFSETI2L:
2935     case INDINDEXOFFSETI2LN:
2936       scale = Address::sxtw(size);
2937       break;
2938     default:
2939       scale = Address::lsl(size);
2940     }
2941 
2942     if (index == -1) {
2943       (masm.*insn)(reg, Address(base, disp));
2944     } else {
2945       if (disp == 0) {
2946         (masm.*insn)(reg, Address(base, as_Register(index), scale));
2947       } else {
2948         masm.lea(rscratch1, Address(base, disp));
2949         (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
2950       }
2951     }
2952   }
2953 
2954   static void loadStore(MacroAssembler masm, mem_float_insn insn,
2955                          FloatRegister reg, int opcode,
2956                          Register base, int index, int size, int disp)
2957   {
2958     Address::extend scale;
2959 
2960     switch (opcode) {
2961     case INDINDEXSCALEDOFFSETI2L:
2962     case INDINDEXSCALEDI2L:
2963     case INDINDEXSCALEDOFFSETI2LN:
2964     case INDINDEXSCALEDI2LN:
2965       scale = Address::sxtw(size);
2966       break;
2967     default:
2968       scale = Address::lsl(size);
2969     }
2970 
2971      if (index == -1) {
2972       (masm.*insn)(reg, Address(base, disp));
2973     } else {
2974       if (disp == 0) {
2975         (masm.*insn)(reg, Address(base, as_Register(index), scale));
2976       } else {
2977         masm.lea(rscratch1, Address(base, disp));
2978         (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
2979       }
2980     }
2981   }
2982 
2983   static void loadStore(MacroAssembler masm, mem_vector_insn insn,
2984                          FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
2985                          int opcode, Register base, int index, int size, int disp)
2986   {
2987     if (index == -1) {
2988       (masm.*insn)(reg, T, Address(base, disp));
2989     } else {
2990       assert(disp == 0, "unsupported address mode");
2991       (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
2992     }
2993   }
2994 
2995 %}
2996 
2997 
2998 
2999 //----------ENCODING BLOCK-----------------------------------------------------
3000 // This block specifies the encoding classes used by the compiler to
3001 // output byte streams.  Encoding classes are parameterized macros
3002 // used by Machine Instruction Nodes in order to generate the bit
3003 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  There are currently four
// supported interfaces: REG_INTER, CONST_INTER, MEMORY_INTER, &
3006 // COND_INTER.  REG_INTER causes an operand to generate a function
3007 // which returns its register number when queried.  CONST_INTER causes
3008 // an operand to generate a function which returns the value of the
3009 // constant when queried.  MEMORY_INTER causes an operand to generate
3010 // four functions which return the Base Register, the Index Register,
3011 // the Scale Value, and the Offset Value of the operand when queried.
3012 // COND_INTER causes an operand to generate six functions which return
3013 // the encoding code (ie - encoding bits for the instruction)
3014 // associated with each basic boolean condition for a conditional
3015 // instruction.
3016 //
3017 // Instructions specify two basic values for encoding.  Again, a
3018 // function is available to check if the constant displacement is an
3019 // oop. They use the ins_encode keyword to specify their encoding
3020 // classes (which must be a sequence of enc_class names, and their
3021 // parameters, specified in the encoding block), and they use the
3022 // opcode keyword to specify, in order, their primary, secondary, and
3023 // tertiary opcode.  Only the opcode sections which a particular
3024 // instruction needs for encoding need to be specified.
3025 encode %{
3026   // Build emit functions for each basic byte or larger field in the
3027   // intel encoding scheme (opcode, rm, sib, immediate), and call them
3028   // from C++ code in the enc_class source block.  Emit functions will
3029   // live in the main source block for now.  In future, we can
3030   // generalize this by adding a syntax that specifies the sizes of
3031   // fields in an order, so that the adlc can build the emit functions
3032   // automagically
3033 
  // catch all for unimplemented encodings: stops the VM with a message
  // rather than silently emitting nothing
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}
3039 
  // BEGIN Non-volatile memory access
  //
  // These enc_classes all delegate to the loadStore helpers above,
  // passing the memory operand's components plus its ADL opcode so the
  // helper can choose the index extension.  The name suffix is the
  // AArch64 mnemonic emitted (ldrsbw = load signed byte into 32 bits,
  // ldrsb = into 64 bits, etc.); the dst operand type distinguishes
  // the iRegI/iRegL and FP/SIMD flavours.

  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // FP scalar loads (32- and 64-bit).
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // SIMD loads: S/D/Q selects the 32/64/128-bit register variant.
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
3143 
  // Stores mirror the loads above.  The *0 variants store the zero
  // register zr directly, avoiding the need to materialize a zero.
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    // (sp is not encodable as a store source), so copy it through
    // rscratch2 first.
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // FP scalar stores.
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // SIMD stores: S/D/Q selects the 32/64/128-bit register variant.
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
3229 
3230   // END Non-volatile memory access
3231 
3232   // volatile loads and stores
3233 
  // Volatile stores use release instructions (stlrb/stlrh/stlrw);
  // MOV_VOLATILE guarantees the operand is a plain [base] address.
  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
  %}

  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
  %}

  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}
3248 
3249 
  // Volatile loads use acquire instructions (ldarb/ldarh/ldarw/ldar).
  // There are no sign-extending acquire loads, so the signed variants
  // follow the load with an explicit sxt*; there are no FP acquire
  // loads either, so the FP variants load into rscratch1 and fmov the
  // bits into the destination.  (MOV_VOLATILE declares _masm, which
  // the trailing instructions below rely on.)
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}

  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
3324 
  // 64-bit release store; uses the same copy-sp-through-rscratch2
  // dodge as aarch64_enc_str above.
  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
        MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}

  // FP release stores: stlr takes only general registers, so move the
  // FP bits to rscratch2 first.  The fmov runs in its own scope with a
  // local _masm because MOV_VOLATILE declares another one.
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}

  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
3358 
  // synchronized read/update encodings

  // Load-acquire exclusive.  ldaxr only accepts a bare base register,
  // so any index/displacement is folded into rscratch1 with lea first.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // base + disp first, then add the scaled index on top
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}
3389 
  // Store-release exclusive.  As with ldaxr, any index/displacement is
  // folded into a scratch register (rscratch2 here, since rscratch1
  // receives the stlxr status).
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    // Expose the store-exclusive status as flags: status 0 => EQ.
    __ cmpw(rscratch1, zr);
  %}
3419 
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    // 64-bit compare-and-swap built from a ldxr/stlxr retry loop.
    // On exit the flags hold the outcome of the compare:
    //   EQ == value matched and was swapped, NE == value differed.
    // No acquire/release barriers beyond the stlxr are emitted here;
    // callers pair this with the membar_* lock nodes as needed.
    MacroAssembler _masm(&cbuf);
    Register old_reg = as_Register($oldval$$reg);
    Register new_reg = as_Register($newval$$reg);
    Register base = as_Register($mem$$base);
    Register addr_reg;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    // flatten the memory operand into a single address register
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        addr_reg = rscratch2;
      } else {
        // TODO
        // should we ever get anything other than this case?
        addr_reg = base;
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        addr_reg = rscratch2;
      } else {
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        addr_reg = rscratch2;
      }
    }
    Label retry_load, done;
    __ bind(retry_load);
    __ ldxr(rscratch1, addr_reg);
    __ cmp(rscratch1, old_reg);
    __ br(Assembler::NE, done);       // mismatch: exit with flags NE
    __ stlxr(rscratch1, new_reg, addr_reg);
    __ cbnzw(rscratch1, retry_load);  // lost the reservation: retry
    __ bind(done);
  %}
3458 
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    // 32-bit variant of aarch64_enc_cmpxchg: ldxrw/stlxrw retry loop.
    // On exit flags are EQ on a successful swap, NE when the word
    // differed from oldval.
    MacroAssembler _masm(&cbuf);
    Register old_reg = as_Register($oldval$$reg);
    Register new_reg = as_Register($newval$$reg);
    Register base = as_Register($mem$$base);
    Register addr_reg;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    // flatten the memory operand into a single address register
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        addr_reg = rscratch2;
      } else {
        // TODO
        // should we ever get anything other than this case?
        addr_reg = base;
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        addr_reg = rscratch2;
      } else {
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        addr_reg = rscratch2;
      }
    }
    Label retry_load, done;
    __ bind(retry_load);
    __ ldxrw(rscratch1, addr_reg);
    __ cmpw(rscratch1, old_reg);
    __ br(Assembler::NE, done);       // mismatch: exit with flags NE
    __ stlxrw(rscratch1, new_reg, addr_reg);
    __ cbnzw(rscratch1, retry_load);  // lost the reservation: retry
    __ bind(done);
  %}
3497 
3498   // auxiliary used for CompareAndSwapX to set result register
3499   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
3500     MacroAssembler _masm(&cbuf);
3501     Register res_reg = as_Register($res$$reg);
3502     __ cset(res_reg, Assembler::EQ);
3503   %}
3504 
3505   // prefetch encodings
3506 
  enc_class aarch64_enc_prefetchw(memory mem) %{
    // Prefetch-for-store (PSTL1KEEP) at the address described by the
    // memory operand.  prfm cannot combine a register index with a
    // displacement, so that case stages the displacement through
    // rscratch1 first.
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
      // NOTE(review): the nop presumably pads this path to the same
      // size as the two-instruction paths below -- confirm before
      // removing.
      __ nop();
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
3526 
  enc_class aarch64_enc_clear_array_reg_reg(iRegL_R11 cnt, iRegP_R10 base) %{
    // Zero cnt words starting at base using a Duff's-device style
    // computed branch into an 8-way unrolled loop of str(zr).
    // Clobbers cnt_reg, base_reg, rscratch1 and rscratch2.
    MacroAssembler _masm(&cbuf);
    Register cnt_reg = as_Register($cnt$$reg);
    Register base_reg = as_Register($base$$reg);
    // base is word aligned
    // cnt is count of words

    Label loop;
    Label entry;

//  Algorithm:
//
//    scratch1 = cnt & 7;
//    cnt -= scratch1;
//    p += scratch1;
//    switch (scratch1) {
//      do {
//        cnt -= 8;
//          p[-8] = 0;
//        case 7:
//          p[-7] = 0;
//        case 6:
//          p[-6] = 0;
//          // ...
//        case 1:
//          p[-1] = 0;
//        case 0:
//          p += 8;
//      } while (cnt);
//    }

    const int unroll = 8; // Number of str(zr) instructions we'll unroll

    __ andr(rscratch1, cnt_reg, unroll - 1);  // tmp1 = cnt % unroll
    __ sub(cnt_reg, cnt_reg, rscratch1);      // cnt -= cnt % unroll (round down to a multiple of unroll)
    // base_reg always points to the end of the region we're about to zero
    __ add(base_reg, base_reg, rscratch1, Assembler::LSL, exact_log2(wordSize));
    // Jump back rscratch1 instructions (4 bytes each) from 'entry' so
    // that exactly the last (cnt % unroll) stores execute on the first
    // pass through the unrolled body.
    __ adr(rscratch2, entry);
    __ sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
    __ br(rscratch2);
    __ bind(loop);
    __ sub(cnt_reg, cnt_reg, unroll);
    for (int i = -unroll; i < 0; i++)
      __ str(zr, Address(base_reg, i * wordSize));
    __ bind(entry);
    __ add(base_reg, base_reg, unroll * wordSize);
    __ cbnz(cnt_reg, loop);
  %}
3575 
  /// mov encodings
3577 
3578   enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
3579     MacroAssembler _masm(&cbuf);
3580     u_int32_t con = (u_int32_t)$src$$constant;
3581     Register dst_reg = as_Register($dst$$reg);
3582     if (con == 0) {
3583       __ movw(dst_reg, zr);
3584     } else {
3585       __ movw(dst_reg, con);
3586     }
3587   %}
3588 
3589   enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
3590     MacroAssembler _masm(&cbuf);
3591     Register dst_reg = as_Register($dst$$reg);
3592     u_int64_t con = (u_int64_t)$src$$constant;
3593     if (con == 0) {
3594       __ mov(dst_reg, zr);
3595     } else {
3596       __ mov(dst_reg, con);
3597     }
3598   %}
3599 
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    // Materialize a pointer constant.  The constant's relocation type
    // selects the mechanism: oops and metadata need relocated
    // immediates; unrelocated addresses are loaded directly or with a
    // pc-relative adrp+add pair.
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      // null and the distinguished value 1 have dedicated encodings
      // (aarch64_enc_mov_p0 / aarch64_enc_mov_p1)
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          // tiny "addresses" are plain small integers, not code/data
          // addresses; load them literally
          __ mov(dst_reg, con);
        } else {
          // pc-relative page address plus in-page offset
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}
3624 
3625   enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
3626     MacroAssembler _masm(&cbuf);
3627     Register dst_reg = as_Register($dst$$reg);
3628     __ mov(dst_reg, zr);
3629   %}
3630 
3631   enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
3632     MacroAssembler _masm(&cbuf);
3633     Register dst_reg = as_Register($dst$$reg);
3634     __ mov(dst_reg, (u_int64_t)1);
3635   %}
3636 
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    // Load the address of the safepoint polling page.  adrp yields the
    // containing 4K page, which for a page-aligned target is the whole
    // address -- hence the assert that the residual offset is zero.
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}
3645 
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    // Load the card-table byte map base address with a single adrp;
    // the base is assumed page-aligned (asserted via off == 0).
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, ExternalAddress(page), off);
    assert(off == 0, "assumed offset == 0");
  %}
3654 
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    // Materialize a narrow (compressed) oop constant.
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      // the null narrow oop has its own encoding (aarch64_enc_mov_n0)
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}
3667 
3668   enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
3669     MacroAssembler _masm(&cbuf);
3670     Register dst_reg = as_Register($dst$$reg);
3671     __ mov(dst_reg, zr);
3672   %}
3673 
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    // Materialize a narrow (compressed) klass constant.
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      // a null narrow klass constant should never reach the matcher
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
3686 
3687   // arithmetic encodings
3688 
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    // 32-bit add/subtract of an add/sub-encodable immediate.  One
    // encoding serves both instructs: the node's primary opcode flag
    // selects subtract by negating the constant.
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      // flip the operation so the emitted immediate is non-negative
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}
3702 
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    // 64-bit add/subtract of an add/sub-encodable immediate.  As with
    // the 32-bit form, the primary opcode flag selects subtract by
    // negating the constant.
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      // flip the operation so the emitted immediate is non-negative
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}
3716 
3717   enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
3718     MacroAssembler _masm(&cbuf);
3719    Register dst_reg = as_Register($dst$$reg);
3720    Register src1_reg = as_Register($src1$$reg);
3721    Register src2_reg = as_Register($src2$$reg);
3722     __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
3723   %}
3724 
3725   enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
3726     MacroAssembler _masm(&cbuf);
3727    Register dst_reg = as_Register($dst$$reg);
3728    Register src1_reg = as_Register($src1$$reg);
3729    Register src2_reg = as_Register($src2$$reg);
3730     __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
3731   %}
3732 
3733   enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
3734     MacroAssembler _masm(&cbuf);
3735    Register dst_reg = as_Register($dst$$reg);
3736    Register src1_reg = as_Register($src1$$reg);
3737    Register src2_reg = as_Register($src2$$reg);
3738     __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
3739   %}
3740 
3741   enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
3742     MacroAssembler _masm(&cbuf);
3743    Register dst_reg = as_Register($dst$$reg);
3744    Register src1_reg = as_Register($src1$$reg);
3745    Register src2_reg = as_Register($src2$$reg);
3746     __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
3747   %}
3748 
3749   // compare instruction encodings
3750 
3751   enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
3752     MacroAssembler _masm(&cbuf);
3753     Register reg1 = as_Register($src1$$reg);
3754     Register reg2 = as_Register($src2$$reg);
3755     __ cmpw(reg1, reg2);
3756   %}
3757 
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    // Compare a 32-bit register with an add/sub-encodable immediate by
    // subtracting into the zero register (flags only, no result).
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      // negative immediate: adds of the negation sets the same flags
      __ addsw(zr, reg, -val);
    }
  %}
3768 
3769   enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
3770     MacroAssembler _masm(&cbuf);
3771     Register reg1 = as_Register($src1$$reg);
3772     u_int32_t val = (u_int32_t)$src2$$constant;
3773     __ movw(rscratch1, val);
3774     __ cmpw(reg1, rscratch1);
3775   %}
3776 
3777   enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
3778     MacroAssembler _masm(&cbuf);
3779     Register reg1 = as_Register($src1$$reg);
3780     Register reg2 = as_Register($src2$$reg);
3781     __ cmp(reg1, reg2);
3782   %}
3783 
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    // Compare a 64-bit register with a 12-bit add/sub immediate by
    // subtracting (or adding the negation) into the zero register.
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
    // aargh, Long.MIN_VALUE is a special case
    // (presumably unreachable for an immL12 operand; kept defensively)
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}
3798 
3799   enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
3800     MacroAssembler _masm(&cbuf);
3801     Register reg1 = as_Register($src1$$reg);
3802     u_int64_t val = (u_int64_t)$src2$$constant;
3803     __ mov(rscratch1, val);
3804     __ cmp(reg1, rscratch1);
3805   %}
3806 
3807   enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
3808     MacroAssembler _masm(&cbuf);
3809     Register reg1 = as_Register($src1$$reg);
3810     Register reg2 = as_Register($src2$$reg);
3811     __ cmp(reg1, reg2);
3812   %}
3813 
3814   enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
3815     MacroAssembler _masm(&cbuf);
3816     Register reg1 = as_Register($src1$$reg);
3817     Register reg2 = as_Register($src2$$reg);
3818     __ cmpw(reg1, reg2);
3819   %}
3820 
3821   enc_class aarch64_enc_testp(iRegP src) %{
3822     MacroAssembler _masm(&cbuf);
3823     Register reg = as_Register($src$$reg);
3824     __ cmp(reg, zr);
3825   %}
3826 
3827   enc_class aarch64_enc_testn(iRegN src) %{
3828     MacroAssembler _masm(&cbuf);
3829     Register reg = as_Register($src$$reg);
3830     __ cmpw(reg, zr);
3831   %}
3832 
3833   enc_class aarch64_enc_b(label lbl) %{
3834     MacroAssembler _masm(&cbuf);
3835     Label *L = $lbl$$label;
3836     __ b(*L);
3837   %}
3838 
3839   enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
3840     MacroAssembler _masm(&cbuf);
3841     Label *L = $lbl$$label;
3842     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
3843   %}
3844 
3845   enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
3846     MacroAssembler _masm(&cbuf);
3847     Label *L = $lbl$$label;
3848     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
3849   %}
3850 
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     // Delegate the slow-path subtype test to
     // MacroAssembler::check_klass_subtype_slow_path, requesting that
     // the outcome also be left in the condition flags
     // (set_cond_codes == true).  When the node's primary flag is set,
     // result is additionally zeroed on the hit path.
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
3868 
  enc_class aarch64_enc_java_static_call(method meth) %{
    // Emit a static / optimized-virtual Java call (or a runtime stub
    // call when no method is attached), always via a trampoline so the
    // target can be anywhere in the code cache.
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else if (_optimized_virtual) {
      __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
    } else {
      __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
    }

    if (_method) {
      // Emit stub for static call
      CompiledStaticCall::emit_to_interp_stub(cbuf);
    }
  %}
3887 
3888   enc_class aarch64_enc_java_dynamic_call(method meth) %{
3889     MacroAssembler _masm(&cbuf);
3890     __ ic_call((address)$meth$$method);
3891   %}
3892 
  enc_class aarch64_enc_call_epilog() %{
    // Post-call bookkeeping; only active under -XX:+VerifyStackAtCalls,
    // and not yet implemented on AArch64 (call_Unimplemented traps).
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find magic cookie on stack
      __ call_Unimplemented();
    }
  %}
3900 
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blrt
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // target lives in the code cache: reachable via trampoline call
      __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
    } else {
      // native entry outside the code cache: call through blrt with
      // argument-register counts derived from the method's signature
      int gpcnt;
      int fpcnt;
      int rtype;
      getCallInfo(tf(), gpcnt, fpcnt, rtype);
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaThread::pd_last_frame().
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
      __ bind(retaddr);
      // pop the breadcrumb frame again
      __ add(sp, sp, 2 * wordSize);
    }
  %}
3927 
  enc_class aarch64_enc_rethrow() %{
    // Jump (not call) to the shared rethrow stub; far_jump is used
    // because the stub may be outside direct branch range.
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}
3932 
  enc_class aarch64_enc_ret() %{
    // Return to the caller through the link register.
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}
3937 
3938   enc_class aarch64_enc_tail_call(iRegP jump_target) %{
3939     MacroAssembler _masm(&cbuf);
3940     Register target_reg = as_Register($jump_target$$reg);
3941     __ br(target_reg);
3942   %}
3943 
3944   enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
3945     MacroAssembler _masm(&cbuf);
3946     Register target_reg = as_Register($jump_target$$reg);
3947     // exception oop should be in r0
3948     // ret addr has been popped into lr
3949     // callee expects it in r3
3950     __ mov(r3, lr);
3951     __ br(target_reg);
3952   %}
3953 
3954   enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
3955     MacroAssembler _masm(&cbuf);
3956     Register oop = as_Register($object$$reg);
3957     Register box = as_Register($box$$reg);
3958     Register disp_hdr = as_Register($tmp$$reg);
3959     Register tmp = as_Register($tmp2$$reg);
3960     Label cont;
3961     Label object_has_monitor;
3962     Label cas_failed;
3963 
3964     assert_different_registers(oop, box, tmp, disp_hdr);
3965 
3966     // Load markOop from object into displaced_header.
3967     __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
3968 
3969     // Always do locking in runtime.
3970     if (EmitSync & 0x01) {
3971       __ cmp(oop, zr);
3972       return;
3973     }
3974 
3975     if (UseBiasedLocking) {
3976       __ biased_locking_enter(disp_hdr, oop, box, tmp, true, cont);
3977     }
3978 
3979     // Handle existing monitor
3980     if (EmitSync & 0x02) {
3981       // we can use AArch64's bit test and branch here but
3982       // markoopDesc does not define a bit index just the bit value
3983       // so assert in case the bit pos changes
3984 #     define __monitor_value_log2 1
3985       assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
3986       __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
3987 #     undef __monitor_value_log2
3988     }
3989 
3990     // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
3991     __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);
3992 
3993     // Load Compare Value application register.
3994 
3995     // Initialize the box. (Must happen before we update the object mark!)
3996     __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3997 
3998     // Compare object markOop with mark and if equal exchange scratch1
3999     // with object markOop.
4000     // Note that this is simply a CAS: it does not generate any
4001     // barriers.  These are separately generated by
4002     // membar_acquire_lock().
4003     {
4004       Label retry_load;
4005       __ bind(retry_load);
4006       __ ldxr(tmp, oop);
4007       __ cmp(tmp, disp_hdr);
4008       __ br(Assembler::NE, cas_failed);
4009       // use stlxr to ensure update is immediately visible
4010       __ stlxr(tmp, box, oop);
4011       __ cbzw(tmp, cont);
4012       __ b(retry_load);
4013     }
4014 
4015     // Formerly:
4016     // __ cmpxchgptr(/*oldv=*/disp_hdr,
4017     //               /*newv=*/box,
4018     //               /*addr=*/oop,
4019     //               /*tmp=*/tmp,
4020     //               cont,
4021     //               /*fail*/NULL);
4022 
4023     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
4024 
4025     // If the compare-and-exchange succeeded, then we found an unlocked
4026     // object, will have now locked it will continue at label cont
4027 
4028     __ bind(cas_failed);
4029     // We did not see an unlocked object so try the fast recursive case.
4030 
4031     // Check if the owner is self by comparing the value in the
4032     // markOop of object (disp_hdr) with the stack pointer.
4033     __ mov(rscratch1, sp);
4034     __ sub(disp_hdr, disp_hdr, rscratch1);
4035     __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
4036     // If condition is true we are cont and hence we can store 0 as the
4037     // displaced header in the box, which indicates that it is a recursive lock.
4038     __ ands(tmp/*==0?*/, disp_hdr, tmp);
4039     __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
4040 
4041     // Handle existing monitor.
4042     if ((EmitSync & 0x02) == 0) {
4043       __ b(cont);
4044 
4045       __ bind(object_has_monitor);
4046       // The object's monitor m is unlocked iff m->owner == NULL,
4047       // otherwise m->owner may contain a thread or a stack address.
4048       //
4049       // Try to CAS m->owner from NULL to current thread.
4050       __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
4051       __ mov(disp_hdr, zr);
4052 
4053       {
4054         Label retry_load, fail;
4055         __ bind(retry_load);
4056         __ ldxr(rscratch1, tmp);
4057         __ cmp(disp_hdr, rscratch1);
4058         __ br(Assembler::NE, fail);
4059         // use stlxr to ensure update is immediately visible
4060         __ stlxr(rscratch1, rthread, tmp);
4061         __ cbnzw(rscratch1, retry_load);
4062         __ bind(fail);
4063       }
4064 
4065       // Label next;
4066       // __ cmpxchgptr(/*oldv=*/disp_hdr,
4067       //               /*newv=*/rthread,
4068       //               /*addr=*/tmp,
4069       //               /*tmp=*/rscratch1,
4070       //               /*succeed*/next,
4071       //               /*fail*/NULL);
4072       // __ bind(next);
4073 
4074       // store a non-null value into the box.
4075       __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));
4076 
4077       // PPC port checks the following invariants
4078       // #ifdef ASSERT
4079       // bne(flag, cont);
4080       // We have acquired the monitor, check some invariants.
4081       // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
4082       // Invariant 1: _recursions should be 0.
4083       // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
4084       // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
4085       //                        "monitor->_recursions should be 0", -1);
4086       // Invariant 2: OwnerIsThread shouldn't be 0.
4087       // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
4088       //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
4089       //                           "monitor->OwnerIsThread shouldn't be 0", -1);
4090       // #endif
4091     }
4092 
4093     __ bind(cont);
4094     // flag == EQ indicates success
4095     // flag == NE indicates failure
4096 
4097   %}
4098 
4099   // TODO
4100   // reimplement this with custom cmpxchgptr code
4101   // which avoids some of the unnecessary branching
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    // Inline fast-path monitor exit.  On exit the condition flags
    // report the outcome:
    //   EQ == unlocked,  NE == must call the runtime slow path.
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);


    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

      {
        Label retry_load;
        __ bind(retry_load);
        __ ldxr(tmp, oop);
        __ cmp(box, tmp);
        __ br(Assembler::NE, cas_failed);
        // use stlxr to ensure update is immediately visible
        __ stlxr(tmp, disp_hdr, oop);
        __ cbzw(tmp, cont);   // success: flags are still EQ from the cmp
        __ b(retry_load);     // spurious store-exclusive failure: retry
      }

    // __ cmpxchgptr(/*compare_value=*/box,
    //               /*exchange_value=*/disp_hdr,
    //               /*where=*/oop,
    //               /*result=*/tmp,
    //               cont,
    //               /*cas_failed*/NULL);
    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    __ bind(cas_failed);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont); // not owner or recursive: slow path

      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      __ cmp(rscratch1, zr);
      __ cbnz(rscratch1, cont); // waiters present: slow path (flags NE)
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(rscratch1, tmp); // rscratch1 is zero
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
4191 
4192 %}
4193 
4194 //----------FRAME--------------------------------------------------------------
4195 // Definition of frame structure and management information.
4196 //
4197 //  S T A C K   L A Y O U T    Allocators stack-slot number
4198 //                             |   (to get allocators register number
4199 //  G  Owned by    |        |  v    add OptoReg::stack0())
4200 //  r   CALLER     |        |
4201 //  o     |        +--------+      pad to even-align allocators stack-slot
4202 //  w     V        |  pad0  |        numbers; owned by CALLER
4203 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4204 //  h     ^        |   in   |  5
4205 //        |        |  args  |  4   Holes in incoming args owned by SELF
4206 //  |     |        |        |  3
4207 //  |     |        +--------+
4208 //  V     |        | old out|      Empty on Intel, window on Sparc
4209 //        |    old |preserve|      Must be even aligned.
4210 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
4211 //        |        |   in   |  3   area for Intel ret address
4212 //     Owned by    |preserve|      Empty on Sparc.
4213 //       SELF      +--------+
4214 //        |        |  pad2  |  2   pad to align old SP
4215 //        |        +--------+  1
4216 //        |        | locks  |  0
4217 //        |        +--------+----> OptoReg::stack0(), even aligned
4218 //        |        |  pad1  | 11   pad to align new SP
4219 //        |        +--------+
4220 //        |        |        | 10
4221 //        |        | spills |  9   spills
4222 //        V        |        |  8   (pad0 slot for callee)
4223 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4224 //        ^        |  out   |  7
4225 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4226 //     Owned by    +--------+
4227 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4228 //        |    new |preserve|      Must be even-aligned.
4229 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
4230 //        |        |        |
4231 //
4232 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4233 //         known from SELF's arguments and the Java calling convention.
4234 //         Region 6-7 is determined per call site.
4235 // Note 2: If the calling convention leaves holes in the incoming argument
4236 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
4238 //         incoming area, as the Java calling convention is completely under
4239 //         the control of the AD file.  Doubles can be sorted and packed to
4240 //         avoid holes.  Holes in the outgoing arguments may be nessecary for
4241 //         varargs C calling conventions.
4242 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4243 //         even aligned with pad0 as needed.
4244 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
4245 //           (the latter is true on Intel but is it false on AArch64?)
4246 //         region 6-11 is even aligned; it may be padded out more so that
4247 //         the region from SP to FP meets the minimum stack alignment.
4248 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4249 //         alignment.  Region 11, pad1, may be dynamically extended so that
4250 //         SP meets the minimum alignment.
4251 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Low half of the return-register pair, indexed by ideal register
    // type.  Table order follows the Op_* enum (see the trailing
    // comments); ints/longs/pointers return in R0, floats/doubles in V0.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    // High half of the pair; OptoReg::Bad marks 32-bit values that
    // occupy a single slot and have no high half.
    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                       // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
4355 
//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(1);        // Required cost attribute.  This is the default
                             // cost for any operand that does not declare its
                             // own op_cost().

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(INSN_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
4378 
//----------Simple Operands----------------------------------------------------

// Integer operands 32 bit
// 32 bit immediate
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer less than or equal to 4
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Exact 32 bit constants required by particular match rules
// (shift counts, lane widths, byte/halfword masks).

operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit 0xff (low-byte mask)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit 0xffff (low-halfword mask)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NOTE(review): despite the immL_ name, this operand tests get_int()
// and matches ConI (an int constant node), not ConL.  Presumably the
// rules using it match an int shift-count/mask applied to a long
// operation -- confirm against the use sites before "fixing".
operand immL_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NOTE(review): same int/long naming mismatch as immL_63 above --
// this matches ConI via get_int(), not a long constant.
operand immL_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4554 
// 64 bit 0xffff (low-halfword mask)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit 0xffffffff (low-word mask)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask of contiguous low-order one bits: value+1 is a power of
// two, and the top two bits are clear.
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of contiguous low-order one bits: value+1 is a power of
// two, and the top two bits are clear.
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset, long flavour of immIU12
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long flavour of immIOffset
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4693 
// Integer operands 64 bit
// 64 bit immediate
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset of last_Java_pc within the thread's frame anchor, expressed
// as a long constant (frame_anchor_offset + last_Java_pc_offset).

operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4780 
// Pointer operands
// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
// matches only the address of the VM's safepoint polling page
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map
  predicate((jbyte*)n->get_ptr() ==
        ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4862 
// Float and Double operands
// Double Immediate
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d
// (bit pattern is all zeros, so this matches +0.0 only, not -0.0)
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: any double encodable as a packed fp immediate
// (i.e. accepted by Assembler::operand_valid_for_float_immediate,
// suitable for an FMOV with immediate operand)
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f.
// (bit pattern is all zeros, so this matches +0.0f only, not -0.0f)
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: any float encodable as a packed fp immediate
// (widened to double for the validity check)
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4923 
// Narrow pointer operands
// Narrow Pointer Immediate
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow Klass Pointer Immediate (compressed class pointer constant)
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4954 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4988 
// Integer 64 bit Register not Special
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  // Explicit zero cost, matching iRegINoSp/iRegNNoSp.  Without this the
  // op_attrib default of op_cost(1) applies, giving the 64-bit
  // no-special register class a spurious extra match cost relative to
  // its 32-bit siblings.
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4997 
// Pointer Register Operands
// Pointer Register
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Fixed-register pointer operands below pin a value to one specific
// register, as required by calling conventions and runtime stubs.

// Pointer 64 bit Register R0 only
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R5 only
operand iRegP_R5()
%{
  constraint(ALLOC_IN_RC(r5_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R10 only
operand iRegP_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R11 only
operand iRegL_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register FP only
operand iRegP_FP()
%{
  constraint(ALLOC_IN_RC(fp_reg));
  match(RegP);
  // match(iRegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5136 
// Register R0 only
operand iRegI_R0()
%{
  constraint(ALLOC_IN_RC(int_r0_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R2 only
operand iRegI_R2()
%{
  constraint(ALLOC_IN_RC(int_r2_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R3 only
operand iRegI_R3()
%{
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Register R4 only
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5181 
5182 
// Narrow Pointer Register Operands
// Narrow Pointer Register
operand iRegN()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register not Special
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// heap base register -- used for encoding immN0

operand iRegIHeapbase()
%{
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5215 
// Float Register
// Float register operands
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 64-bit (D-sized) vector register operand
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 128-bit (Q/X-sized) vector register operand
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Fixed FP register operands: pin a double to a specific V register
// (used by calling conventions and runtime stubs).

operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5295 
// Flags register, used as output of signed compare instructions

// note that on AArch64 we also use this register as the output for
// for floating point compare instructions (CmpF CmpD). this ensures
// that ordered inequality tests use GT, GE, LT or LE none of which
// pass through cases where the result is unordered i.e. one or both
// inputs to the compare is a NaN. this means that the ideal code can
// replace e.g. a GT with an LE and not end up capturing the NaN case
// (where the comparison should always fail). EQ and NE tests are
// always generated in ideal code so that unordered folds into the NE
// case, matching the behaviour of AArch64 NE.
//
// This differs from x86 where the outputs of FP compares use a
// special FP flags registers and where compares based on this
// register are distinguished into ordered inequalities (cmpOpUCF) and
// EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
// to explicitly handle the unordered case in branches. x86 also has
// to include extra CMoveX rules to accept a cmpOpUCF input.

operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of unsigned compare instructions
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}
5335 
// Special Registers

// Method Register
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Thread Register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Link Register
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5377 
//----------Memory Operands----------------------------------------------------
// In the MEMORY_INTER interfaces below, index(0xffffffff) appears to be
// the sentinel for "no index register" -- NOTE(review): confirm against
// the ADLC/Matcher handling of memory interfaces.

// Base register only: [reg]
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// Base + scaled long index + unsigned 12-bit (int) displacement
operand indIndexScaledOffsetI(iRegP reg, iRegL lreg, immIScale scale, immIU12 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL lreg scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $lreg lsl($scale), $off" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// Base + scaled long index + unsigned 12-bit (long) displacement
operand indIndexScaledOffsetL(iRegP reg, iRegL lreg, immIScale scale, immLU12 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL lreg scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $lreg lsl($scale), $off" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// Base + sign-extended int index + displacement
operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (ConvI2L ireg)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $ireg, $off I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Base + scaled, sign-extended int index + displacement
operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL (ConvI2L ireg) scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $ireg sxtw($scale), $off I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

// Base + scaled, sign-extended int index, no displacement
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Base + scaled long index, no displacement
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// Base + unscaled long index
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// Base + int immediate offset valid for scaled/unscaled addressing
operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Base + long immediate offset valid for scaled/unscaled addressing
operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5519 
5520 
// Narrow-oop (compressed pointer) variants of the memory operands.
//
// Each matches an address rooted at a DecodeN of a narrow-oop
// register. All are guarded by Universe::narrow_oop_shift() == 0,
// i.e. the decoded base equals the compressed register value, so the
// DecodeN can be absorbed into the addressing mode.

// [narrow base], no index, no offset
operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// narrow base + (long index << scale) + immIU12 offset
// NOTE(review): op_cost(0) here vs op_cost(INSN_COST) in the LN
// variant below -- confirm the asymmetry is intended.
operand indIndexScaledOffsetIN(iRegN reg, iRegL lreg, immIScale scale, immIU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// narrow base + (long index << scale) + immLU12 offset
operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// narrow base + sign-extended int index + immLU12 offset
operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $ireg, $off I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// narrow base + (sign-extended int index << scale) + immLU12 offset
operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)) off);
  op_cost(INSN_COST);
  format %{ "$reg, $ireg sxtw($scale), $off I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

// narrow base + (sign-extended int index << scale), no offset
operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// narrow base + (long index << scale), no offset
operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// narrow base + long index, no scale, no offset
operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// narrow base + int immediate offset, no index register
operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// narrow base + long immediate offset, no index register
operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5670 
5671 
5672 
// AArch64 opto stubs need to write to the pc slot in the thread anchor
// Address operand: thread register + fixed offset (immL_pc_off) of
// the anchor's pc slot. index(0xffffffff) means no index register.
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5687 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
//
// base(0x1e) is the matcher's encoding for the stack pointer; the
// "RSP" comments below look inherited from the x86 port -- TODO
// confirm against this port's register definitions.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a 32-bit int.
operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a float.
operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a double.
operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a 64-bit long.
operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
5762 
5763 // Operands for expressing Control Flow
5764 // NOTE: Label is a predefined operand which should not be redefined in
5765 //       the AD file. It is generically handled within the ADLC.
5766 
5767 //----------Conditional Branch Operands----------------------------------------
5768 // Comparison Op  - This is the operation of the comparison, and is limited to
5769 //                  the following set of codes:
5770 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5771 //
5772 // Other attributes of the comparison, such as unsignedness, are specified
5773 // by the comparison instruction that sets a condition code flags register.
5774 // That result is represented by a flags operand whose subtype is appropriate
5775 // to the unsignedness (etc.) of the comparison.
5776 //
5777 // Later, the instruction which matches both the Comparison Op (a Bool) and
5778 // the flags (produced by the Cmp) specifies the coding of the comparison op
5779 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5780 
// used for signed integral comparisons and fp comparisons
//
// The hex values are the AArch64 condition-code encodings for the
// mnemonics shown (eq=0b0000, ne=0b0001, lt=0b1011, ge=0b1010,
// le=0b1101, gt=0b1100, vs=0b0110, vc=0b0111).

operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5799 
// used for unsigned integral comparisons
//
// Same structure as cmpOp but mapped to the unsigned condition codes
// (lo=0b0011, hs=0b0010, ls=0b1001, hi=0b1000).

operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5818 
// Special operand allowing long args to int ops to be truncated for free
// Matches a ConvL2I of a long register as a plain register operand, so
// a 32-bit instruction can consume the low half of the long directly.

operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  interface(REG_INTER)
%}
5831 
5832 opclass vmem(indirect, indIndex, indOffI, indOffL);
5833 
5834 //----------OPERAND CLASSES----------------------------------------------------
5835 // Operand Classes are groups of operands that are used as to simplify
5836 // instruction definitions by not requiring the AD writer to specify
5837 // separate instructions for every form of operand when the
5838 // instruction accepts multiple operand types with the same basic
5839 // encoding and format. The classic case of this is memory operands.
5840 
// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address
// (first row: plain pointer forms; second row: narrow-oop "N" forms)

opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
5846 
5847 
// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. it allows the src to be either an iRegI or a (ConvL2I
// iRegL). in the latter case the l2i normally planted for a ConvL2I
// can be elided because the 32-bit instruction will just employ the
// lower 32 bits anyway.
//
// n.b. this does not elide all L2I conversions. if the truncated
// value is consumed by more than one operation then the ConvL2I
// cannot be bundled into the consuming nodes so an l2i gets planted
// (actually a movw $dst $src) and the downstream instructions consume
// the result of the l2i as an iRegI input. That's a shame since the
// movw is actually redundant but its not too costly.

opclass iRegIorL2I(iRegI, iRegL2I);
5862 
5863 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
5866 pipeline %{
5867 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // Fixed size instructions (AArch64 has no variable-length encodings)
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
5880 
5881 // We don't use an actual pipeline model so don't care about resources
5882 // or description. we do use pipeline classes to introduce fixed
5883 // latencies
5884 
//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine
//
// INS0/INS1 model the two issue slots of a dual-issue core; INS01
// means "either slot" (see the pipe classes below).

resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline
// (issue, execute 1, execute 2, write result)

pipe_desc(ISS, EX1, EX2, WR);
5900 
5901 //----------PIPELINE CLASSES---------------------------------------------------
5902 // Pipeline Classes describe the stages in which input and output are
5903 // referenced by the hardware pipeline.
5904 
//------- Integer ALU operations --------------------------

// Integer ALU reg-reg operation
// Operands needed in EX1, result generated in EX2
// Eg.  ADD     x0, x1, x2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : EX1(read);
  INS01  : ISS; // Dual issue as instruction 0 or 1
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with constant shift
// Shifted register must be available in LATE_ISS instead of EX1
// Eg.  ADD     x0, x1, x2, LSL #2
pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg operation with constant shift
// Eg.  LSL     x0, x1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with variable shift
// Both operands must be available in LATE_ISS instead of EX1
// Result is available in EX1 instead of EX2
// Eg.  LSLV    x0, x1, x2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX1(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX1;
%}

// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  // NOTE(review): dst is written in EX2 but the ALU resource is booked
  // in EX1 -- confirm which stage is intended.
  ALU    : EX1;
%}

// Integer ALU reg operation
// Eg.  NEG     x0, x1
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX2(write);
  src    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-immediate operation
// Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU immediate operation (no source operands)
// Eg.  MOV     x0, #N
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX1(write);
  INS01  : ISS;
  ALU    : EX1;
%}
6002 
//------- Compare operation -------------------------------

// Compare reg-reg
// Eg.  CMP     x0, x1
pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  op2    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Compare reg-immediate
// Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}
6029 
//------- Conditional instructions ------------------------

// Conditional no operands
// Eg.  CSINC   x0, zr, zr, <cond>
pipe_class icond_none(iRegI dst, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 2 operand
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src1   : EX1(read);
  src2   : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 1 operand
// EG.  CSEL    X0, X1, zr, <cond>
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
6067 
//------- Multiply pipeline operations --------------------

// Multiply reg-reg
// Eg.  MUL     w0, w1, w2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate
// Eg.  MADD    w0, w1, w2, w3
pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long (64-bit) multiply reg-reg
// Eg.  MUL     x0, x1, x2
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long (64-bit) multiply accumulate
// Eg.  MADD    x0, x1, x2, x3
pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}
6120 
//------- Divide pipeline operations --------------------

// 32-bit divide
// Eg.  SDIV    w0, w1, w2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

// 64-bit divide
// Eg.  SDIV    x0, x1, x2
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}
6146 
//------- Load pipeline operations ------------------------

// Load - prefetch
// Eg.  PFRM    <mem>
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, mem
// Eg.  LDR     x0, <mem>
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, reg
// Eg.  LDR     x0, [sp, x1]
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}
6180 
//------- Store pipeline operations -----------------------

// Store - zr, mem
// Eg.  STR     zr, <mem>
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, mem
// Eg.  STR     x0, <mem>
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, reg
// Eg. STR      x0, [sp, x1]
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read); // "dst" is the address register, hence (read)
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}
6214 
//------- Branch pipeline operations ----------------------

// Branch
pipe_class pipe_branch()
%{
  single_instruction;
  INS01  : ISS;
  BRANCH : EX1;
%}

// Conditional branch
pipe_class pipe_branch_cond(rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

// Compare & Branch
// EG.  CBZ/CBNZ
pipe_class pipe_cmp_branch(iRegI op1)
%{
  single_instruction;
  op1    : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}
6243 
//------- Synchronisation operations ----------------------

// Any operation requiring serialization.
// EG.  DMB/Atomic Ops/Load Acquire/Str Release
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Generic big/slow expanded idiom - also serialized
pipe_class pipe_slow()
%{
  instruction_count(10);
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}
6267 
// Empty pipeline class
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);
%}

// Default pipeline class.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);
%}
6302 
// Define the class for the Nop node.
define %{
   MachNop = pipe_class_empty;
%}
6307 
6308 %}
6309 //----------INSTRUCTIONS-------------------------------------------------------
6310 //
6311 // match      -- States which machine-independent subtree may be replaced
6312 //               by this instruction.
6313 // ins_cost   -- The estimated cost of this instruction is used by instruction
6314 //               selection to identify a minimum cost tree of machine
6315 //               instructions that matches a tree of machine-independent
6316 //               instructions.
6317 // format     -- A string providing the disassembly for this instruction.
6318 //               The value of an instruction's operand may be inserted
6319 //               by referring to it with a '$' prefix.
6320 // opcode     -- Three instruction opcodes may be provided.  These are referred
6321 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
6323 //               indicate the type of machine instruction, while secondary
6324 //               and tertiary are often used for prefix options or addressing
6325 //               modes.
6326 // ins_encode -- A list of encode classes with parameters. The encode class
6327 //               name must have been defined in an 'enc_class' specification
6328 //               in the encode section of the architecture description.
6329 
6330 // ============================================================================
6331 // Memory (Load/Store) Instructions
6332 
6333 // Load Instructions
6334 
// The !needs_acquiring_load(n) predicates below restrict these rules
// to plain loads; load-acquire forms are matched by separate rules
// (not visible here). In the "2L" variants the load node is the input
// of the ConvI2L, hence the n->in(1) in the predicate.

// Load Byte (8 bit signed)
instruct loadB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsbw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned)
instruct loadUB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed)
instruct loadS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrshw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed) into long
instruct loadS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrsh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Char (16 bit unsigned)
instruct loadUS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6446 
// Load Integer (32 bit signed)
instruct loadI(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadI mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed) into long
instruct loadI2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrsw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit unsigned) into long
// The immL_32bits mask makes the AndL redundant: ldrw already
// zero-extends the 32-bit value into the 64-bit register.
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6488 
// Load Long (64 bit signed)
//
// Plain (non-acquiring) 64-bit load; the acquiring form is matched
// by a separate rule.
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  // Fix: the disassembly annotation previously said "# int" for this
  // 64-bit load; annotate it as "# long".
  format %{ "ldr  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6502 
// Load Range
// n.b. no needs_acquiring_load predicate here, unlike the other
// loads -- presumably because an array length never needs an
// acquiring load; confirm if touching this rule.
instruct loadRange(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Pointer
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Compressed Pointer
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Narrow Klass Pointer
instruct loadNKlass(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6571 
6572 // Load Float
6573 instruct loadF(vRegF dst, memory mem)
6574 %{
6575   match(Set dst (LoadF mem));
6576   predicate(!needs_acquiring_load(n));
6577 
6578   ins_cost(4 * INSN_COST);
6579   format %{ "ldrs  $dst, $mem\t# float" %}
6580 
6581   ins_encode( aarch64_enc_ldrs(dst, mem) );
6582 
6583   ins_pipe(pipe_class_memory);
6584 %}
6585 
6586 // Load Double
6587 instruct loadD(vRegD dst, memory mem)
6588 %{
6589   match(Set dst (LoadD mem));
6590   predicate(!needs_acquiring_load(n));
6591 
6592   ins_cost(4 * INSN_COST);
6593   format %{ "ldrd  $dst, $mem\t# double" %}
6594 
6595   ins_encode( aarch64_enc_ldrd(dst, mem) );
6596 
6597   ins_pipe(pipe_class_memory);
6598 %}
6599 
6600 
6601 // Load Int Constant
6602 instruct loadConI(iRegINoSp dst, immI src)
6603 %{
6604   match(Set dst src);
6605 
6606   ins_cost(INSN_COST);
6607   format %{ "mov $dst, $src\t# int" %}
6608 
6609   ins_encode( aarch64_enc_movw_imm(dst, src) );
6610 
6611   ins_pipe(ialu_imm);
6612 %}
6613 
6614 // Load Long Constant
6615 instruct loadConL(iRegLNoSp dst, immL src)
6616 %{
6617   match(Set dst src);
6618 
6619   ins_cost(INSN_COST);
6620   format %{ "mov $dst, $src\t# long" %}
6621 
6622   ins_encode( aarch64_enc_mov_imm(dst, src) );
6623 
6624   ins_pipe(ialu_imm);
6625 %}
6626 
6627 // Load Pointer Constant
6628 
6629 instruct loadConP(iRegPNoSp dst, immP con)
6630 %{
6631   match(Set dst con);
6632 
6633   ins_cost(INSN_COST * 4);
6634   format %{
6635     "mov  $dst, $con\t# ptr\n\t"
6636   %}
6637 
6638   ins_encode(aarch64_enc_mov_p(dst, con));
6639 
6640   ins_pipe(ialu_imm);
6641 %}
6642 
6643 // Load Null Pointer Constant
6644 
6645 instruct loadConP0(iRegPNoSp dst, immP0 con)
6646 %{
6647   match(Set dst con);
6648 
6649   ins_cost(INSN_COST);
6650   format %{ "mov  $dst, $con\t# NULL ptr" %}
6651 
6652   ins_encode(aarch64_enc_mov_p0(dst, con));
6653 
6654   ins_pipe(ialu_imm);
6655 %}
6656 
6657 // Load Pointer Constant One
6658 
// Load the constant pointer value 1 (immP_1).  The format comment
// previously said "NULL ptr", a copy/paste from loadConP0.
instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
6670 
6671 // Load Poll Page Constant
6672 
6673 instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
6674 %{
6675   match(Set dst con);
6676 
6677   ins_cost(INSN_COST);
6678   format %{ "adr  $dst, $con\t# Poll Page Ptr" %}
6679 
6680   ins_encode(aarch64_enc_mov_poll_page(dst, con));
6681 
6682   ins_pipe(ialu_imm);
6683 %}
6684 
6685 // Load Byte Map Base Constant
6686 
6687 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
6688 %{
6689   match(Set dst con);
6690 
6691   ins_cost(INSN_COST);
6692   format %{ "adr  $dst, $con\t# Byte Map Base" %}
6693 
6694   ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
6695 
6696   ins_pipe(ialu_imm);
6697 %}
6698 
6699 // Load Narrow Pointer Constant
6700 
6701 instruct loadConN(iRegNNoSp dst, immN con)
6702 %{
6703   match(Set dst con);
6704 
6705   ins_cost(INSN_COST * 4);
6706   format %{ "mov  $dst, $con\t# compressed ptr" %}
6707 
6708   ins_encode(aarch64_enc_mov_n(dst, con));
6709 
6710   ins_pipe(ialu_imm);
6711 %}
6712 
6713 // Load Narrow Null Pointer Constant
6714 
6715 instruct loadConN0(iRegNNoSp dst, immN0 con)
6716 %{
6717   match(Set dst con);
6718 
6719   ins_cost(INSN_COST);
6720   format %{ "mov  $dst, $con\t# compressed NULL ptr" %}
6721 
6722   ins_encode(aarch64_enc_mov_n0(dst, con));
6723 
6724   ins_pipe(ialu_imm);
6725 %}
6726 
6727 // Load Narrow Klass Constant
6728 
6729 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
6730 %{
6731   match(Set dst con);
6732 
6733   ins_cost(INSN_COST);
6734   format %{ "mov  $dst, $con\t# compressed klass ptr" %}
6735 
6736   ins_encode(aarch64_enc_mov_nk(dst, con));
6737 
6738   ins_pipe(ialu_imm);
6739 %}
6740 
6741 // Load Packed Float Constant
6742 
6743 instruct loadConF_packed(vRegF dst, immFPacked con) %{
6744   match(Set dst con);
6745   ins_cost(INSN_COST * 4);
6746   format %{ "fmovs  $dst, $con"%}
6747   ins_encode %{
6748     __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
6749   %}
6750 
6751   ins_pipe(pipe_class_default);
6752 %}
6753 
6754 // Load Float Constant
6755 
6756 instruct loadConF(vRegF dst, immF con) %{
6757   match(Set dst con);
6758 
6759   ins_cost(INSN_COST * 4);
6760 
6761   format %{
6762     "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
6763   %}
6764 
6765   ins_encode %{
6766     __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
6767   %}
6768 
6769   ins_pipe(pipe_class_default);
6770 %}
6771 
6772 // Load Packed Double Constant
6773 
6774 instruct loadConD_packed(vRegD dst, immDPacked con) %{
6775   match(Set dst con);
6776   ins_cost(INSN_COST);
6777   format %{ "fmovd  $dst, $con"%}
6778   ins_encode %{
6779     __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
6780   %}
6781 
6782   ins_pipe(pipe_class_default);
6783 %}
6784 
6785 // Load Double Constant
6786 
// Load a double constant from the constant table.  The format comment
// previously said "float=$con", a copy/paste from loadConF.
instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(pipe_class_default);
%}
6801 
6802 // Store Instructions
6803 
6804 // Store CMS card-mark Immediate
// Store a zero byte for a CMS card-mark (StoreCM with immediate 0),
// emitted as a strb of the zero register.
// NOTE(review): unlike the other store rules below, this one carries no
// !needs_releasing_store(n) predicate — confirm that is intentional for
// card-mark stores.
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
6816 
6817 // Store Byte
6818 instruct storeB(iRegIorL2I src, memory mem)
6819 %{
6820   match(Set mem (StoreB mem src));
6821   predicate(!needs_releasing_store(n));
6822 
6823   ins_cost(INSN_COST);
6824   format %{ "strb  $src, $mem\t# byte" %}
6825 
6826   ins_encode(aarch64_enc_strb(src, mem));
6827 
6828   ins_pipe(istore_reg_mem);
6829 %}
6830 
6831 
6832 instruct storeimmB0(immI0 zero, memory mem)
6833 %{
6834   match(Set mem (StoreB mem zero));
6835   predicate(!needs_releasing_store(n));
6836 
6837   ins_cost(INSN_COST);
6838   format %{ "strb zr, $mem\t# byte" %}
6839 
6840   ins_encode(aarch64_enc_strb0(mem));
6841 
6842   ins_pipe(istore_mem);
6843 %}
6844 
6845 // Store Char/Short
6846 instruct storeC(iRegIorL2I src, memory mem)
6847 %{
6848   match(Set mem (StoreC mem src));
6849   predicate(!needs_releasing_store(n));
6850 
6851   ins_cost(INSN_COST);
6852   format %{ "strh  $src, $mem\t# short" %}
6853 
6854   ins_encode(aarch64_enc_strh(src, mem));
6855 
6856   ins_pipe(istore_reg_mem);
6857 %}
6858 
6859 instruct storeimmC0(immI0 zero, memory mem)
6860 %{
6861   match(Set mem (StoreC mem zero));
6862   predicate(!needs_releasing_store(n));
6863 
6864   ins_cost(INSN_COST);
6865   format %{ "strh  zr, $mem\t# short" %}
6866 
6867   ins_encode(aarch64_enc_strh0(mem));
6868 
6869   ins_pipe(istore_mem);
6870 %}
6871 
6872 // Store Integer
6873 
// Store Integer (32 bit).  Spacing in the match rule normalized to
// "Set mem (StoreI ...)" for consistency with every other rule here.
instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreI mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
6886 
// Store immediate zero to an int field via the zero register.  Match
// rule spacing normalized for consistency with the other rules.
instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem (StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}
6899 
6900 // Store Long (64 bit signed)
// Store Long (64 bit signed); the format comment previously said
// "# int" although this rule emits a 64-bit str.
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}
6913 
// Store Long immediate zero (64 bit)
// Store immediate zero to a long field via the zero register; the
// format comment previously said "# int" for this 64-bit store.
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
6927 
6928 // Store Pointer
6929 instruct storeP(iRegP src, memory mem)
6930 %{
6931   match(Set mem (StoreP mem src));
6932   predicate(!needs_releasing_store(n));
6933 
6934   ins_cost(INSN_COST);
6935   format %{ "str  $src, $mem\t# ptr" %}
6936 
6937   ins_encode(aarch64_enc_str(src, mem));
6938 
6939   ins_pipe(istore_reg_mem);
6940 %}
6941 
// Store NULL Pointer (immediate zero)
6943 instruct storeimmP0(immP0 zero, memory mem)
6944 %{
6945   match(Set mem (StoreP mem zero));
6946   predicate(!needs_releasing_store(n));
6947 
6948   ins_cost(INSN_COST);
6949   format %{ "str zr, $mem\t# ptr" %}
6950 
6951   ins_encode(aarch64_enc_str0(mem));
6952 
6953   ins_pipe(istore_mem);
6954 %}
6955 
6956 // Store Compressed Pointer
6957 instruct storeN(iRegN src, memory mem)
6958 %{
6959   match(Set mem (StoreN mem src));
6960   predicate(!needs_releasing_store(n));
6961 
6962   ins_cost(INSN_COST);
6963   format %{ "strw  $src, $mem\t# compressed ptr" %}
6964 
6965   ins_encode(aarch64_enc_strw(src, mem));
6966 
6967   ins_pipe(istore_reg_mem);
6968 %}
6969 
// Store a compressed-pointer zero by storing rheapbase: the predicate
// requires both the narrow oop base and narrow klass base to be NULL,
// in which case rheapbase contains zero (see "(rheapbase==0)" in the
// format), so no separate zero needs to be materialized.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}
6984 
6985 // Store Float
6986 instruct storeF(vRegF src, memory mem)
6987 %{
6988   match(Set mem (StoreF mem src));
6989   predicate(!needs_releasing_store(n));
6990 
6991   ins_cost(INSN_COST);
6992   format %{ "strs  $src, $mem\t# float" %}
6993 
6994   ins_encode( aarch64_enc_strs(src, mem) );
6995 
6996   ins_pipe(pipe_class_memory);
6997 %}
6998 
6999 // TODO
7000 // implement storeImmF0 and storeFImmPacked
7001 
7002 // Store Double
7003 instruct storeD(vRegD src, memory mem)
7004 %{
7005   match(Set mem (StoreD mem src));
7006   predicate(!needs_releasing_store(n));
7007 
7008   ins_cost(INSN_COST);
7009   format %{ "strd  $src, $mem\t# double" %}
7010 
7011   ins_encode( aarch64_enc_strd(src, mem) );
7012 
7013   ins_pipe(pipe_class_memory);
7014 %}
7015 
7016 // Store Compressed Klass Pointer
7017 instruct storeNKlass(iRegN src, memory mem)
7018 %{
7019   predicate(!needs_releasing_store(n));
7020   match(Set mem (StoreNKlass mem src));
7021 
7022   ins_cost(INSN_COST);
7023   format %{ "strw  $src, $mem\t# compressed klass ptr" %}
7024 
7025   ins_encode(aarch64_enc_strw(src, mem));
7026 
7027   ins_pipe(istore_reg_mem);
7028 %}
7029 
7030 // TODO
7031 // implement storeImmD0 and storeDImmPacked
7032 
7033 // prefetch instructions
7034 // Must be safe to execute with invalid address (cannot fault).
7035 
7036 instruct prefetchalloc( memory mem ) %{
7037   match(PrefetchAllocation mem);
7038 
7039   ins_cost(INSN_COST);
7040   format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}
7041 
7042   ins_encode( aarch64_enc_prefetchw(mem) );
7043 
7044   ins_pipe(iload_prefetch);
7045 %}
7046 
7047 //  ---------------- volatile loads and stores ----------------
7048 
7049 // Load Byte (8 bit signed)
7050 instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7051 %{
7052   match(Set dst (LoadB mem));
7053 
7054   ins_cost(VOLATILE_REF_COST);
7055   format %{ "ldarsb  $dst, $mem\t# byte" %}
7056 
7057   ins_encode(aarch64_enc_ldarsb(dst, mem));
7058 
7059   ins_pipe(pipe_serial);
7060 %}
7061 
7062 // Load Byte (8 bit signed) into long
7063 instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7064 %{
7065   match(Set dst (ConvI2L (LoadB mem)));
7066 
7067   ins_cost(VOLATILE_REF_COST);
7068   format %{ "ldarsb  $dst, $mem\t# byte" %}
7069 
7070   ins_encode(aarch64_enc_ldarsb(dst, mem));
7071 
7072   ins_pipe(pipe_serial);
7073 %}
7074 
7075 // Load Byte (8 bit unsigned)
7076 instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7077 %{
7078   match(Set dst (LoadUB mem));
7079 
7080   ins_cost(VOLATILE_REF_COST);
7081   format %{ "ldarb  $dst, $mem\t# byte" %}
7082 
7083   ins_encode(aarch64_enc_ldarb(dst, mem));
7084 
7085   ins_pipe(pipe_serial);
7086 %}
7087 
7088 // Load Byte (8 bit unsigned) into long
7089 instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7090 %{
7091   match(Set dst (ConvI2L (LoadUB mem)));
7092 
7093   ins_cost(VOLATILE_REF_COST);
7094   format %{ "ldarb  $dst, $mem\t# byte" %}
7095 
7096   ins_encode(aarch64_enc_ldarb(dst, mem));
7097 
7098   ins_pipe(pipe_serial);
7099 %}
7100 
7101 // Load Short (16 bit signed)
7102 instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7103 %{
7104   match(Set dst (LoadS mem));
7105 
7106   ins_cost(VOLATILE_REF_COST);
7107   format %{ "ldarshw  $dst, $mem\t# short" %}
7108 
7109   ins_encode(aarch64_enc_ldarshw(dst, mem));
7110 
7111   ins_pipe(pipe_serial);
7112 %}
7113 
7114 instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7115 %{
7116   match(Set dst (LoadUS mem));
7117 
7118   ins_cost(VOLATILE_REF_COST);
7119   format %{ "ldarhw  $dst, $mem\t# short" %}
7120 
7121   ins_encode(aarch64_enc_ldarhw(dst, mem));
7122 
7123   ins_pipe(pipe_serial);
7124 %}
7125 
7126 // Load Short/Char (16 bit unsigned) into long
7127 instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7128 %{
7129   match(Set dst (ConvI2L (LoadUS mem)));
7130 
7131   ins_cost(VOLATILE_REF_COST);
7132   format %{ "ldarh  $dst, $mem\t# short" %}
7133 
7134   ins_encode(aarch64_enc_ldarh(dst, mem));
7135 
7136   ins_pipe(pipe_serial);
7137 %}
7138 
7139 // Load Short/Char (16 bit signed) into long
// Volatile Load Short (16 bit signed) into long.  The format previously
// showed "ldarh" although the encoding emits the sign-extending ldarsh
// (aarch64_enc_ldarsh).
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
7151 
7152 // Load Integer (32 bit signed)
7153 instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7154 %{
7155   match(Set dst (LoadI mem));
7156 
7157   ins_cost(VOLATILE_REF_COST);
7158   format %{ "ldarw  $dst, $mem\t# int" %}
7159 
7160   ins_encode(aarch64_enc_ldarw(dst, mem));
7161 
7162   ins_pipe(pipe_serial);
7163 %}
7164 
7165 // Load Integer (32 bit unsigned) into long
// Volatile Load Integer (32 bit) zero-extended into long.  The
// immL_32bits mask operand in the match rule needs no explicit AND:
// the 32-bit ldarw leaves the upper word of the destination zero,
// which is exactly the masking the AndL requests.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
7177 
7178 // Load Long (64 bit signed)
// Volatile Load Long (64 bit signed); the format comment previously
// said "# int" for this 64-bit load-acquire.
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
7190 
7191 // Load Pointer
7192 instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
7193 %{
7194   match(Set dst (LoadP mem));
7195 
7196   ins_cost(VOLATILE_REF_COST);
7197   format %{ "ldar  $dst, $mem\t# ptr" %}
7198 
7199   ins_encode(aarch64_enc_ldar(dst, mem));
7200 
7201   ins_pipe(pipe_serial);
7202 %}
7203 
7204 // Load Compressed Pointer
7205 instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
7206 %{
7207   match(Set dst (LoadN mem));
7208 
7209   ins_cost(VOLATILE_REF_COST);
7210   format %{ "ldarw  $dst, $mem\t# compressed ptr" %}
7211 
7212   ins_encode(aarch64_enc_ldarw(dst, mem));
7213 
7214   ins_pipe(pipe_serial);
7215 %}
7216 
7217 // Load Float
7218 instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
7219 %{
7220   match(Set dst (LoadF mem));
7221 
7222   ins_cost(VOLATILE_REF_COST);
7223   format %{ "ldars  $dst, $mem\t# float" %}
7224 
7225   ins_encode( aarch64_enc_fldars(dst, mem) );
7226 
7227   ins_pipe(pipe_serial);
7228 %}
7229 
7230 // Load Double
7231 instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
7232 %{
7233   match(Set dst (LoadD mem));
7234 
7235   ins_cost(VOLATILE_REF_COST);
7236   format %{ "ldard  $dst, $mem\t# double" %}
7237 
7238   ins_encode( aarch64_enc_fldard(dst, mem) );
7239 
7240   ins_pipe(pipe_serial);
7241 %}
7242 
7243 // Store Byte
7244 instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
7245 %{
7246   match(Set mem (StoreB mem src));
7247 
7248   ins_cost(VOLATILE_REF_COST);
7249   format %{ "stlrb  $src, $mem\t# byte" %}
7250 
7251   ins_encode(aarch64_enc_stlrb(src, mem));
7252 
7253   ins_pipe(pipe_class_memory);
7254 %}
7255 
7256 // Store Char/Short
7257 instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
7258 %{
7259   match(Set mem (StoreC mem src));
7260 
7261   ins_cost(VOLATILE_REF_COST);
7262   format %{ "stlrh  $src, $mem\t# short" %}
7263 
7264   ins_encode(aarch64_enc_stlrh(src, mem));
7265 
7266   ins_pipe(pipe_class_memory);
7267 %}
7268 
7269 // Store Integer
7270 
// Volatile Store Integer (32 bit).  Match rule spacing normalized to
// "Set mem (StoreI ...)" for consistency with the other rules.
instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
7282 
7283 // Store Long (64 bit signed)
// Volatile Store Long (64 bit signed); the format comment previously
// said "# int" for this 64-bit store-release.
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
7295 
7296 // Store Pointer
7297 instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
7298 %{
7299   match(Set mem (StoreP mem src));
7300 
7301   ins_cost(VOLATILE_REF_COST);
7302   format %{ "stlr  $src, $mem\t# ptr" %}
7303 
7304   ins_encode(aarch64_enc_stlr(src, mem));
7305 
7306   ins_pipe(pipe_class_memory);
7307 %}
7308 
7309 // Store Compressed Pointer
7310 instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
7311 %{
7312   match(Set mem (StoreN mem src));
7313 
7314   ins_cost(VOLATILE_REF_COST);
7315   format %{ "stlrw  $src, $mem\t# compressed ptr" %}
7316 
7317   ins_encode(aarch64_enc_stlrw(src, mem));
7318 
7319   ins_pipe(pipe_class_memory);
7320 %}
7321 
7322 // Store Float
7323 instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
7324 %{
7325   match(Set mem (StoreF mem src));
7326 
7327   ins_cost(VOLATILE_REF_COST);
7328   format %{ "stlrs  $src, $mem\t# float" %}
7329 
7330   ins_encode( aarch64_enc_fstlrs(src, mem) );
7331 
7332   ins_pipe(pipe_class_memory);
7333 %}
7334 
7335 // TODO
7336 // implement storeImmF0 and storeFImmPacked
7337 
7338 // Store Double
7339 instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
7340 %{
7341   match(Set mem (StoreD mem src));
7342 
7343   ins_cost(VOLATILE_REF_COST);
7344   format %{ "stlrd  $src, $mem\t# double" %}
7345 
7346   ins_encode( aarch64_enc_fstlrd(src, mem) );
7347 
7348   ins_pipe(pipe_class_memory);
7349 %}
7350 
7351 //  ---------------- end of volatile loads and stores ----------------
7352 
7353 // ============================================================================
7354 // BSWAP Instructions
7355 
7356 instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
7357   match(Set dst (ReverseBytesI src));
7358 
7359   ins_cost(INSN_COST);
7360   format %{ "revw  $dst, $src" %}
7361 
7362   ins_encode %{
7363     __ revw(as_Register($dst$$reg), as_Register($src$$reg));
7364   %}
7365 
7366   ins_pipe(ialu_reg);
7367 %}
7368 
7369 instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
7370   match(Set dst (ReverseBytesL src));
7371 
7372   ins_cost(INSN_COST);
7373   format %{ "rev  $dst, $src" %}
7374 
7375   ins_encode %{
7376     __ rev(as_Register($dst$$reg), as_Register($src$$reg));
7377   %}
7378 
7379   ins_pipe(ialu_reg);
7380 %}
7381 
7382 instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
7383   match(Set dst (ReverseBytesUS src));
7384 
7385   ins_cost(INSN_COST);
7386   format %{ "rev16w  $dst, $src" %}
7387 
7388   ins_encode %{
7389     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
7390   %}
7391 
7392   ins_pipe(ialu_reg);
7393 %}
7394 
7395 instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
7396   match(Set dst (ReverseBytesS src));
7397 
7398   ins_cost(INSN_COST);
7399   format %{ "rev16w  $dst, $src\n\t"
7400             "sbfmw $dst, $dst, #0, #15" %}
7401 
7402   ins_encode %{
7403     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
7404     __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
7405   %}
7406 
7407   ins_pipe(ialu_reg);
7408 %}
7409 
7410 // ============================================================================
7411 // Zero Count Instructions
7412 
7413 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
7414   match(Set dst (CountLeadingZerosI src));
7415 
7416   ins_cost(INSN_COST);
7417   format %{ "clzw  $dst, $src" %}
7418   ins_encode %{
7419     __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
7420   %}
7421 
7422   ins_pipe(ialu_reg);
7423 %}
7424 
7425 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
7426   match(Set dst (CountLeadingZerosL src));
7427 
7428   ins_cost(INSN_COST);
7429   format %{ "clz   $dst, $src" %}
7430   ins_encode %{
7431     __ clz(as_Register($dst$$reg), as_Register($src$$reg));
7432   %}
7433 
7434   ins_pipe(ialu_reg);
7435 %}
7436 
7437 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
7438   match(Set dst (CountTrailingZerosI src));
7439 
7440   ins_cost(INSN_COST * 2);
7441   format %{ "rbitw  $dst, $src\n\t"
7442             "clzw   $dst, $dst" %}
7443   ins_encode %{
7444     __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
7445     __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
7446   %}
7447 
7448   ins_pipe(ialu_reg);
7449 %}
7450 
7451 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
7452   match(Set dst (CountTrailingZerosL src));
7453 
7454   ins_cost(INSN_COST * 2);
7455   format %{ "rbit   $dst, $src\n\t"
7456             "clz    $dst, $dst" %}
7457   ins_encode %{
7458     __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
7459     __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
7460   %}
7461 
7462   ins_pipe(ialu_reg);
7463 %}
7464 
7465 // ============================================================================
7466 // MemBar Instruction
7467 
7468 instruct load_fence() %{
7469   match(LoadFence);
7470   ins_cost(VOLATILE_REF_COST);
7471 
7472   format %{ "load_fence" %}
7473 
7474   ins_encode %{
7475     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
7476   %}
7477   ins_pipe(pipe_serial);
7478 %}
7479 
// Elide a MemBarAcquire when unnecessary_acquire(n) determines no
// barrier instruction is needed (presumably because a preceding
// load-acquire already provides the ordering — the predicate makes that
// call); only a block comment is emitted, at zero cost.
instruct unnecessary_membar_acquire() %{
  predicate(unnecessary_acquire(n));
  match(MemBarAcquire);
  ins_cost(0);

  format %{ "membar_acquire (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire (elided)");
  %}

  ins_pipe(pipe_class_empty);
%}
7493 
7494 instruct membar_acquire() %{
7495   match(MemBarAcquire);
7496   ins_cost(VOLATILE_REF_COST);
7497 
7498   format %{ "membar_acquire" %}
7499 
7500   ins_encode %{
7501     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
7502   %}
7503 
7504   ins_pipe(pipe_serial);
7505 %}
7506 
7507 
7508 instruct membar_acquire_lock() %{
7509   match(MemBarAcquireLock);
7510   ins_cost(VOLATILE_REF_COST);
7511 
7512   format %{ "membar_acquire_lock" %}
7513 
7514   ins_encode %{
7515     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
7516   %}
7517 
7518   ins_pipe(pipe_serial);
7519 %}
7520 
7521 instruct store_fence() %{
7522   match(StoreFence);
7523   ins_cost(VOLATILE_REF_COST);
7524 
7525   format %{ "store_fence" %}
7526 
7527   ins_encode %{
7528     __ membar(Assembler::LoadStore|Assembler::StoreStore);
7529   %}
7530   ins_pipe(pipe_serial);
7531 %}
7532 
7533 instruct unnecessary_membar_release() %{
7534   predicate(unnecessary_release(n));
7535   match(MemBarRelease);
7536   ins_cost(0);
7537 
7538   format %{ "membar_release (elided)" %}
7539 
7540   ins_encode %{
7541     __ block_comment("membar_release (elided)");
7542   %}
7543   ins_pipe(pipe_serial);
7544 %}
7545 
7546 instruct membar_release() %{
7547   match(MemBarRelease);
7548   ins_cost(VOLATILE_REF_COST);
7549 
7550   format %{ "membar_release" %}
7551 
7552   ins_encode %{
7553     __ membar(Assembler::LoadStore|Assembler::StoreStore);
7554   %}
7555   ins_pipe(pipe_serial);
7556 %}
7557 
7558 instruct membar_storestore() %{
7559   match(MemBarStoreStore);
7560   ins_cost(VOLATILE_REF_COST);
7561 
7562   format %{ "MEMBAR-store-store" %}
7563 
7564   ins_encode %{
7565     __ membar(Assembler::StoreStore);
7566   %}
7567   ins_pipe(pipe_serial);
7568 %}
7569 
7570 instruct membar_release_lock() %{
7571   match(MemBarReleaseLock);
7572   ins_cost(VOLATILE_REF_COST);
7573 
7574   format %{ "membar_release_lock" %}
7575 
7576   ins_encode %{
7577     __ membar(Assembler::LoadStore|Assembler::StoreStore);
7578   %}
7579 
7580   ins_pipe(pipe_serial);
7581 %}
7582 
7583 instruct unnecessary_membar_volatile() %{
7584   predicate(unnecessary_volatile(n));
7585   match(MemBarVolatile);
7586   ins_cost(0);
7587 
7588   format %{ "membar_volatile (elided)" %}
7589 
7590   ins_encode %{
7591     __ block_comment("membar_volatile (elided)");
7592   %}
7593 
7594   ins_pipe(pipe_serial);
7595 %}
7596 
7597 instruct membar_volatile() %{
7598   match(MemBarVolatile);
7599   ins_cost(VOLATILE_REF_COST*100);
7600 
7601   format %{ "membar_volatile" %}
7602 
7603   ins_encode %{
7604     __ membar(Assembler::StoreLoad);
7605   %}
7606 
7607   ins_pipe(pipe_serial);
7608 %}
7609 
7610 // ============================================================================
7611 // Cast/Convert Instructions
7612 
7613 instruct castX2P(iRegPNoSp dst, iRegL src) %{
7614   match(Set dst (CastX2P src));
7615 
7616   ins_cost(INSN_COST);
7617   format %{ "mov $dst, $src\t# long -> ptr" %}
7618 
7619   ins_encode %{
7620     if ($dst$$reg != $src$$reg) {
7621       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7622     }
7623   %}
7624 
7625   ins_pipe(ialu_reg);
7626 %}
7627 
7628 instruct castP2X(iRegLNoSp dst, iRegP src) %{
7629   match(Set dst (CastP2X src));
7630 
7631   ins_cost(INSN_COST);
7632   format %{ "mov $dst, $src\t# ptr -> long" %}
7633 
7634   ins_encode %{
7635     if ($dst$$reg != $src$$reg) {
7636       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7637     }
7638   %}
7639 
7640   ins_pipe(ialu_reg);
7641 %}
7642 
7643 // Convert oop into int for vectors alignment masking
7644 instruct convP2I(iRegINoSp dst, iRegP src) %{
7645   match(Set dst (ConvL2I (CastP2X src)));
7646 
7647   ins_cost(INSN_COST);
7648   format %{ "movw $dst, $src\t# ptr -> int" %}
7649   ins_encode %{
7650     __ movw($dst$$Register, $src$$Register);
7651   %}
7652 
7653   ins_pipe(ialu_reg);
7654 %}
7655 
7656 // Convert compressed oop into int for vectors alignment masking
7657 // in case of 32bit oops (heap < 4Gb).
// Convert compressed oop into int (valid only when oops are unshifted,
// i.e. heap < 4Gb).  The format previously read "mov dst, $src" — the
// "$" on dst was missing, and the mnemonic now matches the emitted movw.
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
7671 
7672 
7673 // Convert oop pointer into compressed form
7674 instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
7675   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
7676   match(Set dst (EncodeP src));
7677   effect(KILL cr);
7678   ins_cost(INSN_COST * 3);
7679   format %{ "encode_heap_oop $dst, $src" %}
7680   ins_encode %{
7681     Register s = $src$$Register;
7682     Register d = $dst$$Register;
7683     __ encode_heap_oop(d, s);
7684   %}
7685   ins_pipe(ialu_reg);
7686 %}
7687 
7688 instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
7689   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
7690   match(Set dst (EncodeP src));
7691   ins_cost(INSN_COST * 3);
7692   format %{ "encode_heap_oop_not_null $dst, $src" %}
7693   ins_encode %{
7694     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
7695   %}
7696   ins_pipe(ialu_reg);
7697 %}
7698 
7699 instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
7700   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
7701             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
7702   match(Set dst (DecodeN src));
7703   ins_cost(INSN_COST * 3);
7704   format %{ "decode_heap_oop $dst, $src" %}
7705   ins_encode %{
7706     Register s = $src$$Register;
7707     Register d = $dst$$Register;
7708     __ decode_heap_oop(d, s);
7709   %}
7710   ins_pipe(ialu_reg);
7711 %}
7712 
7713 instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
7714   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
7715             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
7716   match(Set dst (DecodeN src));
7717   ins_cost(INSN_COST * 3);
7718   format %{ "decode_heap_oop_not_null $dst, $src" %}
7719   ins_encode %{
7720     Register s = $src$$Register;
7721     Register d = $dst$$Register;
7722     __ decode_heap_oop_not_null(d, s);
7723   %}
7724   ins_pipe(ialu_reg);
7725 %}
7726 
7727 // n.b. AArch64 implementations of encode_klass_not_null and
7728 // decode_klass_not_null do not modify the flags register so, unlike
7729 // Intel, we don't kill CR as a side effect here
7730 
7731 instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
7732   match(Set dst (EncodePKlass src));
7733 
7734   ins_cost(INSN_COST * 3);
7735   format %{ "encode_klass_not_null $dst,$src" %}
7736 
7737   ins_encode %{
7738     Register src_reg = as_Register($src$$reg);
7739     Register dst_reg = as_Register($dst$$reg);
7740     __ encode_klass_not_null(dst_reg, src_reg);
7741   %}
7742 
7743    ins_pipe(ialu_reg);
7744 %}
7745 
7746 instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
7747   match(Set dst (DecodeNKlass src));
7748 
7749   ins_cost(INSN_COST * 3);
7750   format %{ "decode_klass_not_null $dst,$src" %}
7751 
7752   ins_encode %{
7753     Register src_reg = as_Register($src$$reg);
7754     Register dst_reg = as_Register($dst$$reg);
7755     if (dst_reg != src_reg) {
7756       __ decode_klass_not_null(dst_reg, src_reg);
7757     } else {
7758       __ decode_klass_not_null(dst_reg);
7759     }
7760   %}
7761 
7762    ins_pipe(ialu_reg);
7763 %}
7764 
// Type-system-only cast: emits no code. The node exists solely so the
// optimizer can narrow the type of $dst; dst is both input and output.
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}
7774 
// Pointer cast with no runtime effect: zero-size, empty encoding.
instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}
7784 
// Integer range cast with no runtime effect: zero-size, zero-cost,
// empty encoding. Only constrains the ideal type of $dst.
instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
7795 
7796 // ============================================================================
7797 // Atomic operation instructions
7798 //
7799 // Intel and SPARC both implement Ideal Node LoadPLocked and
7800 // Store{PIL}Conditional instructions using a normal load for the
7801 // LoadPLocked and a CAS for the Store{PIL}Conditional.
7802 //
7803 // The ideal code appears only to use LoadPLocked/StorePLocked as a
7804 // pair to lock object allocations from Eden space when not using
7805 // TLABs.
7806 //
7807 // There does not appear to be a Load{IL}Locked Ideal Node and the
7808 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
7809 // and to use StoreIConditional only for 32-bit and StoreLConditional
7810 // only for 64-bit.
7811 //
7812 // We implement LoadPLocked and StorePLocked instructions using,
7813 // respectively the AArch64 hw load-exclusive and store-conditional
7814 // instructions. Whereas we must implement each of
7815 // Store{IL}Conditional using a CAS which employs a pair of
7816 // instructions comprising a load-exclusive followed by a
7817 // store-conditional.
7818 
7819 
7820 // Locked-load (linked load) of the current heap-top
7821 // used when updating the eden heap top
7822 // implemented using ldaxr on AArch64
7823 
// Linked (exclusive) load of a pointer, paired with storePConditional
// below; encoded via the shared aarch64_enc_ldaxr helper.
instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}
7836 
7837 // Conditional-store of the updated heap-top.
7838 // Used during allocation of the shared heap.
7839 // Sets flag (EQ) on success.
7840 // implemented using stlxr on AArch64.
7841 
// Store-conditional of the new heap top; succeeds only if the exclusive
// monitor from the preceding loadPLocked is still held. The encoding's
// trailing cmpw sets EQ in $cr on success, which is the rule's result.
// Note: $oldval is carried by the ideal Binary but unused by the encoding.
instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
7861 
7862 // this has to be implemented as a CAS
// this has to be implemented as a CAS
// 64-bit conditional store via the shared cmpxchg encoding; EQ is set in
// $cr when the store succeeded (i.e. *$mem matched $oldval).
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
7878 
7879 // this has to be implemented as a CAS
// this has to be implemented as a CAS
// 32-bit flavour of storeLConditional above, using the word-sized
// cmpxchgw encoding; EQ is set in $cr on success.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
7895 
7896 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
7897 // can't match them
7898 
// 32-bit CAS returning a boolean result: cmpxchgw performs the exchange,
// then cset materialises 1/0 in $res from the EQ flag. Flags are
// clobbered, hence KILL cr.
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
7915 
// 64-bit CAS returning a boolean result; see compareAndSwapI for the
// shared cmpxchg + cset-on-EQ pattern.
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
7932 
// Pointer-width CAS returning a boolean result; same pattern as
// compareAndSwapL (pointers are 64 bits here).
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
7949 
// Narrow-oop (32-bit compressed pointer) CAS returning a boolean result;
// uses the word-sized cmpxchgw like compareAndSwapI.
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapN mem (Binary oldval newval)));

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
7966 
7967 
instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
  match(Set prev (GetAndSetI mem newv));
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    // atomically swap the 32-bit word at [$mem] with $newv; old value -> $prev
    __ atomic_xchgw(as_Register($prev$$reg), as_Register($newv$$reg),
                    as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
7976 
instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegL prev) %{
  match(Set prev (GetAndSetL mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    // atomically swap the 64-bit word at [$mem] with $newv; old value -> $prev
    __ atomic_xchg(as_Register($prev$$reg), as_Register($newv$$reg),
                   as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
7985 
instruct get_and_setN(indirect mem, iRegNNoSp newv, iRegI prev) %{
  match(Set prev (GetAndSetN mem newv));
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    // narrow oops are 32 bits, so the word-sized exchange is used
    __ atomic_xchgw(as_Register($prev$$reg), as_Register($newv$$reg),
                    as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
7994 
instruct get_and_setP(indirect mem, iRegPNoSp newv, iRegP prev) %{
  match(Set prev (GetAndSetP mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    // full-width exchange for pointer values
    __ atomic_xchg(as_Register($prev$$reg), as_Register($newv$$reg),
                   as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8003 
8004 
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    // 64-bit fetch-and-add; the previous memory value lands in $newval
    __ atomic_add(as_Register($newval$$reg), as_Register($incr$$reg),
                  as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8014 
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  // cheaper variant selected when the fetched value is discarded
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    // noreg tells the helper not to produce the old value
    __ atomic_add(noreg, as_Register($incr$$reg), as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8025 
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    // immediate-increment form of get_and_addL
    __ atomic_add(as_Register($newval$$reg), $incr$$constant,
                  as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8035 
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  // immediate increment, fetched value discarded
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8046 
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    // 32-bit fetch-and-add; the previous memory value lands in $newval
    __ atomic_addw(as_Register($newval$$reg), as_Register($incr$$reg),
                   as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8056 
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  // cheaper variant selected when the fetched value is discarded
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, as_Register($incr$$reg), as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8067 
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    // immediate-increment form of get_and_addI
    __ atomic_addw(as_Register($newval$$reg), $incr$$constant,
                   as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8077 
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  // immediate increment, fetched value discarded
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8088 
8089 // Manifest a CmpL result in an integer register.
8090 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    // dst = (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
    Register result = as_Register($dst$$reg);
    __ cmp(as_Register($src1$$reg), as_Register($src2$$reg));
    __ csetw(result, Assembler::NE);          // 0 if equal, 1 otherwise
    __ cnegw(result, result, Assembler::LT);  // negate when src1 < src2
  %}

  ins_pipe(pipe_class_default);
%}
8111 
// Manifest a CmpL3 result against an add/sub-encodable immediate.
// dst = (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    // immLAddSub guarantees the constant fits an add/sub immediate, so the
    // 32-bit truncation is lossless and -con below cannot overflow
    int32_t con = (int32_t)$src2$$constant;
     if (con < 0) {
      // a negative immediate cannot be encoded in subs; compare by adding
      // its magnitude instead (adds/subs to zr set flags like cmp/cmn)
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
8136 
8137 // ============================================================================
8138 // Conditional Move Instructions
8139 
8140 // n.b. we have identical rules for both a signed compare op (cmpOp)
8141 // and an unsigned compare op (cmpOpU). it would be nice if we could
8142 // define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
8148 // of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
8149 
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    // cselw picks $src2 when $cmp holds, otherwise $src1
    __ cselw($dst$$Register, $src2$$Register, $src1$$Register,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8165 
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    // unsigned-compare twin of cmovI_reg_reg
    __ cselw($dst$$Register, $src2$$Register, $src1$$Register,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8181 
8182 // special cases where one arg is zero
8183 
8184 // n.b. this is selected in preference to the rule above because it
8185 // avoids loading constant 0 into a source register
8186 
8187 // TODO
8188 // we ought only to be able to cull one of these variants as the ideal
8189 // transforms ought always to order the zero consistently (to left/right?)
8190 
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // zero operand folded to the zero register instead of a constant load
    __ cselw($dst$$Register, $src$$Register, zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8206 
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // unsigned twin of cmovI_zero_reg; zero operand comes from zr
    __ cselw($dst$$Register, $src$$Register, zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8222 
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    // selects zero (via zr) when $cmp holds, otherwise $src
    __ cselw($dst$$Register, zr, $src$$Register,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8238 
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    // unsigned twin of cmovI_reg_zero
    __ cselw($dst$$Register, zr, $src$$Register,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8254 
8255 // special case for creating a boolean 0 or 1
8256 
8257 // n.b. this is selected in preference to the rule above because it
8258 // avoids loading constants 0 and 1 into a source register
8259 
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // csincw dst, zr, zr, cond yields 0 when cond holds and 1 otherwise —
    // the same effect as cset dst, negate_condition(cond)
    __ csincw($dst$$Register, zr, zr,
              (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
8278 
instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // unsigned twin of cmovI_reg_zero_one: csincw dst, zr, zr, cond is
    // equivalent to cset dst, negate_condition(cond)
    __ csincw($dst$$Register, zr, zr,
              (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
8297 
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    // 64-bit conditional select: $src2 when $cmp holds, else $src1
    __ csel($dst$$Register, $src2$$Register, $src1$$Register,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8313 
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    // unsigned twin of cmovL_reg_reg
    __ csel($dst$$Register, $src2$$Register, $src1$$Register,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8329 
8330 // special cases where one arg is zero
8331 
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    // selects zero (via zr) when $cmp holds, otherwise $src
    __ csel($dst$$Register, zr, $src$$Register,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8347 
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    // unsigned twin of cmovL_reg_zero
    __ csel($dst$$Register, zr, $src$$Register,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8363 
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    // selects $src when $cmp holds, otherwise zero (via zr)
    __ csel($dst$$Register, $src$$Register, zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8379 
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    // unsigned twin of cmovL_zero_reg
    __ csel($dst$$Register, $src$$Register, zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8395 
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    // pointer-width conditional select: $src2 when $cmp holds, else $src1
    __ csel($dst$$Register, $src2$$Register, $src1$$Register,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8411 
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    // unsigned twin of cmovP_reg_reg
    __ csel($dst$$Register, $src2$$Register, $src1$$Register,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8427 
8428 // special cases where one arg is zero
8429 
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    // selects null (via zr) when $cmp holds, otherwise $src
    __ csel($dst$$Register, zr, $src$$Register,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8445 
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    // unsigned twin of cmovP_reg_zero
    __ csel($dst$$Register, zr, $src$$Register,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8461 
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    // selects $src when $cmp holds, otherwise null (via zr)
    __ csel($dst$$Register, $src$$Register, zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8477 
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    // unsigned twin of cmovP_zero_reg
    __ csel($dst$$Register, $src$$Register, zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8493 
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    // narrow oops are 32-bit, so the word-sized select is used
    __ cselw($dst$$Register, $src2$$Register, $src1$$Register,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8509 
// Unsigned-compare conditional move of a compressed pointer.
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  // fixed: this rule takes cmpOpU/rFlagsRegU, so the disassembly comment
  // must say "unsigned" (it previously said "signed", copied from the
  // cmovN_reg_reg rule above; cf. cmovUI_reg_reg and cmovUN_reg_zero)
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8525 
8526 // special cases where one arg is zero
8527 
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    // selects narrow null (via zr) when $cmp holds, otherwise $src
    __ cselw($dst$$Register, zr, $src$$Register,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8543 
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    // unsigned twin of cmovN_reg_zero
    __ cselw($dst$$Register, zr, $src$$Register,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8559 
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    // selects $src when $cmp holds, otherwise narrow null (via zr)
    __ cselw($dst$$Register, $src$$Register, zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8575 
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    // unsigned twin of cmovN_zero_reg
    __ cselw($dst$$Register, $src$$Register, zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8591 
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    // fcsels picks $src2 when the condition holds, otherwise $src1
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(pipe_class_default);
%}
8609 
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    // unsigned twin of cmovF_reg
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(pipe_class_default);
%}
8627 
// Conditional move of a double (signed compare flavour).
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // fixed: this is the CMoveD/fcseld (double) rule, but the disassembly
  // comment said "cmove float", copy-pasted from cmovF_reg above
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(pipe_class_default);
%}
8645 
// Conditional move of a double (unsigned compare flavour).
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // fixed: this is the CMoveD/fcseld (double) rule, but the disassembly
  // comment said "cmove float", copy-pasted from cmovUF_reg above
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(pipe_class_default);
%}
8663 
8664 // ============================================================================
8665 // Arithmetic Instructions
8666 //
8667 
8668 // Integer Addition
8669 
8670 // TODO
8671 // these currently employ operations which do not set CR and hence are
8672 // not flagged as killing CR but we would like to isolate the cases
8673 // where we want to set flags from those where we don't. need to work
8674 // out how to do that.
8675 
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    // 32-bit register-register add; does not touch the flags
    __ addw($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}
8690 
// 32-bit add of an add/sub-encodable immediate, via the shared
// addsubw immediate encoding.
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
8704 
// Same as addI_reg_imm but folds away a ConvL2I on the register input:
// the 32-bit addw reads only the low word of the long, so no explicit
// narrowing instruction is needed.
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
8718 
8719 // Pointer Addition
// Pointer Addition
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    // 64-bit add of a long offset to a pointer base
    __ add($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}
8734 
// Pointer add with a 32-bit offset: folds the ConvI2L into the
// add's sxtw extended-register form (one instruction).
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}
8749 
// Pointer add with a scaled (left-shifted) 64-bit index:
// folds the shift into a single lea with an lsl addressing mode.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
8764 
// Pointer add with a scaled 32-bit index: folds both the ConvI2L
// and the shift into one lea with a scaled sxtw addressing mode.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
8779 
// (ConvI2L src) << scale collapsed into a single sbfiz
// (sign-extend the low 32 bits and insert at bit position scale).
// NOTE(review): `MIN` is assumed to be a macro available to the
// generated code (HotSpot convention is MIN2) — TODO confirm.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
8794 
8795 // Pointer Immediate Addition
8796 // n.b. this needs to be more expensive than using an indirect memory
8797 // operand
// Pointer + add/sub-encodable immediate offset.
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
8811 
8812 // Long Addition
// 64-bit integer add, register + register.
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
8828 
// No constant pool entries required.
// Long Immediate Addition.
// 64-bit integer add, register + add/sub-encodable immediate.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
8843 
8844 // Integer Subtraction
// 32-bit integer subtract, register - register.
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
8859 
8860 // Immediate Subtraction
// 32-bit integer subtract, register - immediate; opcode 0x1
// selects the "sub" variant of the shared add/sub encoder.
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
8874 
8875 // Long Subtraction
// 64-bit integer subtract, register - register.
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
8891 
// No constant pool entries required.
// Long Immediate Subtraction.
// 64-bit integer subtract, register - immediate; opcode 0x1
// selects the "sub" variant of the shared add/sub encoder.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  // Fixed debug format: was "sub$dst, ..." (missing separator
  // between mnemonic and destination operand).
  format %{ "sub  $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
8906 
8907 // Integer Negation (special case for sub)
8908 
// 32-bit negate: matches (0 - src) and emits negw.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
8922 
8923 // Long Negation
8924 
// 64-bit negate: matches (0 - src) and emits neg.
instruct negL_reg(iRegLNoSp dst, iRegIorL2I src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
8938 
8939 // Integer Multiply
8940 
// 32-bit integer multiply.
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
8955 
// 32x32 -> 64 signed multiply: matches a 64-bit multiply whose
// both inputs are sign-extended ints and emits a single smull.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
8970 
8971 // Long Multiply
8972 
// 64-bit integer multiply (low 64 bits of the product).
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
8987 
// High 64 bits of a signed 64x64 multiply (ideal MulHiL),
// emitted as a single smulh.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  // Fixed debug format: removed the stray ", " that preceded the
  // "# mulhi" annotation in the disassembly string.
  format %{ "smulh   $dst, $src1, $src2\t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
9003 
9004 // Combined Integer Multiply & Add/Sub
9005 
// Fused 32-bit multiply-add: dst = src3 + src1 * src2.
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Fixed debug format: the encoder emits the 32-bit maddw, so the
  // disassembly string now says "maddw" (was "madd").
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
9021 
// Fused 32-bit multiply-subtract: dst = src3 - src1 * src2.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Fixed debug format: the encoder emits the 32-bit msubw, so the
  // disassembly string now says "msubw" (was "msub").
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
9037 
9038 // Combined Long Multiply & Add/Sub
9039 
// Fused 64-bit multiply-add: dst = src3 + src1 * src2.
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
9055 
// Fused 64-bit multiply-subtract: dst = src3 - src1 * src2.
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
9071 
9072 // Integer Divide
9073 
// 32-bit signed divide via the shared sdivw encoder.
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
9083 
// (src >> 31) >>> 31 == sign-bit extract: collapses the two
// shifts into a single logical shift right by 31.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}
9093 
// Round-toward-zero prologue for divide-by-2: src + (src >>> 31)
// folded into one addw with an LSR #31 shifted operand.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
9107 
9108 // Long Divide
9109 
// 64-bit signed divide via the shared sdiv encoder.
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
9119 
// 64-bit sign-bit extract: (src >> 63) >>> 63 collapsed into a
// single logical shift right by 63.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
9129 
// Round-toward-zero prologue for long divide-by-2:
// src + (src >>> 63) folded into one add with LSR #63.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  // Fixed debug format: show the LSR shifted-operand form actually
  // emitted, matching the int twin div2Round above.
  format %{ "add $dst, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
9143 
9144 // Integer Remainder
9145 
// 32-bit signed remainder: sdivw into rscratch1, then
// msubw dst = src1 - (quotient * src2).
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  // Fixed garbled debug format: was "msubw($dst, ..." with a stray
  // unbalanced parenthesis.
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
9156 
9157 // Long Remainder
9158 
// 64-bit signed remainder: sdiv into rscratch1, then
// msub dst = src1 - (quotient * src2).
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  // Fixed garbled debug format: was "msub($dst, ..." with a stray
  // unbalanced parenthesis; also added the "\t" continuation indent
  // to match modI above.
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
9169 
9170 // Integer Shifts
9171 
9172 // Shift Left Register
// 32-bit shift left by a register amount (lslvw).
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9187 
9188 // Shift Left Immediate
// 32-bit shift left by an immediate; count masked to 0-31 per
// Java shift semantics.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9203 
9204 // Shift Right Logical Register
// 32-bit unsigned (logical) shift right by a register amount.
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9219 
9220 // Shift Right Logical Immediate
// 32-bit unsigned shift right by an immediate; count masked 0-31.
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9235 
9236 // Shift Right Arithmetic Register
// 32-bit arithmetic shift right by a register amount.
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9251 
9252 // Shift Right Arithmetic Immediate
// 32-bit arithmetic shift right by an immediate; count masked 0-31.
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9267 
9268 // Combined Int Mask and Right Shift (using UBFM)
9269 // TODO
9270 
9271 // Long Shifts
9272 
9273 // Shift Left Register
// 64-bit shift left by a register amount (lslv).
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9288 
9289 // Shift Left Immediate
// 64-bit shift left by an immediate; count masked to 0-63.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9304 
9305 // Shift Right Logical Register
// 64-bit unsigned (logical) shift right by a register amount.
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9320 
9321 // Shift Right Logical Immediate
// 64-bit unsigned shift right by an immediate; count masked 0-63.
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9336 
9337 // A special-case pattern for card table stores.
// Unsigned shift right of a pointer reinterpreted as a long
// (CastP2X); used for card-table index computation in stores.
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9352 
9353 // Shift Right Arithmetic Register
// 64-bit arithmetic shift right by a register amount.
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9368 
9369 // Shift Right Arithmetic Immediate
// 64-bit arithmetic shift right by an immediate; count masked 0-63.
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9384 
9385 // BEGIN This section of the file is automatically generated. Do not edit --------------
9386 
// (m4-generated) Bitwise NOT of a long: src1 ^ -1 -> eon with zr.
instruct regL_not_reg(iRegLNoSp dst,
                         iRegL src1, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
// (m4-generated) Bitwise NOT of an int: src1 ^ -1 -> eonw with zr.
instruct regI_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
9419 
// (m4-generated) src1 & ~src2 -> single bicw.
instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9436 
// (m4-generated) src1 & ~src2 -> single bic (64-bit).
instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9453 
// (m4-generated) src1 | ~src2 -> single ornw.
instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9470 
// (m4-generated) src1 | ~src2 -> single orn (64-bit).
instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9487 
// (m4-generated) ~(src1 ^ src2) -> single eonw.
instruct XorI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9504 
// (m4-generated) ~(src1 ^ src2) -> single eon (64-bit).
instruct XorL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9521 
// (m4-generated) src1 & ~(src2 >>> src3) -> bicw with LSR operand.
// Fix: 32-bit (w) shifted-register forms take a shift amount mod 32,
// so the constant is masked with 0x1f, not the 64-bit 0x3f.
// NOTE(review): mirror this fix in the m4 generator source.
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9539 
// (m4-generated) src1 & ~(src2 >>> src3) -> bic with LSR operand
// (64-bit form: shift amount correctly masked with 0x3f).
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9557 
// (m4-generated) src1 & ~(src2 >> src3) -> bicw with ASR operand.
// Fix: 32-bit (w) forms mask the shift amount with 0x1f, not 0x3f.
// NOTE(review): mirror this fix in the m4 generator source.
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9575 
// (m4-generated) src1 & ~(src2 >> src3) -> bic with ASR operand
// (64-bit form: shift amount correctly masked with 0x3f).
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9593 
// (m4-generated) src1 & ~(src2 << src3) -> bicw with LSL operand.
// Fix: 32-bit (w) forms mask the shift amount with 0x1f, not 0x3f.
// NOTE(review): mirror this fix in the m4 generator source.
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9611 
// (m4-generated) src1 & ~(src2 << src3) -> bic with LSL operand
// (64-bit form: shift amount correctly masked with 0x3f).
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9629 
// (m4-generated) ~(src1 ^ (src2 >>> src3)) -> eonw with LSR operand.
// Fix: 32-bit (w) forms mask the shift amount with 0x1f, not 0x3f.
// NOTE(review): mirror this fix in the m4 generator source.
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9647 
// (m4-generated) ~(src1 ^ (src2 >>> src3)) -> eon with LSR operand
// (64-bit form: shift amount correctly masked with 0x3f).
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9665 
// (m4-generated) ~(src1 ^ (src2 >> src3)) -> eonw with ASR operand.
// Fix: 32-bit (w) forms mask the shift amount with 0x1f, not 0x3f.
// NOTE(review): mirror this fix in the m4 generator source.
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9683 
// (m4-generated) ~(src1 ^ (src2 >> src3)) -> eon with ASR operand
// (64-bit form: shift amount correctly masked with 0x3f).
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9701 
// (m4-generated) ~(src1 ^ (src2 << src3)) -> eonw with LSL operand.
// Fix: 32-bit (w) forms mask the shift amount with 0x1f, not 0x3f.
// NOTE(review): mirror this fix in the m4 generator source.
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9719 
// (m4-generated) ~(src1 ^ (src2 << src3)) -> eon with LSL operand
// (64-bit form: shift amount correctly masked with 0x3f).
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9737 
// (m4-generated) src1 | ~(src2 >>> src3) -> ornw with LSR operand.
// Fix: 32-bit (w) forms mask the shift amount with 0x1f, not 0x3f.
// NOTE(review): mirror this fix in the m4 generator source.
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9755 
// Matches src1 | ((src2 >>> src3) ^ -1L) == src1 | ~(src2 >>> src3);
// folds everything into a single orn with an LSR-shifted operand.
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 64-bit op: shift amount reduced mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9773 
// Matches src1 | ((src2 >> src3) ^ -1) == src1 | ~(src2 >> src3);
// folds everything into a single ornw with an ASR-shifted operand.
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 32-bit op: mask shift amount with 0x1f, not 0x3f — Java
              // int shifts are mod 32 and w-form encodings allow 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9791 
// Matches src1 | ((src2 >> src3) ^ -1L) == src1 | ~(src2 >> src3);
// folds everything into a single orn with an ASR-shifted operand.
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 64-bit op: shift amount reduced mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9809 
// Matches src1 | ((src2 << src3) ^ -1) == src1 | ~(src2 << src3);
// folds everything into a single ornw with an LSL-shifted operand.
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 32-bit op: mask shift amount with 0x1f, not 0x3f — Java
              // int shifts are mod 32 and w-form encodings allow 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9827 
// Matches src1 | ((src2 << src3) ^ -1L) == src1 | ~(src2 << src3);
// folds everything into a single orn with an LSL-shifted operand.
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 64-bit op: shift amount reduced mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9845 
// Matches src1 & (src2 >>> src3); folds the shift into andw's
// LSR-shifted register operand.
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 32-bit op: mask shift amount with 0x1f, not 0x3f — Java
              // int shifts are mod 32 and w-form encodings allow 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9864 
// Matches src1 & (src2 >>> src3); folds the shift into andr's
// LSR-shifted register operand.
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 64-bit op: shift amount reduced mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9883 
// Matches src1 & (src2 >> src3); folds the shift into andw's
// ASR-shifted register operand.
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 32-bit op: mask shift amount with 0x1f, not 0x3f — Java
              // int shifts are mod 32 and w-form encodings allow 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9902 
// Matches src1 & (src2 >> src3); folds the shift into andr's
// ASR-shifted register operand.
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 64-bit op: shift amount reduced mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9921 
// Matches src1 & (src2 << src3); folds the shift into andw's
// LSL-shifted register operand.
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 32-bit op: mask shift amount with 0x1f, not 0x3f — Java
              // int shifts are mod 32 and w-form encodings allow 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9940 
// Matches src1 & (src2 << src3); folds the shift into andr's
// LSL-shifted register operand.
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 64-bit op: shift amount reduced mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9959 
// Matches src1 ^ (src2 >>> src3); folds the shift into eorw's
// LSR-shifted register operand.
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 32-bit op: mask shift amount with 0x1f, not 0x3f — Java
              // int shifts are mod 32 and w-form encodings allow 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9978 
// Matches src1 ^ (src2 >>> src3); folds the shift into eor's
// LSR-shifted register operand.
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 64-bit op: shift amount reduced mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9997 
// Matches src1 ^ (src2 >> src3); folds the shift into eorw's
// ASR-shifted register operand.
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 32-bit op: mask shift amount with 0x1f, not 0x3f — Java
              // int shifts are mod 32 and w-form encodings allow 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10016 
// Matches src1 ^ (src2 >> src3); folds the shift into eor's
// ASR-shifted register operand.
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 64-bit op: shift amount reduced mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10035 
// Matches src1 ^ (src2 << src3); folds the shift into eorw's
// LSL-shifted register operand.
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 32-bit op: mask shift amount with 0x1f, not 0x3f — Java
              // int shifts are mod 32 and w-form encodings allow 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10054 
// Matches src1 ^ (src2 << src3); folds the shift into eor's
// LSL-shifted register operand.
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 64-bit op: shift amount reduced mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10073 
// Matches src1 | (src2 >>> src3); folds the shift into orrw's
// LSR-shifted register operand.
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 32-bit op: mask shift amount with 0x1f, not 0x3f — Java
              // int shifts are mod 32 and w-form encodings allow 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10092 
// Matches src1 | (src2 >>> src3); folds the shift into orr's
// LSR-shifted register operand.
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 64-bit op: shift amount reduced mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10111 
// Matches src1 | (src2 >> src3); folds the shift into orrw's
// ASR-shifted register operand.
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 32-bit op: mask shift amount with 0x1f, not 0x3f — Java
              // int shifts are mod 32 and w-form encodings allow 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10130 
// Matches src1 | (src2 >> src3); folds the shift into orr's
// ASR-shifted register operand.
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 64-bit op: shift amount reduced mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10149 
// Matches src1 | (src2 << src3); folds the shift into orrw's
// LSL-shifted register operand.
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 32-bit op: mask shift amount with 0x1f, not 0x3f — Java
              // int shifts are mod 32 and w-form encodings allow 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10168 
// Matches src1 | (src2 << src3); folds the shift into orr's
// LSL-shifted register operand.
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 64-bit op: shift amount reduced mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10187 
// Matches src1 + (src2 >>> src3); folds the shift into addw's
// LSR-shifted register operand.
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 32-bit op: mask shift amount with 0x1f, not 0x3f — Java
              // int shifts are mod 32 and w-form encodings allow 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10206 
// Matches src1 + (src2 >>> src3); folds the shift into add's
// LSR-shifted register operand.
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 64-bit op: shift amount reduced mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10225 
// Matches src1 + (src2 >> src3); folds the shift into addw's
// ASR-shifted register operand.
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 32-bit op: mask shift amount with 0x1f, not 0x3f — Java
              // int shifts are mod 32 and w-form encodings allow 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10244 
// Matches src1 + (src2 >> src3); folds the shift into add's
// ASR-shifted register operand.
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 64-bit op: shift amount reduced mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10263 
// Matches src1 + (src2 << src3); folds the shift into addw's
// LSL-shifted register operand.
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 32-bit op: mask shift amount with 0x1f, not 0x3f — Java
              // int shifts are mod 32 and w-form encodings allow 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10282 
// Matches src1 + (src2 << src3); folds the shift into add's
// LSL-shifted register operand.
instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 64-bit op: shift amount reduced mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10301 
// Matches src1 - (src2 >>> src3); folds the shift into subw's
// LSR-shifted register operand.
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 32-bit op: mask shift amount with 0x1f, not 0x3f — Java
              // int shifts are mod 32 and w-form encodings allow 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10320 
// Matches src1 - (src2 >>> src3); folds the shift into sub's
// LSR-shifted register operand.
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // 64-bit op: shift amount reduced mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10339 
// Matches src1 - (src2 >> src3); folds the shift into subw's
// ASR-shifted register operand.
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 32-bit op: mask shift amount with 0x1f, not 0x3f — Java
              // int shifts are mod 32 and w-form encodings allow 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10358 
// Matches src1 - (src2 >> src3); folds the shift into sub's
// ASR-shifted register operand.
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // 64-bit op: shift amount reduced mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10377 
// Matches src1 - (src2 << src3); folds the shift into subw's
// LSL-shifted register operand.
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 32-bit op: mask shift amount with 0x1f, not 0x3f — Java
              // int shifts are mod 32 and w-form encodings allow 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10396 
// Matches src1 - (src2 << src3); folds the shift into sub's
// LSL-shifted register operand.
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // 64-bit op: shift amount reduced mod 64.
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10415 
10416 
10417 
10418 // Shift Left followed by Shift Right.
10419 // This idiom is used by the compiler for the i2b bytecode etc.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    // Translate (src << lshift) >> rshift into the sbfm immr/imms
    // encoding: immr = (rshift - lshift) mod 64, imms = 63 - lshift.
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
10440 
10441 // Shift Left followed by Shift Right.
10442 // This idiom is used by the compiler for the i2b bytecode etc.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    // Translate (src << lshift) >> rshift into the sbfmw immr/imms
    // encoding: immr = (rshift - lshift) mod 32, imms = 31 - lshift.
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
10463 
10464 // Shift Left followed by Shift Right.
10465 // This idiom is used by the compiler for the i2b bytecode etc.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    // Translate (src << lshift) >>> rshift into the ubfm immr/imms
    // encoding: immr = (rshift - lshift) mod 64, imms = 63 - lshift.
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
10486 
10487 // Shift Left followed by Shift Right.
10488 // This idiom is used by the compiler for the i2b bytecode etc.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    // Translate (src << lshift) >>> rshift into the ubfmw immr/imms
    // encoding: immr = (rshift - lshift) mod 32, imms = 31 - lshift.
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
10509 // Bitfield extract with shift & mask
10510 
// Unsigned bitfield extract: (src >>> rshift) & mask, where mask is of
// the form 2^w - 1 (guaranteed by immI_bitmask), becomes a single ubfxw
// extracting w bits starting at bit rshift.
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  // NOTE(review): the disassembly string omits $rshift — confirm intended.
  format %{ "ubfxw $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    // mask == 2^width - 1, so width is the number of bits to extract.
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// Unsigned bitfield extract (long): (src >>> rshift) & mask, where mask
// is of the form 2^w - 1 (guaranteed by immL_bitmask), becomes a single
// ubfx extracting w bits starting at bit rshift.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    // mask == 2^width - 1, so width is the number of bits to extract.
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
10541 
10542 // We can use ubfx when extending an And with a mask when we know mask
10543 // is positive.  We know that because immI_bitmask guarantees it.
// Int bitfield extract widened to long: (long)((src >>> rshift) & mask).
// Since immI_bitmask guarantees mask is positive (2^w - 1), the int
// result is non-negative and the 64-bit ubfx zero-extension is correct.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    // mask == 2^width - 1, so width is the number of bits to extract.
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
10559 
10560 // Rotations
10561 
// Long rotate-style combine: (src1 << lshift) | (src2 >>> rshift) with
// lshift + rshift == 64 (enforced by the predicate), emitted as extr.
instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  // extr only implements this pattern when the two shift counts sum to 64.
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
10576 
// Int rotate-style combine: (src1 << lshift) | (src2 >>> rshift) with
// lshift + rshift == 32 (enforced by the predicate), emitted as extrw.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  // extrw only implements this pattern when the two shift counts sum to 32.
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
10591 
// Same as extrOrL but for AddL: when lshift + rshift == 64 the shifted
// pieces cannot overlap, so ADD and OR are equivalent; emitted as extr.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  // extr only implements this pattern when the two shift counts sum to 64.
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
10606 
// Same as extrOrI but for AddI: when lshift + rshift == 32 the shifted
// pieces cannot overlap, so ADD and OR are equivalent; emitted as extrw.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  // extrw only implements this pattern when the two shift counts sum to 32.
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
10621 
10622 
10623 // rol expander
10624 
instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  // Expander only (no match rule): 64-bit rotate-left by a variable
  // amount, implemented as rotate-right by the negated count:
  // rol(x, s) == ror(x, -s) since rorv reduces the count mod 64.
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    // rscratch1 = 0 - shift; clobbers flags? no — subw writes no flags,
    // but cr is listed as an operand for the expand rules that use this.
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
10638 
10639 // rol expander
10640 
instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  // Expander only (no match rule): 32-bit rotate-left by a variable
  // amount, implemented as rotate-right by the negated count:
  // rol(x, s) == ror(x, -s) since rorvw reduces the count mod 32.
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    // rscratch1 = 0 - shift
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
10654 
// Long rotate-left written as (src << shift) | (src >>> (64 - shift)).
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}
10663 
// Long rotate-left written as (src << shift) | (src >>> (0 - shift)):
// since long shift counts are taken mod 64, (0 - shift) == (64 - shift).
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}
10672 
// Match the 32-bit rotate-left idiom (x << s) | (x >>> (32 - s)).
//
// FIX: this rule matches an int (OrI/LShiftI/URShiftI) pattern but was
// declared with long register classes and expanded to rolL_rReg, which
// performs a 64-bit rotate (rorv) — giving wrong results for an int
// rotate whenever the high word matters.  It now uses the int register
// classes and expands to rolI_rReg, which emits the 32-bit rorvw.
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
10681 
// 32-bit rotate-left with the count canonicalised to SubI(0, shift).
//
// FIX: as with rolI_rReg_Var_C_32, this int-typed match rule previously
// used long register classes and expanded to the 64-bit rolL_rReg.
// It now expands to the 32-bit rolI_rReg (rorvw) with matching int
// register classes.
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
10690 
// ror expander
//
// Rotate-right maps directly onto the hardware rorv instruction, so no
// scratch register or negation is needed (hence plain INSN_COST, vs.
// INSN_COST * 3 for the rol expanders above).
// NOTE(review): cr appears unused in the encoding — confirm.

instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander — 32-bit variant (uses rorvw)

instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Match the Java rotate-right idiom (x >>> s) | (x << (64 - s)); as for
// rol, both the SubI(64, shift) and the canonicalised SubI(0, shift)
// forms must be matched.
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// Same idiom with the count expressed as SubI(0, shift).
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}
10738 
// Match the 32-bit rotate-right idiom (x >>> s) | (x << (32 - s)).
//
// FIX: this rule matches an int (OrI/URShiftI/LShiftI) pattern but was
// declared with long register classes and expanded to the 64-bit
// rorL_rReg (rorv), giving wrong results for an int rotate.  It now
// uses int register classes and expands to rorI_rReg (rorvw).
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
10747 
// 32-bit rotate-right with the count canonicalised to SubI(0, shift).
//
// FIX: as with rorI_rReg_Var_C_32, this int-typed match rule previously
// used long register classes and expanded to the 64-bit rorL_rReg.
// It now expands to the 32-bit rorI_rReg (rorvw) with matching int
// register classes.
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
10756 
// Add/subtract (extended)
//
// These rules fold a sign/zero extension of the second operand into the
// add/sub itself, using the AArch64 extended-register operand forms
// (sxtw/sxth/sxtb/uxtb).  The sxth/sxtb/uxtb variants recognise the
// shift-pair idiom the compiler uses for narrowing: (x << k) >> k
// sign-extends (RShift) or zero-extends (URShift) the low 32-k (or
// 64-k) bits.
// NOTE(review): this region is inside the auto-generated section (see
// the END marker below); the stray ';' after '%}' on two rules is
// preserved from the generator.

instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};

instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};


// (x << 16) >> 16 sign-extends a short; fold into add ..., sxth.
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// (x << 24) >> 24 sign-extends a byte; fold into add ..., sxtb.
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// (x << 24) >>> 24 zero-extends a byte; fold into add ..., uxtb.
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Long forms of the same idiom: shift pairs of 48/32/56 bits extend
// short/int/byte values held in long registers.
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
10876 
10877 
// Add/subtract (extended) where the zero-extension of the second
// operand is expressed as an AND with a low-bit mask (0xff / 0xffff /
// 0xffffffff).  Each rule folds the mask into the uxtb/uxth/uxtw
// extended-register form of add/sub, saving the separate AND.

instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// Subtract counterparts of the masked-extend rules above.

instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11007 
11008 // END This section of the file is automatically generated. Do not edit --------------
11009 
11010 // ============================================================================
11011 // Floating Point Arithmetic Instructions
11012 
// Single-precision FP add: AddF -> fadds.
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Double-precision FP add: AddD -> faddd.
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Single-precision FP subtract: SubF -> fsubs.
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Double-precision FP subtract: SubD -> fsubd.
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Single-precision FP multiply: MulF -> fmuls (slightly higher cost
// than add/sub).
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Double-precision FP multiply: MulD -> fmuld.
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11102 
11103 // We cannot use these fused mul w add/sub ops because they don't
11104 // produce the same result as the equivalent separated ops
// (essentially they don't round the intermediate result). That's a
// shame. Leaving them here in case we can identify cases where it is
// legitimate to use them.
11108 
11109 
11110 // instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11111 //   match(Set dst (AddF (MulF src1 src2) src3));
11112 
11113 //   format %{ "fmadds   $dst, $src1, $src2, $src3" %}
11114 
11115 //   ins_encode %{
11116 //     __ fmadds(as_FloatRegister($dst$$reg),
11117 //              as_FloatRegister($src1$$reg),
11118 //              as_FloatRegister($src2$$reg),
11119 //              as_FloatRegister($src3$$reg));
11120 //   %}
11121 
11122 //   ins_pipe(pipe_class_default);
11123 // %}
11124 
11125 // instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11126 //   match(Set dst (AddD (MulD src1 src2) src3));
11127 
11128 //   format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
11129 
11130 //   ins_encode %{
11131 //     __ fmaddd(as_FloatRegister($dst$$reg),
11132 //              as_FloatRegister($src1$$reg),
11133 //              as_FloatRegister($src2$$reg),
11134 //              as_FloatRegister($src3$$reg));
11135 //   %}
11136 
11137 //   ins_pipe(pipe_class_default);
11138 // %}
11139 
11140 // instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11141 //   match(Set dst (AddF (MulF (NegF src1) src2) src3));
11142 //   match(Set dst (AddF (NegF (MulF src1 src2)) src3));
11143 
11144 //   format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
11145 
11146 //   ins_encode %{
11147 //     __ fmsubs(as_FloatRegister($dst$$reg),
11148 //               as_FloatRegister($src1$$reg),
11149 //               as_FloatRegister($src2$$reg),
11150 //              as_FloatRegister($src3$$reg));
11151 //   %}
11152 
11153 //   ins_pipe(pipe_class_default);
11154 // %}
11155 
11156 // instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11157 //   match(Set dst (AddD (MulD (NegD src1) src2) src3));
11158 //   match(Set dst (AddD (NegD (MulD src1 src2)) src3));
11159 
11160 //   format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
11161 
11162 //   ins_encode %{
11163 //     __ fmsubd(as_FloatRegister($dst$$reg),
11164 //               as_FloatRegister($src1$$reg),
11165 //               as_FloatRegister($src2$$reg),
11166 //               as_FloatRegister($src3$$reg));
11167 //   %}
11168 
11169 //   ins_pipe(pipe_class_default);
11170 // %}
11171 
11172 // instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11173 //   match(Set dst (SubF (MulF (NegF src1) src2) src3));
11174 //   match(Set dst (SubF (NegF (MulF src1 src2)) src3));
11175 
11176 //   format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
11177 
11178 //   ins_encode %{
11179 //     __ fnmadds(as_FloatRegister($dst$$reg),
11180 //                as_FloatRegister($src1$$reg),
11181 //                as_FloatRegister($src2$$reg),
11182 //                as_FloatRegister($src3$$reg));
11183 //   %}
11184 
11185 //   ins_pipe(pipe_class_default);
11186 // %}
11187 
11188 // instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11189 //   match(Set dst (SubD (MulD (NegD src1) src2) src3));
11190 //   match(Set dst (SubD (NegD (MulD src1 src2)) src3));
11191 
11192 //   format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
11193 
11194 //   ins_encode %{
11195 //     __ fnmaddd(as_FloatRegister($dst$$reg),
11196 //                as_FloatRegister($src1$$reg),
11197 //                as_FloatRegister($src2$$reg),
11198 //                as_FloatRegister($src3$$reg));
11199 //   %}
11200 
11201 //   ins_pipe(pipe_class_default);
11202 // %}
11203 
11204 // instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
11205 //   match(Set dst (SubF (MulF src1 src2) src3));
11206 
11207 //   format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
11208 
11209 //   ins_encode %{
11210 //     __ fnmsubs(as_FloatRegister($dst$$reg),
11211 //                as_FloatRegister($src1$$reg),
11212 //                as_FloatRegister($src2$$reg),
11213 //                as_FloatRegister($src3$$reg));
11214 //   %}
11215 
11216 //   ins_pipe(pipe_class_default);
11217 // %}
11218 
11219 // instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
11220 //   match(Set dst (SubD (MulD src1 src2) src3));
11221 
11222 //   format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
11223 
11224 //   ins_encode %{
11225 //   // n.b. insn name should be fnmsubd
11226 //     __ fnmsub(as_FloatRegister($dst$$reg),
11227 //                as_FloatRegister($src1$$reg),
11228 //                as_FloatRegister($src2$$reg),
11229 //                as_FloatRegister($src3$$reg));
11230 //   %}
11231 
11232 //   ins_pipe(pipe_class_default);
11233 // %}
11234 
11235 
// Single-precision FP divide: DivF -> fdivs (much higher cost than
// add/mul, reflecting divider latency).
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Double-precision FP divide: DivD -> fdivd.
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// FP negate, single precision: NegF -> fnegs.
// NOTE(review): the format mnemonic says "fneg" while the encoding
// emits fnegs — debug-listing inconsistency only.
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  format %{ "fneg   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// FP negate, double precision: NegD -> fnegd.
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// FP absolute value, single precision: AbsF -> fabss.
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// FP absolute value, double precision: AbsD -> fabsd.
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Double-precision square root: SqrtD -> fsqrtd.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Single-precision square root.  The compiler expresses float sqrt as
// a widen/sqrt/narrow round-trip (ConvD2F(SqrtD(ConvF2D x))); matching
// the whole tree lets us emit a single fsqrts.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11345 
11346 // ============================================================================
11347 // Logical Instructions
11348 
11349 // Integer Logical Instructions
11350 
11351 // And Instructions
11352 
11353 
// Int bitwise AND, register-register: AndI -> andw.
// NOTE(review): cr is listed as an operand although andw does not set
// flags — presumably for uniformity with rules that do; confirm.
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11368 
// Int bitwise AND with a logical-immediate bitmask: AndI -> andw #imm.
// immILog restricts the constant to values encodable as an AArch64
// logical immediate.
//
// FIX: the format string previously read "andsw" (the flag-setting
// form) although the encoding emits the non-flag-setting andw, so the
// debug listing misrepresented the generated instruction.  The
// mnemonic now matches the encoding.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11383 
11384 // Or Instructions
11385 
// Int bitwise OR, register-register: OrI -> orrw.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Int bitwise OR with a logical-immediate bitmask.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// Int bitwise XOR, register-register: XorI -> eorw.
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Int bitwise XOR with a logical-immediate bitmask.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11447 
11448 // Long Logical Instructions
11449 // TODO
11450 
// Long bitwise AND, register-register: AndL -> andr (64-bit).
// NOTE(review): the "\t# int" tags in the format strings of these long
// rules look copy-pasted from the int rules — debug listing only.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long bitwise AND with a logical-immediate bitmask.
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Or Instructions

// Long bitwise OR, register-register: OrL -> orr (64-bit).
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long bitwise OR with a logical-immediate bitmask.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// Long bitwise XOR, register-register: XorL -> eor (64-bit).
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long bitwise XOR with a logical-immediate bitmask.
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  format %{ "eor  $dst, $src1, $src2\t# int" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11544 
// Sign-extend int to long: ConvI2L -> sbfm #0, #31 (i.e. sxtw).
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// this pattern occurs in bigmath arithmetic
// Unsigned int-to-long: ConvI2L followed by a 0xffffffff mask is a
// zero-extension, done in one ubfm (uxtw) instead of extend-plus-and.
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
11570 
// Truncate long to int: ConvL2I -> movw (a 32-bit move zeroes the
// upper word, which is all the truncation needs).
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Int-to-boolean (Conv2B): dst = (src != 0) ? 1 : 0, via compare
// against zr and conditional set.  Clobbers the flags (KILL cr).
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Pointer-to-boolean: same as convI2B but with a 64-bit compare.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
11619 
// FP precision conversions: fcvt between single and double.

instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// FP-to-integer conversions use the fcvtzs* family (convert toward
// zero, signed).
// NOTE(review): correctness relies on fcvtzs NaN->0 and saturating
// overflow behaviour matching Java's ConvF2I/etc. semantics — confirm
// against the ARM ARM.

instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// Integer-to-FP conversions use the scvtf* family (signed convert).

instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11749 
11750 // stack <-> reg and reg <-> reg shuffles with no conversion
11751 
// Bit-preserving moves from a stack slot into a register: the spill slot's
// raw bits are reloaded into the other register file with no conversion.

// float stack slot -> int GP register
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// int stack slot -> float FP register
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// double stack slot -> long GP register
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// long stack slot -> double FP register
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
11823 
// Bit-preserving moves from a register out to a stack slot (the store half
// of the reg<->stack shuffles above).

// float FP register -> int stack slot
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// int GP register -> float stack slot
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
11859 
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  // Store the raw bits of a double FP register into a long stack slot
  // (bit-preserving move, no conversion).
  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  // Operand order corrected: the encoding stores $src to $dst, and every
  // sibling store format (MoveF2I_reg_stack, MoveL2D_reg_stack) prints
  // "$src, $dst"; the old "strd $dst, $src" mislabelled the debug output.
  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
11877 
// long GP register -> double stack slot (bit-preserving store)
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
11895 
// Direct register-file crossings using fmov: move raw bits between GP and
// FP registers without touching memory.
// NOTE(review): these reg-reg moves are classed pipe_class_memory —
// presumably a conservative scheduling choice; confirm against the
// pipeline model before changing.

// float FP register -> int GP register
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_memory);

%}

// int GP register -> float FP register
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(pipe_class_memory);

%}

// double FP register -> long GP register
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_memory);

%}

// long GP register -> double FP register
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(pipe_class_memory);

%}
11967 
11968 // ============================================================================
11969 // clearing of an array
11970 
// Zero an array: count pinned to r11 and base pointer to r10 (the fixed
// registers the shared encoding expects); both are clobbered (USE_KILL).
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode(aarch64_enc_clear_array_reg_reg(cnt, base));

  ins_pipe(pipe_class_memory);
%}
11983 
11984 // ============================================================================
11985 // Overflow Math Instructions
11986 
// Overflow checks for add/sub/neg.  cmn/cmp perform the same arithmetic as
// the checked operation but write only the flags; the matcher then tests
// the overflow condition (BoolTest::overflow / no_overflow) on cr.

instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// Negation overflow check: 0 - op1, flags only.
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  format %{ "cmpw  zr, $op1\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}

// NOTE(review): the zero operand is typed immI0 although the subtract is
// long (OverflowSubL) — compare overflowNegI_reg above; confirm whether
// this should be immL0 for the matcher to see the long zero.
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
12116 
// Int multiply overflow: smull produces the full 64-bit product; it fits
// in 32 bits iff it equals its own low word sign-extended (sxtw), so
// subs(zr, p, p, sxtw) sets NE exactly on overflow.  The flags are then
// converted into a V-flag condition the matcher's VS/VC test expects by
// materializing 0x80000000 (NE) or 0 (EQ) and subtracting 1.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused overflow-check + branch: skips the V-flag materialization above by
// branching directly on the NE/EQ result of the sign-extension compare
// (VS request -> branch on NE, VC request -> branch on EQ).
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
12159 
// Long multiply overflow: mul/smulh form the full 128-bit product; it fits
// in 64 bits iff the high half equals the sign-extension of the low half.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    // No overflow iff rscratch2 == (rscratch1 ASR #63), the full
    // sign-extension of the low half.  The previous shift of #31 only
    // replicated bit 31 and mis-detected overflow for 64-bit products.
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}
12182 
// Fused long-multiply overflow check + branch (overflow -> NE, no_overflow
// -> EQ), avoiding the V-flag materialization of overflowMulL_reg.
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    // Overflow iff the high half differs from (low half ASR #63); a shift
    // of #31 (the previous value) is wrong for a 64-bit product.
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
12206 
12207 // ============================================================================
12208 // Compare Instructions
12209 
// Signed int compares.  Immediate variants split by encodability: addsub
// immediates fold into one cmpw, arbitrary immediates are costed as two
// instructions (materialize + compare) via aarch64_enc_cmpw_imm.

instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, 0" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
12265 
// Unsigned compare Instructions; really, same as signed compare
// except it should only be used to feed an If or a CMovI which takes a
// cmpOpU.  (Same cmpw encodings as the signed forms; only the flags
// register operand class differs, steering users to unsigned conditions.)

instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
12325 
// Signed long compares, mirroring the int forms with 64-bit cmp encodings.

instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// NOTE(review): the zero operand is immI0 for a long compare (cf. immL0),
// and the format prints "tst" although the encoding is a compare with
// zero (aarch64_enc_cmp_imm_addsub) — debug text only; confirm intent.
instruct compL_reg_immI0(rFlagsReg cr, iRegL op1, immI0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
12381 
// Pointer and compressed-pointer compares (unsigned flags class), plus the
// compare-against-null forms.

instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Null check: pointer vs. zero.
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Null check: compressed pointer vs. zero.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
12437 
12438 // FP comparisons
12439 //
12440 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
12441 // using normal cmpOp. See declaration of rFlagsReg for details.
12442 
// Float compare: fcmps sets the normal flags register (see the comment
// above about CmpF feeding ordinary cmpOp conditions).
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Float compare against literal zero (fcmp #0.0 form).
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}
12470 // FROM HERE
12471 
// Double compare, mirroring the float forms above.
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Double compare against literal zero.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}
12499 
// Three-way float compare: dst = -1 if less or unordered, 0 if equal,
// +1 if greater.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  // Format fixed: closing paren was missing after "eq".
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Dead "Label done" / bind(done) removed: the label was never
    // branched to, so binding it had no effect.
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
12527 
// Three-way double compare: dst = -1 if less or unordered, 0 if equal,
// +1 if greater.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  // Format fixed: closing paren was missing after "eq".
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Dead "Label done" / bind(done) removed (never branched to).
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
12554 
// Three-way float compare against literal zero: dst = -1/0/+1 as above.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  // Format fixed: closing paren was missing after "eq".
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Dead "Label done" / bind(done) removed (never branched to).
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0D);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
12581 
// Three-way double compare against literal zero: dst = -1/0/+1 as above.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  // Format fixed: closing paren was missing after "eq".
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Dead "Label done" / bind(done) removed (never branched to).
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0D);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
12607 
// CmpLTMask: dst = (p < q) ? -1 : 0.  cset gives 0/1, then 0 - dst turns
// 1 into the all-ones mask.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Special case against zero: an arithmetic shift by 31 replicates the sign
// bit, yielding -1 for negative src and 0 otherwise in one instruction.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
12644 
12645 // ============================================================================
12646 // Max and Min
12647 
// Signed int minimum via compare + conditional select; size(8) = two
// 4-byte instructions.
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 lt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    // select src1 when src1 < src2, else src2
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
12672 // FROM HERE
12673 
// Signed int maximum: identical shape to minI_rReg but selecting on GT.
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 gt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    // select src1 when src1 > src2, else src2
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
12698 
12699 // ============================================================================
12700 // Branch Instructions
12701 
12702 // Direct Branch.
// Unconditional direct branch (Goto).
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}

// Conditional Near Branch
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}

// Conditional Near Branch Unsigned
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
12758 
12759 // Make use of CBZ and CBNZ.  These instructions, as well as being
12760 // shorter than (cmp; branch), have the additional benefit of not
12761 // killing the flags.
12762 
12763 instruct cmpI_imm0_branch(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
12764   match(If cmp (CmpI op1 op2));
12765   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
12766             || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
12767   effect(USE labl);
12768 
12769   ins_cost(BRANCH_COST);
12770   format %{ "cbw$cmp   $op1, $labl" %}
12771   ins_encode %{
12772     Label* L = $labl$$label;
12773     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
12774     if (cond == Assembler::EQ)
12775       __ cbzw($op1$$Register, *L);
12776     else
12777       __ cbnzw($op1$$Register, *L);
12778   %}
12779   ins_pipe(pipe_cmp_branch);
12780 %}
12781 
// 64-bit variant of cmpI_imm0_branch: compare-against-zero plus branch
// folded into one cbz/cbnz.  Restricted to eq/ne; leaves flags intact.
instruct cmpL_imm0_branch(cmpOp cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    // The predicate above guarantees cond is either EQ or NE here.
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
12800 
// Pointer null-check plus branch folded into one cbz/cbnz (64-bit).
// Restricted to eq/ne; leaves flags intact.
instruct cmpP_imm0_branch(cmpOp cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    // The predicate above guarantees cond is either EQ or NE here.
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
12819 
12820 // Conditional Far Branch
12821 // Conditional Far Branch Unsigned
12822 // TODO: fixme
12823 
12824 // counted loop end branch near
// Counted-loop back-branch: identical encoding to branchCon, but the
// matcher keeps CountedLoopEnd distinct from plain If nodes.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  // aarch64_enc_br_con emits a conditional branch (b.<cond>), so use the
  // conditional-branch pipeline class, consistent with branchCon which
  // shares the same encoding.
  ins_pipe(pipe_branch_cond);
%}
12840 
12841 // counted loop end branch near Unsigned
// Unsigned counted-loop back-branch: identical encoding to branchConU,
// but matches CountedLoopEnd instead of If.
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  // aarch64_enc_br_conU emits a conditional branch (b.<cond>), so use
  // the conditional-branch pipeline class, consistent with branchConU
  // which shares the same encoding.
  ins_pipe(pipe_branch_cond);
%}
12857 
12858 // counted loop end branch far
12859 // counted loop end branch far unsigned
12860 // TODO: fixme
12861 
12862 // ============================================================================
12863 // inlined locking and unlocking
12864 
// Inlined fast-path monitor enter.  The encoding sets the flags so the
// compiled slow-path test can branch on the outcome; tmp and tmp2 are
// scratch registers clobbered by aarch64_enc_fast_lock.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
12879 
// Inlined fast-path monitor exit, mirroring cmpFastLock: flags carry
// the fast/slow-path result; tmp and tmp2 are scratch.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
12892 
12893 
12894 // ============================================================================
12895 // Safepoint Instructions
12896 
12897 // TODO
12898 // provide a near and far version of this code
12899 
// Safepoint poll: a dead load from the polling page.  The poll_type
// relocation marks this PC so the VM can recognize the fault when the
// page is protected and bring the thread to a safepoint.
instruct safePoint(iRegP poll)
%{
  match(SafePoint poll);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
12912 
12913 
12914 // ============================================================================
12915 // Procedure Call/Return Instructions
12916 
12917 // Call Java Static Instruction
12918 
// Direct (statically bound) Java call.  The call target comes from the
// method operand; aarch64_enc_call_epilog handles the shared post-call
// bookkeeping for Java calls.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
12934 
12935 // TO HERE
12936 
12937 // Call Java Dynamic Instruction
// Dynamically dispatched Java call (virtual/interface via inline cache);
// shares the post-call epilog encoding with the static call above.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
12953 
12954 // Call Runtime Instruction
12955 
// Call from compiled Java code into the VM runtime (may block / GC).
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
12970 
12971 // Call Runtime Instruction
12972 
// Leaf runtime call: no safepoint, no stack walking required.  Uses the
// same java-to-runtime encoding as CallRuntimeDirect.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
12987 
12988 // Call Runtime Instruction
12989 
// Leaf runtime call that is known not to use floating point; encoding
// is identical to CallLeafDirect on this port.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
13004 
13005 // Tail Call; Jump from runtime stub to Java code.
13006 // Also known as an 'interprocedural jump'.
13007 // Target of jump will eventually return to caller.
13008 // TailJump below removes the return address.
// Interprocedural jump (tail call) from a runtime stub into Java code:
// an indirect br through jump_target; method_oop is carried in its
// dedicated register for the callee.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}
13021 
// Tail jump used for exception forwarding: indirect br with the
// exception oop pinned in r0 for the handler.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
13034 
13035 // Create exception oop: created by stack-crawling runtime code.
13036 // Created exception is now available to this handler, and is setup
13037 // just prior to jumping to this handler. No code emitted.
13038 // TODO check
13039 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
// Zero-size pseudo-instruction: tells the register allocator the
// exception oop materializes in r0 (placed there by the runtime just
// before jumping to the handler).  No code is emitted.
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
13052 
13053 // Rethrow exception: The exception oop will come in the first
13054 // argument position. Then JUMP (not call) to the rethrow stub code.
// Rethrow: the exception oop is already in the first argument register;
// jump (not call) to the shared rethrow stub.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
13065 
13066 
13067 // Return Instruction
13068 // epilog node loads ret address into lr as part of frame pop
// Method return: the epilog has already restored lr, so a plain ret
// suffices here.
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}
13079 
13080 // Die now.
// Emit a breakpoint trap for paths the compiler proved unreachable;
// brk #999 makes the failure mode obvious if one is ever executed.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // TODO
    // implement proper trap call here
    __ brk(999);
  %}

  ins_pipe(pipe_class_default);
%}
13095 
13096 // ============================================================================
13097 // Partial Subtype Check
13098 //
13099 // superklass array for an instance of the superklass.  Set a hidden
13100 // internal cache on a hit (cache is checked with exposed code in
13101 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
13102 // encoding ALSO sets flags.
13103 
// Partial subtype check with a concrete result register.  opcode(0x1)
// is a flag consumed by the shared encoding telling it to zero $result
// on a hit; the encoding also sets the flags.
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}
13118 
// Flags-only form: the check's result is immediately compared against
// zero, so only the condition codes are consumed.  opcode(0x0) tells
// the shared encoding not to bother zeroing $result on a hit.
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
13133 
// String.compareTo intrinsic.  Operands are pinned to fixed registers
// expected by MacroAssembler::string_compare; inputs are consumed
// (USE_KILL) and tmp1/flags are clobbered.
instruct string_compare(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13148 
// String.indexOf intrinsic, variable needle length: the -1 passed as
// the constant-count argument tells the stub the needle length is in
// $cnt2 at runtime.
instruct string_indexof(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13166 
// String.indexOf intrinsic, specialized for a constant needle length of
// at most 4 (immI_le_4): the count is baked in as icnt2 and zr is
// passed in place of the runtime count register.
instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13186 
// String.equals intrinsic; registers pinned for the stub, inputs
// consumed, tmp and flags clobbered.
instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, iRegP_R10 tmp, rFlagsReg cr)
%{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(KILL tmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp" %}
  ins_encode %{
    __ string_equals($str1$$Register, $str2$$Register,
                      $cnt$$Register, $result$$Register,
                      $tmp$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13201 
// Arrays.equals intrinsic for char arrays (delegates to
// char_arrays_equals, which reads the lengths from the array headers).
instruct array_equals(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R10 tmp, rFlagsReg cr)
%{
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ char_arrays_equals($ary1$$Register, $ary2$$Register,
                          $result$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13215 
13216 // encode char[] to byte[] in ISO_8859_1
// ISO-8859-1 encoding intrinsic (char[] -> byte[]); uses four vector
// temporaries (v0-v3) for the SIMD fast path inside encode_iso_array.
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
13234 
13235 // ============================================================================
13236 // This name is KNOWN by the ADLC and cannot be changed.
13237 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13238 // for this guy.
// ThreadLocal: zero-size pseudo-instruction.  The dedicated thread
// register (thread_RegP) already holds Thread::current(), so no code
// is emitted and the cost is zero.
instruct tlsLoadP(thread_RegP dst)
%{
  match(Set dst (ThreadLocal));

  ins_cost(0);

  format %{ " -- \t// $dst=Thread::current(), empty" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
13253 
13254 // ====================VECTOR INSTRUCTIONS=====================================
13255 
13256 // Load vector (32 bits)
// 32-bit vector load into the low half of a D register (ldr s-form).
instruct loadV4(vecD dst, vmem mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(pipe_class_memory);
%}
13266 
13267 // Load vector (64 bits)
// 64-bit vector load (ldr d-form).
instruct loadV8(vecD dst, vmem mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(pipe_class_memory);
%}
13277 
13278 // Load Vector (128 bits)
// 128-bit vector load (ldr q-form).
instruct loadV16(vecX dst, vmem mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(pipe_class_memory);
%}
13288 
13289 // Store Vector (32 bits)
// 32-bit vector store from the low half of a D register (str s-form).
instruct storeV4(vecD src, vmem mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(pipe_class_memory);
%}
13299 
13300 // Store Vector (64 bits)
// 64-bit vector store (str d-form).
instruct storeV8(vecD src, vmem mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(pipe_class_memory);
%}
13310 
13311 // Store Vector (128 bits)
// 128-bit vector store (str q-form).
instruct storeV16(vecX src, vmem mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(pipe_class_memory);
%}
13321 
// Broadcast a GP-register byte into all lanes of a 64-bit vector.
// Also covers 4-byte vectors (length 4), which occupy the low half of
// the same vecD register.
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13334 
// Broadcast a GP-register byte into all 16 lanes of a 128-bit vector.
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13346 
// Broadcast an immediate byte into a 64-bit vector (movi); the vector
// mov overload handles the immediate encoding.
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
13359 
// Broadcast an immediate byte into a 128-bit vector (movi).
instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
13371 
// Broadcast a 16-bit value into a 64-bit vector (4 half-word lanes);
// also covers 2-lane short vectors in the low half of the register.
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13384 
// Broadcast a 16-bit value into all 8 half-word lanes of a 128-bit vector.
instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13396 
// Broadcast an immediate 16-bit value into a 64-bit vector (movi).
instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
13409 
// Broadcast an immediate 16-bit value into a 128-bit vector (movi).
instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
13421 
// Broadcast a 32-bit int into both lanes of a 64-bit vector.
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13433 
// Broadcast a 32-bit int into all 4 lanes of a 128-bit vector.
instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13445 
// Broadcast an immediate int into a 64-bit vector (movi).
instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
13457 
// Broadcast an immediate int into a 128-bit vector (movi).
instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
13469 
// Broadcast a 64-bit long into both lanes of a 128-bit vector.
instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13481 
// Zero a 2-long (128-bit) vector.  There is no movi form for 2D with an
// arbitrary immediate, but zero is cheap: eor the register with itself.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  // A 2-lane vecX has 64-bit lanes, so this rule must match ReplicateL:
  // the previous ReplicateI here could never describe a 2-lane vecX
  // (2 ints are a vecD and are matched by replicate2I), leaving this
  // rule dead and 2L zero-splats unmatched.
  match(Set dst (ReplicateL zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(2L)" %}
  ins_encode %{
    // xor with self zeroes all 128 bits without needing the immediate.
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13495 
// Broadcast a scalar float (lane 0 of src) into both lanes of a
// 64-bit vector.
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13508 
// Broadcast a scalar float into all 4 lanes of a 128-bit vector.
instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13521 
// Broadcast a scalar double into both lanes of a 128-bit vector.
instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13534 
13535 // ====================REDUCTION ARITHMETIC====================================
13536 
// Add-reduce a 2-int vector into a scalar accumulator:
// dst = src1 + src2[0] + src2[1].  Lanes are extracted to GP registers
// with umov and summed with scalar adds.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp, iRegI tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
13555 
// Add-reduce a 4-int vector: addv sums all four lanes in the vector
// unit, then the scalar result is extracted and added to src1.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
13573 
// Multiply-reduce a 2-int vector into a scalar accumulator:
// dst = src1 * src2[0] * src2[1].  Lanes are extracted with umov and
// multiplied with scalar muls; dst is TEMP because it is written
// before the last input read.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
13592 
// Multiply-reduce a 4-int vector: fold the high 64 bits onto the low
// 64 with ins + mulv (pairwise products), then extract the two partial
// products and finish with scalar muls against src1.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "ins   $tmp, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i"
  %}
  ins_encode %{
    // tmp[D0] = src2[D1]: brings lanes 2,3 down beside lanes 0,1.
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    // tmp[0] = src2[2]*src2[0], tmp[1] = src2[3]*src2[1].
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
13617 
// Add-reduce a 2-float vector: dst = src1 + src2[0] + src2[1].  Lane 1
// is moved into lane 0 of tmp (ins) so it can be used as a scalar
// operand; additions are sequential scalar fadds to preserve Java's
// strict left-to-right FP semantics.
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13637 
// Add-reduce a 4-float vector: each of lanes 1-3 is moved into lane 0
// of tmp in turn and accumulated with a scalar fadds.  Kept strictly
// sequential (no pairwise faddp) to preserve ordered FP addition.
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13669 
// Multiply-reduce a 2-float vector: dst = src1 * src2[0] * src2[1].
// Lane 1 is moved into lane 0 of tmp (ins) to serve as a scalar
// operand for the second fmuls.
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13689 
// Multiply-reduce a 4-float vector: each of lanes 1-3 is moved into
// lane 0 of tmp in turn and folded in with a scalar fmuls, keeping the
// multiplications strictly ordered.
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13721 
// Add-reduction over a 2-double vector:
//   dst = src1 + src2[0] + src2[1]
// Lane 1 of src2 is moved into tmp so the scalar faddd can reach it.
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    // dst = src1 + src2[0]
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    // tmp[0] = src2[1]
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    // dst += src2[1]
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13741 
// Multiply-reduction over a 2-double vector:
//   dst = src1 * src2[0] * src2[1]
// Lane 1 of src2 is moved into tmp so the scalar fmuld can reach it.
// Fix: the trailing format annotation previously said "add reduction2d";
// this is a multiply reduction.
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
  %}
  ins_encode %{
    // dst = src1 * src2[0]
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    // tmp[0] = src2[1]
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    // dst *= src2[1]
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13761 
13762 // ====================VECTOR ARITHMETIC=======================================
13763 
13764 // --------------------------------- ADD --------------------------------------
13765 
// Integer vector add, byte lanes; the 8B form also covers 4-byte vectors
// living in the low half of a vecD.
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector add, 16 byte lanes (full 128-bit register).
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector add, short (16-bit) lanes; 4H form also covers 2-lane vectors.
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector add, 8 short lanes.
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector add, 2 int lanes (NEON "S" = 32-bit element).
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector add, 4 int lanes.
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector add, 2 long lanes (NEON "D" = 64-bit element).
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector FP add, 2 float lanes.
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector FP add, 4 float lanes.
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13893 
// Vector FP add, 2 double lanes.
// Fix: added the length()==2 predicate for consistency with every other
// 2D vector rule here (vsub2D, vmul2D, vdiv2D); without it this rule
// would match AddVD nodes of any length.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13906 
13907 // --------------------------------- SUB --------------------------------------
13908 
// Integer vector subtract, byte lanes; 8B form also covers 4-byte vectors.
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector subtract, 16 byte lanes.
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector subtract, short lanes; 4H form also covers 2-lane vectors.
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector subtract, 8 short lanes.
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector subtract, 2 int lanes.
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector subtract, 4 int lanes.
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector subtract, 2 long lanes.
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14008 
// Vector FP subtract, 2 float lanes.
// Fix: this rule previously matched AddVF, so SubVF over 2 floats had no
// matching rule and an AddVF could wrongly be encoded as fsub. It must
// match SubVF, mirroring vsub4F/vsub2D.
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14022 
// Vector FP subtract, 4 float lanes.
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector FP subtract, 2 double lanes.
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14050 
14051 // --------------------------------- MUL --------------------------------------
14052 
// Vector multiply, short lanes; 4H form also covers 2-lane vectors.
// Note: no MulVB rules — NEON has no byte multiply C2 uses here.
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector multiply, 8 short lanes.
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector multiply, 2 int lanes.
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector multiply, 4 int lanes.
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector FP multiply, 2 float lanes.
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector FP multiply, 4 float lanes.
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector FP multiply, 2 double lanes.
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14151 
14152 // --------------------------------- DIV --------------------------------------
14153 
// Vector FP divide, 2 float lanes (FP only — no NEON integer divide).
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector FP divide, 4 float lanes.
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector FP divide, 2 double lanes.
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14195 
14196 // --------------------------------- AND --------------------------------------
14197 
// Bitwise AND; logical ops are element-size agnostic, so rules key off
// total byte length rather than lane count.
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    // andr: "andr" is the assembler's name to avoid clashing with C++ "and"
    __ andr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Bitwise AND, 16 bytes.
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14226 
14227 // --------------------------------- OR ---------------------------------------
14228 
// Bitwise OR, 4/8 bytes.
// Fix: the format string said "and" although this rule emits orr;
// corrected to match the generated instruction (as in vor16B).
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14243 
// Bitwise OR, 16 bytes.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14257 
14258 // --------------------------------- XOR --------------------------------------
14259 
// Bitwise XOR, 4/8 bytes (NEON mnemonic is eor).
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Bitwise XOR, 16 bytes.
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14288 
14289 // ------------------------------ Shift ---------------------------------------
14290 
// Materialize a left-shift count: broadcast the scalar count to all lanes.
instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (LShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
// Materialize a right-shift count: broadcast, then negate every lane so the
// later sshl/ushl performs a right shift.
instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
    __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14310 
// Variable shift, byte lanes. sshl shifts left for positive counts and
// right (arithmetic) for negative ones, so one rule serves both LShiftVB
// and RShiftVB — the count register is pre-negated by vshiftcntR for the
// right-shift case.
instruct vsll8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable left/arithmetic-right shift, 16 byte lanes.
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable logical (unsigned) right shift, 4/8 byte lanes; ushl with the
// negated count from vshiftcntR.
instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable logical right shift, 16 byte lanes.
instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14366 
// Immediate left shift, 4/8 byte lanes. Java masks shift counts to 0..31,
// so a count >= 8 shifts every bit out of a byte lane: the result is all
// zeroes, produced here by eor'ing src with itself.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      // shifting out all bits: dst = 0
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate left shift, 16 byte lanes; same zeroing rule for counts >= 8.
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate arithmetic right shift, 4/8 byte lanes. Counts >= 8 are
// clamped to 7 (Java semantics: result is the sign fill).
// NOTE(review): the count is then transformed with -sh & 7 before being
// handed to sshr — presumably this assembler entry point expects the
// negated/encoded shift field rather than the plain count; confirm
// against the assembler's sshr definition.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate arithmetic right shift, 16 byte lanes; same clamping and
// count transformation as vsra8B_imm.
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 15;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate logical right shift, 4/8 byte lanes; counts >= 8 zero the
// lane (eor src,src), otherwise ushr with the transformed count.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate logical right shift, 16 byte lanes.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(pipe_class_default);
%}
14475 
// Variable shift, short lanes; as with the byte forms, sshl handles both
// left and arithmetic-right shifts (right shift uses the negated count
// produced by vshiftcntR).
instruct vsll4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable left/arithmetic-right shift, 8 short lanes.
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable logical right shift, 2/4 short lanes.
instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable logical right shift, 8 short lanes.
instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14531 
14532 instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
14533   predicate(n->as_Vector()->length() == 2 ||
14534             n->as_Vector()->length() == 4);
14535   match(Set dst (LShiftVS src shift));
14536   ins_cost(INSN_COST);
14537   format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
14538   ins_encode %{
14539     int sh = (int)$shift$$constant & 31;
14540     if (sh >= 16) {
14541       __ eor(as_FloatRegister($dst$$reg), __ T8B,
14542              as_FloatRegister($src$$reg),
14543              as_FloatRegister($src$$reg));
14544     } else {
14545       __ shl(as_FloatRegister($dst$$reg), __ T4H,
14546              as_FloatRegister($src$$reg), sh);
14547     }
14548   %}
14549   ins_pipe(pipe_class_default);
14550 %}
14551 
14552 instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
14553   predicate(n->as_Vector()->length() == 8);
14554   match(Set dst (LShiftVS src shift));
14555   ins_cost(INSN_COST);
14556   format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
14557   ins_encode %{
14558     int sh = (int)$shift$$constant & 31;
14559     if (sh >= 16) {
14560       __ eor(as_FloatRegister($dst$$reg), __ T16B,
14561              as_FloatRegister($src$$reg),
14562              as_FloatRegister($src$$reg));
14563     } else {
14564       __ shl(as_FloatRegister($dst$$reg), __ T8H,
14565              as_FloatRegister($src$$reg), sh);
14566     }
14567   %}
14568   ins_pipe(pipe_class_default);
14569 %}
14570 
// Arithmetic right shift of 2 or 4 short lanes by an immediate (4H).
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    // An arithmetic shift by >= 16 leaves every bit equal to the sign
    // bit -- identical to shifting by 15 -- so clamp to 15.
    if (sh >= 16) sh = 15;
    // NOTE(review): the count is negated and masked before being
    // handed to sshr; this appears to match this file's assembler
    // encoding convention for SIMD shift-right immediates (immh:immb)
    // -- confirm against assembler_aarch64.
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(pipe_class_default);
%}
14586 
// Arithmetic right shift of 8 short lanes by an immediate (8H).
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    // Shifting by >= 16 is equivalent to shifting by 15 for an
    // arithmetic shift of 16-bit lanes (result is all sign bits).
    if (sh >= 16) sh = 15;
    // NOTE(review): count is negated-and-masked for this file's sshr
    // immediate encoding convention -- confirm against assembler.
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(pipe_class_default);
%}
14601 
// Logical right shift of 2 or 4 short lanes by an immediate (4H).
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      // A logical right shift of a 16-bit lane by >= 16 yields zero;
      // USHR's immediate cannot encode that, so clear dst via self-EOR.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      // NOTE(review): count is negated-and-masked per this file's
      // ushr immediate encoding convention -- confirm against assembler.
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(pipe_class_default);
%}
14621 
// Logical right shift of 8 short lanes by an immediate (8H).
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      // Logical shift of a 16-bit lane by >= 16 is zero; USHR cannot
      // encode such a count, so clear the destination with self-EOR.
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      // NOTE(review): count is negated-and-masked per this file's
      // ushr immediate encoding convention -- confirm against assembler.
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(pipe_class_default);
%}
14640 
// Variable shift of 2 int lanes (2S). Matches both the left shift and
// the arithmetic right shift node: SSHL shifts left for positive lane
// counts and right for negative ones, so one instruction covers both.
// NOTE(review): for the RShiftVI case the shift register presumably
// holds negated counts set up by a rule outside this chunk -- confirm.
instruct vsll2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14654 
// Variable shift of 4 int lanes (4S). One SSHL covers both LShiftVI
// and RShiftVI: signed per-lane counts shift left when positive and
// right when negative.
// NOTE(review): the RShiftVI case relies on the shift register holding
// negated counts, presumably set up elsewhere in this file -- confirm.
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14668 
// Logical right shift of 2 int lanes by per-lane variable counts (2S).
// NOTE(review): emits USHL (left shift by signed counts); presumably
// the shift register holds negated counts -- confirm against the
// shift-count setup rule elsewhere in this file.
instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14681 
// Logical right shift of 4 int lanes by per-lane variable counts (4S).
// NOTE(review): emits USHL; relies on the shift register holding
// negated counts (USHL shifts right for negative lane counts) --
// confirm against the shift-count setup rule elsewhere in this file.
instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14694 
// Left shift of 2 int lanes by a compile-time immediate (2S).
// The & 31 mask matches Java's int shift semantics and covers the
// full 0..31 range of a 32-bit lane, so no zeroing special case
// is needed (unlike the short variants).
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}
14707 
// Left shift of 4 int lanes by a compile-time immediate (4S).
// The & 31 mask matches Java's int shift semantics; it spans the full
// range for 32-bit lanes, so no zeroing special case is required.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}
14720 
// Arithmetic right shift of 2 int lanes by an immediate (2S).
// NOTE(review): the count is negated and masked to 0..31 before being
// passed to sshr, matching this file's SIMD shift-right immediate
// encoding convention -- confirm against assembler_aarch64.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}
14733 
// Arithmetic right shift of 4 int lanes by an immediate (4S).
// NOTE(review): count is negated-and-masked per this file's sshr
// immediate encoding convention -- confirm against assembler_aarch64.
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}
14746 
// Logical right shift of 2 int lanes by an immediate (2S).
// NOTE(review): count is negated-and-masked per this file's ushr
// immediate encoding convention -- confirm against assembler_aarch64.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}
14759 
// Logical right shift of 4 int lanes by an immediate (4S).
// NOTE(review): count is negated-and-masked per this file's ushr
// immediate encoding convention -- confirm against assembler_aarch64.
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}
14772 
// Variable shift of 2 long lanes (2D). One SSHL covers both LShiftVL
// and RShiftVL: signed per-lane counts shift left when positive and
// right when negative.
// NOTE(review): the RShiftVL case relies on the shift register holding
// negated counts, presumably set up elsewhere in this file -- confirm.
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14786 
// Logical right shift of 2 long lanes by per-lane variable counts (2D).
// NOTE(review): emits USHL; relies on the shift register holding
// negated counts (USHL shifts right for negative lane counts) --
// confirm against the shift-count setup rule elsewhere in this file.
instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14799 
// Left shift of 2 long lanes by a compile-time immediate (2D).
// The & 63 mask matches Java's long shift semantics and spans the
// full 0..63 range of a 64-bit lane, so no zeroing special case
// is needed.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 63);
  %}
  ins_pipe(pipe_class_default);
%}
14812 
// Arithmetic right shift of 2 long lanes by an immediate (2D).
// NOTE(review): count is negated and masked to 0..63, matching this
// file's sshr immediate encoding convention -- confirm against
// assembler_aarch64.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(pipe_class_default);
%}
14825 
// Logical right shift of 2 long lanes by an immediate (2D).
// NOTE(review): count is negated and masked to 0..63 per this file's
// ushr immediate encoding convention -- confirm against
// assembler_aarch64.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(pipe_class_default);
%}
14838 
14839 //----------PEEPHOLE RULES-----------------------------------------------------
14840 // These must follow all instruction definitions as they use the names
14841 // defined in the instructions definitions.
14842 //
14843 // peepmatch ( root_instr_name [preceding_instruction]* );
14844 //
14845 // peepconstraint %{
14846 // (instruction_number.operand_name relational_op instruction_number.operand_name
14847 //  [, ...] );
14848 // // instruction numbers are zero-based using left to right order in peepmatch
14849 //
14850 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
14851 // // provide an instruction_number.operand_name for each operand that appears
14852 // // in the replacement instruction's match rule
14853 //
14854 // ---------VM FLAGS---------------------------------------------------------
14855 //
14856 // All peephole optimizations can be turned off using -XX:-OptoPeephole
14857 //
14858 // Each peephole rule is given an identifying number starting with zero and
14859 // increasing by one in the order seen by the parser.  An individual peephole
14860 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
14861 // on the command-line.
14862 //
14863 // ---------CURRENT LIMITATIONS----------------------------------------------
14864 //
14865 // Only match adjacent instructions in same basic block
14866 // Only equality constraints
14867 // Only constraints between operands, not (0.dest_reg == RAX_enc)
14868 // Only one replacement instruction
14869 //
14870 // ---------EXAMPLE----------------------------------------------------------
14871 //
14872 // // pertinent parts of existing instructions in architecture description
14873 // instruct movI(iRegINoSp dst, iRegI src)
14874 // %{
14875 //   match(Set dst (CopyI src));
14876 // %}
14877 //
14878 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
14879 // %{
14880 //   match(Set dst (AddI dst src));
14881 //   effect(KILL cr);
14882 // %}
14883 //
14884 // // Change (inc mov) to lea
14885 // peephole %{
//   // increment preceded by register-register move
14887 //   peepmatch ( incI_iReg movI );
14888 //   // require that the destination register of the increment
14889 //   // match the destination register of the move
14890 //   peepconstraint ( 0.dst == 1.dst );
14891 //   // construct a replacement instruction that sets
14892 //   // the destination to ( move's source register + one )
14893 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
14894 // %}
14895 //
14896 
14897 // Implementation no longer uses movX instructions since
14898 // machine-independent system no longer uses CopyX nodes.
14899 //
14900 // peephole
14901 // %{
14902 //   peepmatch (incI_iReg movI);
14903 //   peepconstraint (0.dst == 1.dst);
14904 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
14905 // %}
14906 
14907 // peephole
14908 // %{
14909 //   peepmatch (decI_iReg movI);
14910 //   peepconstraint (0.dst == 1.dst);
14911 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
14912 // %}
14913 
14914 // peephole
14915 // %{
14916 //   peepmatch (addI_iReg_imm movI);
14917 //   peepconstraint (0.dst == 1.dst);
14918 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
14919 // %}
14920 
14921 // peephole
14922 // %{
14923 //   peepmatch (incL_iReg movL);
14924 //   peepconstraint (0.dst == 1.dst);
14925 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
14926 // %}
14927 
14928 // peephole
14929 // %{
14930 //   peepmatch (decL_iReg movL);
14931 //   peepconstraint (0.dst == 1.dst);
14932 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
14933 // %}
14934 
14935 // peephole
14936 // %{
14937 //   peepmatch (addL_iReg_imm movL);
14938 //   peepconstraint (0.dst == 1.dst);
14939 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
14940 // %}
14941 
14942 // peephole
14943 // %{
14944 //   peepmatch (addP_iReg_imm movP);
14945 //   peepconstraint (0.dst == 1.dst);
14946 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
14947 // %}
14948 
14949 // // Change load of spilled value to only a spill
14950 // instruct storeI(memory mem, iRegI src)
14951 // %{
14952 //   match(Set mem (StoreI mem src));
14953 // %}
14954 //
14955 // instruct loadI(iRegINoSp dst, memory mem)
14956 // %{
14957 //   match(Set dst (LoadI mem));
14958 // %}
14959 //
14960 
14961 //----------SMARTSPILL RULES---------------------------------------------------
14962 // These must follow all instruction definitions as they use the names
14963 // defined in the instructions definitions.
14964 
14965 // Local Variables:
14966 // mode: c++
14967 // End: