1 //
   2 // Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, Red Hat Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
  31 // architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
  71 //   r27-r31 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
  74 // as regards Java usage. we don't use any callee save registers
  75 // because this makes it difficult to de-optimise a frame (see comment
  76 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers (r8/r9 are left undefined so the allocator never sees them)
  80 // Rn is the real low 32 bit half; Rn_H is the virtual high half (the next VMReg slot)
  81 reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
  82 reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
  83 reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
  84 reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
  85 reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
  86 reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
  87 reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
  88 reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
  89 reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
  90 reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
  91 reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
  92 reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
  93 reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
  94 reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
  95 reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
  96 reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
  97 reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
  98 reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  99 reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
 100 reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 101 reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
 102 reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
 103 reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
 104 reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
 105 reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
 106 reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
 107 reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
 108 reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
 109 reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
 110 reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
 111 reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
 112 reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
 113 reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
 114 reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
 115 reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        ); // SOE under the C convention from here through R28
 116 reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
 117 reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
 118 reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
 119 reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
 120 reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
 121 reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
 122 reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
 123 reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
 124 reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
 125 reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
 126 reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
 127 reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
 128 reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
 129 reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
 130 reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
 131 reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
 132 reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
 133 reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
 134 reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
 135 reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
 136 reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
 137 reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
 138 reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
 139 reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
 140 reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each is 128 bits wide and
 156 // can store a vector of single or double precision floating-point
 157 // values: up to 4 * 32 bit floats or 2 * 64 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
 160 // for Java use, float registers v0-v15 are always save on call (whereas
 161 // the platform ABI treats v8-v15 as callee save). float registers
 162 // v16-v31 are SOC as per the platform spec
 163 // Vn, Vn_H, Vn_J, Vn_K name four consecutive VMReg slots of the same register vn
 164   reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
 165   reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
 166   reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
 167   reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );
 168 
 169   reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
 170   reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
 171   reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
 172   reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );
 173 
 174   reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
 175   reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
 176   reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
 177   reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );
 178 
 179   reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
 180   reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
 181   reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
 182   reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );
 183 
 184   reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
 185   reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
 186   reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
 187   reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );
 188 
 189   reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
 190   reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
 191   reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
 192   reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );
 193 
 194   reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
 195   reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
 196   reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
 197   reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );
 198 
 199   reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
 200   reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
 201   reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
 202   reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );
 203 
 204   reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
 205   reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
 206   reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
 207   reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );
 208 
 209   reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
 210   reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
 211   reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
 212   reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );
 213 
 214   reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
 215   reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
 216   reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
 217   reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
 218 
 219   reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
 220   reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
 221   reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
 222   reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
 223 
 224   reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
 225   reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
 226   reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
 227   reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
 228 
 229   reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
 230   reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
 231   reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
 232   reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
 233 
 234   reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
 235   reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
 236   reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
 237   reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
 238 
 239   reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
 240   reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
 241   reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
 242   reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
 243 
 244   reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
 245   reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
 246   reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
 247   reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
 248 
 249   reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
 250   reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
 251   reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
 252   reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
 253 
 254   reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
 255   reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
 256   reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
 257   reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
 258 
 259   reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
 260   reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
 261   reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
 262   reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
 263 
 264   reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
 265   reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
 266   reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
 267   reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
 268 
 269   reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
 270   reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
 271   reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
 272   reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
 273 
 274   reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
 275   reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
 276   reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
 277   reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
 278 
 279   reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
 280   reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
 281   reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
 282   reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
 283 
 284   reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
 285   reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
 286   reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
 287   reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
 288 
 289   reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
 290   reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
 291   reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
 292   reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
 293 
 294   reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
 295   reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
 296   reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
 297   reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
 298 
 299   reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
 300   reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
 301   reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
 302   reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
 303 
 304   reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
 305   reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
 306   reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
 307   reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
 308 
 309   reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
 310   reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
 311   reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
 312   reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
 313 
 314   reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
 315   reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
 316   reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
 317   reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
 318 
 319   reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
 320   reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
 321   reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
 322   reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
 328 // the AArch64 CPSR status flag register is not directly accessible as
 329 // instruction operand. the FPSR status flag register is a system
 330 // register which can be written/read using MSR/MRS but again does not
 331 // appear as an operand (a code identifying the FPSR occurs as an
 332 // immediate value in the instruction).
 333 // RFLAGS is defined with encoding 32 and VMRegImpl::Bad() as its VMReg.
 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
 345 alloc_class chunk0( // general registers, highest allocation priority first
 346     // volatiles
 347     R10, R10_H,
 348     R11, R11_H,
 349     R12, R12_H,
 350     R13, R13_H,
 351     R14, R14_H,
 352     R15, R15_H,
 353     R16, R16_H,
 354     R17, R17_H,
 355     R18, R18_H,
 356 
 357     // arg registers
 358     R0, R0_H,
 359     R1, R1_H,
 360     R2, R2_H,
 361     R3, R3_H,
 362     R4, R4_H,
 363     R5, R5_H,
 364     R6, R6_H,
 365     R7, R7_H,
 366 
 367     // non-volatiles (defined SOE for the C convention but SOC for Java)
 368     R19, R19_H,
 369     R20, R20_H,
 370     R21, R21_H,
 371     R22, R22_H,
 372     R23, R23_H,
 373     R24, R24_H,
 374     R25, R25_H,
 375     R26, R26_H,
 376 
 377     // non-allocatable registers (all defined NS for Java)
 378 
 379     R27, R27_H, // heapbase
 380     R28, R28_H, // thread
 381     R29, R29_H, // fp
 382     R30, R30_H, // lr
 383     R31, R31_H, // sp
 384 );
 385 
 386 alloc_class chunk1( // float/vector registers, highest allocation priority first
 387 
 388     // caller-saved in both conventions (these are defined SOC, not NS)
 389     V16, V16_H, V16_J, V16_K,
 390     V17, V17_H, V17_J, V17_K,
 391     V18, V18_H, V18_J, V18_K,
 392     V19, V19_H, V19_J, V19_K,
 393     V20, V20_H, V20_J, V20_K,
 394     V21, V21_H, V21_J, V21_K,
 395     V22, V22_H, V22_J, V22_K,
 396     V23, V23_H, V23_J, V23_K,
 397     V24, V24_H, V24_J, V24_K,
 398     V25, V25_H, V25_J, V25_K,
 399     V26, V26_H, V26_J, V26_K,
 400     V27, V27_H, V27_J, V27_K,
 401     V28, V28_H, V28_J, V28_K,
 402     V29, V29_H, V29_J, V29_K,
 403     V30, V30_H, V30_J, V30_K,
 404     V31, V31_H, V31_J, V31_K,
 405 
 406     // arg registers
 407     V0, V0_H, V0_J, V0_K,
 408     V1, V1_H, V1_J, V1_K,
 409     V2, V2_H, V2_J, V2_K,
 410     V3, V3_H, V3_J, V3_K,
 411     V4, V4_H, V4_J, V4_K,
 412     V5, V5_H, V5_J, V5_K,
 413     V6, V6_H, V6_J, V6_K,
 414     V7, V7_H, V7_J, V7_K,
 415 
 416     // non-volatiles in the platform ABI (v8-v15), but defined SOC here
 417     V8, V8_H, V8_J, V8_K,
 418     V9, V9_H, V9_J, V9_K,
 419     V10, V10_H, V10_J, V10_K,
 420     V11, V11_H, V11_J, V11_K,
 421     V12, V12_H, V12_J, V12_K,
 422     V13, V13_H, V13_J, V13_K,
 423     V14, V14_H, V14_J, V14_K,
 424     V15, V15_H, V15_J, V15_K,
 425 );
 426 
 427 alloc_class chunk2(RFLAGS); // the flags pseudo-register gets a chunk of its own
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
 432 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 433 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 434 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 435 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
 438 // Class for all 32 bit integer registers (low halves only) -- excludes
 439 // SP (R31), which will never be used as an integer register
 440 reg_class any_reg32(
 441     R0,
 442     R1,
 443     R2,
 444     R3,
 445     R4,
 446     R5,
 447     R6,
 448     R7,
 449     R10,
 450     R11,
 451     R12,
 452     R13,
 453     R14,
 454     R15,
 455     R16,
 456     R17,
 457     R18,
 458     R19,
 459     R20,
 460     R21,
 461     R22,
 462     R23,
 463     R24,
 464     R25,
 465     R26,
 466     R27,
 467     R28,
 468     R29,
 469     R30
 470 );
 471 
 472 // Singleton class for R0 int register (32 bit view: low half only)
 473 reg_class int_r0_reg(R0);
 474 
 475 // Singleton class for R2 int register (32 bit view: low half only)
 476 reg_class int_r2_reg(R2);
 477 
 478 // Singleton class for R3 int register (32 bit view: low half only)
 479 reg_class int_r3_reg(R3);
 480 
 481 // Singleton class for R4 int register (32 bit view: low half only)
 482 reg_class int_r4_reg(R4);
 483 
 484 // Class for all long integer registers (including SP, i.e. R31)
 485 reg_class any_reg(
 486     R0, R0_H,
 487     R1, R1_H,
 488     R2, R2_H,
 489     R3, R3_H,
 490     R4, R4_H,
 491     R5, R5_H,
 492     R6, R6_H,
 493     R7, R7_H,
 494     R10, R10_H,
 495     R11, R11_H,
 496     R12, R12_H,
 497     R13, R13_H,
 498     R14, R14_H,
 499     R15, R15_H,
 500     R16, R16_H,
 501     R17, R17_H,
 502     R18, R18_H,
 503     R19, R19_H,
 504     R20, R20_H,
 505     R21, R21_H,
 506     R22, R22_H,
 507     R23, R23_H,
 508     R24, R24_H,
 509     R25, R25_H,
 510     R26, R26_H,
 511     R27, R27_H,
 512     R28, R28_H,
 513     R29, R29_H,
 514     R30, R30_H,
 515     R31, R31_H
 516 );
 517 
 518 // Class for all non-special 32 bit integer registers, with fp (R29) also excluded
 519 reg_class no_special_reg32_no_fp(
 520     R0,
 521     R1,
 522     R2,
 523     R3,
 524     R4,
 525     R5,
 526     R6,
 527     R7,
 528     R10,
 529     R11,
 530     R12,                        // rmethod
 531     R13,
 532     R14,
 533     R15,
 534     R16,
 535     R17,
 536     R18,
 537     R19,
 538     R20,
 539     R21,
 540     R22,
 541     R23,
 542     R24,
 543     R25,
 544     R26
 545  /* R27, */                     // heapbase
 546  /* R28, */                     // thread
 547  /* R29, */                     // fp
 548  /* R30, */                     // lr
 549  /* R31 */                      // sp
 550 );
 551 // Class for all non-special 32 bit integer registers, with fp (R29) allocatable
 552 reg_class no_special_reg32_with_fp(
 553     R0,
 554     R1,
 555     R2,
 556     R3,
 557     R4,
 558     R5,
 559     R6,
 560     R7,
 561     R10,
 562     R11,
 563     R12,                        // rmethod
 564     R13,
 565     R14,
 566     R15,
 567     R16,
 568     R17,
 569     R18,
 570     R19,
 571     R20,
 572     R21,
 573     R22,
 574     R23,
 575     R24,
 576     R25,
 577     R26,
 578  /* R27, */                     // heapbase
 579  /* R28, */                     // thread
 580     R29,                        // fp
 581  /* R30, */                     // lr
 582  /* R31 */                      // sp
 583 );
 584 // Resolves to the _no_fp class when PreserveFramePointer is set, else to the _with_fp class
 585 reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
 587 // Class for all non-special long integer registers, with fp (R29) also excluded
 588 reg_class no_special_reg_no_fp(
 589     R0, R0_H,
 590     R1, R1_H,
 591     R2, R2_H,
 592     R3, R3_H,
 593     R4, R4_H,
 594     R5, R5_H,
 595     R6, R6_H,
 596     R7, R7_H,
 597     R10, R10_H,
 598     R11, R11_H,
 599     R12, R12_H,                 // rmethod
 600     R13, R13_H,
 601     R14, R14_H,
 602     R15, R15_H,
 603     R16, R16_H,
 604     R17, R17_H,
 605     R18, R18_H,
 606     R19, R19_H,
 607     R20, R20_H,
 608     R21, R21_H,
 609     R22, R22_H,
 610     R23, R23_H,
 611     R24, R24_H,
 612     R25, R25_H,
 613     R26, R26_H,
 614  /* R27, R27_H, */              // heapbase
 615  /* R28, R28_H, */              // thread
 616  /* R29, R29_H, */              // fp
 617  /* R30, R30_H, */              // lr
 618  /* R31, R31_H */               // sp
 619 );
 620 // Class for all non-special long integer registers, with fp (R29) allocatable
 621 reg_class no_special_reg_with_fp(
 622     R0, R0_H,
 623     R1, R1_H,
 624     R2, R2_H,
 625     R3, R3_H,
 626     R4, R4_H,
 627     R5, R5_H,
 628     R6, R6_H,
 629     R7, R7_H,
 630     R10, R10_H,
 631     R11, R11_H,
 632     R12, R12_H,                 // rmethod
 633     R13, R13_H,
 634     R14, R14_H,
 635     R15, R15_H,
 636     R16, R16_H,
 637     R17, R17_H,
 638     R18, R18_H,
 639     R19, R19_H,
 640     R20, R20_H,
 641     R21, R21_H,
 642     R22, R22_H,
 643     R23, R23_H,
 644     R24, R24_H,
 645     R25, R25_H,
 646     R26, R26_H,
 647  /* R27, R27_H, */              // heapbase
 648  /* R28, R28_H, */              // thread
 649     R29, R29_H,                 // fp
 650  /* R30, R30_H, */              // lr
 651  /* R31, R31_H */               // sp
 652 );
 653 // Resolves to the _no_fp class when PreserveFramePointer is set, else to the _with_fp class
 654 reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
 656 // Class for 64 bit register r0 (low half plus virtual high half)
 657 reg_class r0_reg(
 658     R0, R0_H
 659 );
 660 
 661 // Class for 64 bit register r1 (low half plus virtual high half)
 662 reg_class r1_reg(
 663     R1, R1_H
 664 );
 665 
 666 // Class for 64 bit register r2 (low half plus virtual high half)
 667 reg_class r2_reg(
 668     R2, R2_H
 669 );
 670 
 671 // Class for 64 bit register r3 (low half plus virtual high half)
 672 reg_class r3_reg(
 673     R3, R3_H
 674 );
 675 
 676 // Class for 64 bit register r4 (low half plus virtual high half)
 677 reg_class r4_reg(
 678     R4, R4_H
 679 );
 680 
 681 // Class for 64 bit register r5 (low half plus virtual high half)
 682 reg_class r5_reg(
 683     R5, R5_H
 684 );
 685 
 686 // Class for 64 bit register r10 (low half plus virtual high half)
 687 reg_class r10_reg(
 688     R10, R10_H
 689 );
 690 
 691 // Class for 64 bit register r11 (low half plus virtual high half)
 692 reg_class r11_reg(
 693     R11, R11_H
 694 );
 695 
 696 // Class for method register (R12, rmethod)
 697 reg_class method_reg(
 698     R12, R12_H
 699 );
 700 
 701 // Class for heapbase register (R27)
 702 reg_class heapbase_reg(
 703     R27, R27_H
 704 );
 705 
 706 // Class for thread register (R28)
 707 reg_class thread_reg(
 708     R28, R28_H
 709 );
 710 
 711 // Class for frame pointer register (R29)
 712 reg_class fp_reg(
 713     R29, R29_H
 714 );
 715 
 716 // Class for link register (R30)
 717 reg_class lr_reg(
 718     R30, R30_H
 719 );
 720 
 721 // Class for 64 bit sp register (R31)
 722 reg_class sp_reg(
 723   R31, R31_H
 724 );
 725 
 726 // Class for all pointer registers (every defined general register, including SP)
 727 reg_class ptr_reg(
 728     R0, R0_H,
 729     R1, R1_H,
 730     R2, R2_H,
 731     R3, R3_H,
 732     R4, R4_H,
 733     R5, R5_H,
 734     R6, R6_H,
 735     R7, R7_H,
 736     R10, R10_H,
 737     R11, R11_H,
 738     R12, R12_H,
 739     R13, R13_H,
 740     R14, R14_H,
 741     R15, R15_H,
 742     R16, R16_H,
 743     R17, R17_H,
 744     R18, R18_H,
 745     R19, R19_H,
 746     R20, R20_H,
 747     R21, R21_H,
 748     R22, R22_H,
 749     R23, R23_H,
 750     R24, R24_H,
 751     R25, R25_H,
 752     R26, R26_H,
 753     R27, R27_H,
 754     R28, R28_H,
 755     R29, R29_H,
 756     R30, R30_H,
 757     R31, R31_H
 758 );
 759 
 760 // Class for all non_special pointer registers (fp is always excluded; this class is not dynamic)
 761 reg_class no_special_ptr_reg(
 762     R0, R0_H,
 763     R1, R1_H,
 764     R2, R2_H,
 765     R3, R3_H,
 766     R4, R4_H,
 767     R5, R5_H,
 768     R6, R6_H,
 769     R7, R7_H,
 770     R10, R10_H,
 771     R11, R11_H,
 772     R12, R12_H,
 773     R13, R13_H,
 774     R14, R14_H,
 775     R15, R15_H,
 776     R16, R16_H,
 777     R17, R17_H,
 778     R18, R18_H,
 779     R19, R19_H,
 780     R20, R20_H,
 781     R21, R21_H,
 782     R22, R22_H,
 783     R23, R23_H,
 784     R24, R24_H,
 785     R25, R25_H,
 786     R26, R26_H,
 787  /* R27, R27_H, */              // heapbase
 788  /* R28, R28_H, */              // thread
 789  /* R29, R29_H, */              // fp
 790  /* R30, R30_H, */              // lr
 791  /* R31, R31_H */               // sp
 792 );
 793 
 794 // Class for all float registers (first slot Vn only)
 795 reg_class float_reg(
 796     V0,
 797     V1,
 798     V2,
 799     V3,
 800     V4,
 801     V5,
 802     V6,
 803     V7,
 804     V8,
 805     V9,
 806     V10,
 807     V11,
 808     V12,
 809     V13,
 810     V14,
 811     V15,
 812     V16,
 813     V17,
 814     V18,
 815     V19,
 816     V20,
 817     V21,
 818     V22,
 819     V23,
 820     V24,
 821     V25,
 822     V26,
 823     V27,
 824     V28,
 825     V29,
 826     V30,
 827     V31
 828 );
 829 
 830 // Double precision float registers have virtual `high halves' (Vn_H) that
 831 // are needed by the allocator.
 832 // Class for all double registers (slots Vn and Vn_H)
 833 reg_class double_reg(
 834     V0, V0_H,
 835     V1, V1_H,
 836     V2, V2_H,
 837     V3, V3_H,
 838     V4, V4_H,
 839     V5, V5_H,
 840     V6, V6_H,
 841     V7, V7_H,
 842     V8, V8_H,
 843     V9, V9_H,
 844     V10, V10_H,
 845     V11, V11_H,
 846     V12, V12_H,
 847     V13, V13_H,
 848     V14, V14_H,
 849     V15, V15_H,
 850     V16, V16_H,
 851     V17, V17_H,
 852     V18, V18_H,
 853     V19, V19_H,
 854     V20, V20_H,
 855     V21, V21_H,
 856     V22, V22_H,
 857     V23, V23_H,
 858     V24, V24_H,
 859     V25, V25_H,
 860     V26, V26_H,
 861     V27, V27_H,
 862     V28, V28_H,
 863     V29, V29_H,
 864     V30, V30_H,
 865     V31, V31_H
 866 );
 867 
 868 // Class for all 64bit vector registers (same Vn, Vn_H slot pairs as double_reg)
 869 reg_class vectord_reg(
 870     V0, V0_H,
 871     V1, V1_H,
 872     V2, V2_H,
 873     V3, V3_H,
 874     V4, V4_H,
 875     V5, V5_H,
 876     V6, V6_H,
 877     V7, V7_H,
 878     V8, V8_H,
 879     V9, V9_H,
 880     V10, V10_H,
 881     V11, V11_H,
 882     V12, V12_H,
 883     V13, V13_H,
 884     V14, V14_H,
 885     V15, V15_H,
 886     V16, V16_H,
 887     V17, V17_H,
 888     V18, V18_H,
 889     V19, V19_H,
 890     V20, V20_H,
 891     V21, V21_H,
 892     V22, V22_H,
 893     V23, V23_H,
 894     V24, V24_H,
 895     V25, V25_H,
 896     V26, V26_H,
 897     V27, V27_H,
 898     V28, V28_H,
 899     V29, V29_H,
 900     V30, V30_H,
 901     V31, V31_H
 902 );
 903 
 904 // Class for all 128bit vector registers
 905 reg_class vectorx_reg(
 906     V0, V0_H, V0_J, V0_K,
 907     V1, V1_H, V1_J, V1_K,
 908     V2, V2_H, V2_J, V2_K,
 909     V3, V3_H, V3_J, V3_K,
 910     V4, V4_H, V4_J, V4_K,
 911     V5, V5_H, V5_J, V5_K,
 912     V6, V6_H, V6_J, V6_K,
 913     V7, V7_H, V7_J, V7_K,
 914     V8, V8_H, V8_J, V8_K,
 915     V9, V9_H, V9_J, V9_K,
 916     V10, V10_H, V10_J, V10_K,
 917     V11, V11_H, V11_J, V11_K,
 918     V12, V12_H, V12_J, V12_K,
 919     V13, V13_H, V13_J, V13_K,
 920     V14, V14_H, V14_J, V14_K,
 921     V15, V15_H, V15_J, V15_K,
 922     V16, V16_H, V16_J, V16_K,
 923     V17, V17_H, V17_J, V17_K,
 924     V18, V18_H, V18_J, V18_K,
 925     V19, V19_H, V19_J, V19_K,
 926     V20, V20_H, V20_J, V20_K,
 927     V21, V21_H, V21_J, V21_K,
 928     V22, V22_H, V22_J, V22_K,
 929     V23, V23_H, V23_J, V23_K,
 930     V24, V24_H, V24_J, V24_K,
 931     V25, V25_H, V25_J, V25_K,
 932     V26, V26_H, V26_J, V26_K,
 933     V27, V27_H, V27_J, V27_K,
 934     V28, V28_H, V28_J, V28_K,
 935     V29, V29_H, V29_J, V29_K,
 936     V30, V30_H, V30_J, V30_K,
 937     V31, V31_H, V31_J, V31_K
 938 );
 939 
 940 // Class for 128 bit register v0
 941 reg_class v0_reg(
 942     V0, V0_H
 943 );
 944 
 945 // Class for 128 bit register v1
 946 reg_class v1_reg(
 947     V1, V1_H
 948 );
 949 
 950 // Class for 128 bit register v2
 951 reg_class v2_reg(
 952     V2, V2_H
 953 );
 954 
 955 // Class for 128 bit register v3
 956 reg_class v3_reg(
 957     V3, V3_H
 958 );
 959 
 960 // Singleton class for condition codes
 961 reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
 983 definitions %{
 984   // The default cost (of a register move instruction).
 985   int_def INSN_COST            (    100,     100);
 986   int_def BRANCH_COST          (    200,     2 * INSN_COST);
 987   int_def CALL_COST            (    200,     2 * INSN_COST);
 988   int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
 989 %}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "gc/shared/cardTableModRefBS.hpp"
 999 
1000 class CallStubImpl {
1001 
1002   //--------------------------------------------------------------
1003   //---<  Used for optimization in Compile::shorten_branches  >---
1004   //--------------------------------------------------------------
1005 
1006  public:
1007   // Size of call trampoline stub.
1008   static uint size_call_trampoline() {
1009     return 0; // no call trampolines on this platform
1010   }
1011 
1012   // number of relocations needed by a call trampoline stub
1013   static uint reloc_call_trampoline() {
1014     return 0; // no call trampolines on this platform
1015   }
1016 };
1017 
1018 class HandlerImpl {
1019 
1020  public:
1021 
1022   static int emit_exception_handler(CodeBuffer &cbuf);
1023   static int emit_deopt_handler(CodeBuffer& cbuf);
1024 
1025   static uint size_exception_handler() {
1026     return MacroAssembler::far_branch_size();
1027   }
1028 
1029   static uint size_deopt_handler() {
1030     // count one adr and one far branch instruction
1031     return 4 * NativeInstruction::instruction_size;
1032   }
1033 };
1034 
1035   // graph traversal helpers
1036   MemBarNode *has_parent_membar(const Node *n,
1037                                 ProjNode *&ctl, ProjNode *&mem);
1038   MemBarNode *has_child_membar(const MemBarNode *n,
1039                                ProjNode *&ctl, ProjNode *&mem);
1040 
1041   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1042   bool unnecessary_acquire(const Node *barrier);
1043   bool needs_acquiring_load(const Node *load);
1044 
1045   // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1046   bool unnecessary_release(const Node *barrier);
1047   bool unnecessary_volatile(const Node *barrier);
1048   bool needs_releasing_store(const Node *store);
1049 
1050   // Use barrier instructions for unsafe volatile gets rather than
1051   // trying to identify an exact signature for them
1052   const bool UseBarriersForUnsafeVolatileGet = false;
1053 %}
1054 
1055 source %{
1056 
1057   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1058   // use to implement volatile reads and writes. For a volatile read
1059   // we simply need
1060   //
1061   //   ldar<x>
1062   //
1063   // and for a volatile write we need
1064   //
1065   //   stlr<x>
1066   // 
1067   // Alternatively, we can implement them by pairing a normal
1068   // load/store with a memory barrier. For a volatile read we need
1069   // 
1070   //   ldr<x>
1071   //   dmb ishld
1072   //
1073   // for a volatile write
1074   //
1075   //   dmb ish
1076   //   str<x>
1077   //   dmb ish
1078   //
  // In order to generate the desired instruction sequence we need to
  // be able to identify specific 'signature' ideal graph node
  // sequences which i) occur as a translation of volatile reads or
  // writes and ii) do not occur through any other translation or
  // graph transformation. We can then provide alternative adlc
  // matching rules which translate these node sequences to the
  // desired machine code sequences. Selection of the alternative
  // rules can be implemented by predicates which identify the
  // relevant node sequences.
1088   //
1089   // The ideal graph generator translates a volatile read to the node
1090   // sequence
1091   //
1092   //   LoadX[mo_acquire]
1093   //   MemBarAcquire
1094   //
1095   // As a special case when using the compressed oops optimization we
1096   // may also see this variant
1097   //
1098   //   LoadN[mo_acquire]
1099   //   DecodeN
1100   //   MemBarAcquire
1101   //
1102   // A volatile write is translated to the node sequence
1103   //
1104   //   MemBarRelease
1105   //   StoreX[mo_release]
1106   //   MemBarVolatile
1107   //
1108   // n.b. the above node patterns are generated with a strict
1109   // 'signature' configuration of input and output dependencies (see
1110   // the predicates below for exact details). The two signatures are
1111   // unique to translated volatile reads/stores -- they will not
1112   // appear as a result of any other bytecode translation or inlining
1113   // nor as a consequence of optimizing transforms.
1114   //
  // We also want to catch inlined unsafe volatile gets and puts and
  // be able to implement them using either ldar<x>/stlr<x> or some
  // combination of ldr<x>/str<x> and dmb instructions.
1118   //
  // Inlined unsafe volatile puts manifest as a minor variant of the
  // normal volatile put node sequence containing an extra cpuorder
  // membar
1122   //
1123   //   MemBarRelease
1124   //   MemBarCPUOrder
1125   //   StoreX[mo_release]
1126   //   MemBarVolatile
1127   //
1128   // n.b. as an aside, the cpuorder membar is not itself subject to
1129   // matching and translation by adlc rules.  However, the rule
1130   // predicates need to detect its presence in order to correctly
1131   // select the desired adlc rules.
1132   //
  // Inlined unsafe volatile gets manifest as a somewhat different
  // node sequence to a normal volatile get
1135   //
1136   //   MemBarCPUOrder
1137   //        ||       \\
1138   //   MemBarAcquire LoadX[mo_acquire]
1139   //        ||
1140   //   MemBarCPUOrder
1141   //
1142   // In this case the acquire membar does not directly depend on the
1143   // load. However, we can be sure that the load is generated from an
1144   // inlined unsafe volatile get if we see it dependent on this unique
1145   // sequence of membar nodes. Similarly, given an acquire membar we
1146   // can know that it was added because of an inlined unsafe volatile
1147   // get if it is fed and feeds a cpuorder membar and if its feed
1148   // membar also feeds an acquiring load.
1149   //
1150   // So, where we can identify these volatile read and write
1151   // signatures we can choose to plant either of the above two code
1152   // sequences. For a volatile read we can simply plant a normal
1153   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1154   // also choose to inhibit translation of the MemBarAcquire and
1155   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1156   //
  // When we recognise a volatile store signature we can choose to
  // plant a dmb ish as a translation for the MemBarRelease, a
  // normal str<x> and then a dmb ish for the MemBarVolatile.
1160   // Alternatively, we can inhibit translation of the MemBarRelease
1161   // and MemBarVolatile and instead plant a simple stlr<x>
1162   // instruction.
1163   //
1164   // Of course, the above only applies when we see these signature
1165   // configurations. We still want to plant dmb instructions in any
1166   // other cases where we may see a MemBarAcquire, MemBarRelease or
1167   // MemBarVolatile. For example, at the end of a constructor which
1168   // writes final/volatile fields we will see a MemBarRelease
1169   // instruction and this needs a 'dmb ish' lest we risk the
1170   // constructed object being visible without making the
1171   // final/volatile field writes visible.
1172   //
  // n.b. the translation rules below which rely on detection of the
  // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
  // If we see anything other than the signature configurations we
  // always just translate the loads and stores to ldr<x> and str<x>
  // and translate acquire, release and volatile membars to the
  // relevant dmb instructions.
1179   //
1180   // n.b.b as a case in point for the above comment, the current
1181   // predicates don't detect the precise signature for certain types
1182   // of volatile object stores (where the heap_base input type is not
1183   // known at compile-time to be non-NULL). In those cases the
1184   // MemBarRelease and MemBarVolatile bracket an if-then-else sequence
1185   // with a store in each branch (we need a different store depending
1186   // on whether heap_base is actually NULL). In such a case we will
1187   // just plant a dmb both before and after the branch/merge. The
1188   // predicate could (and probably should) be fixed later to also
1189   // detect this case.
1190 
1191   // graph traversal helpers
1192 
1193   // if node n is linked to a parent MemBarNode by an intervening
1194   // Control or Memory ProjNode return the MemBarNode otherwise return
1195   // NULL.
1196   //
1197   // n may only be a Load or a MemBar.
1198   //
1199   // The ProjNode* references c and m are used to return the relevant
1200   // nodes.
1201 
1202   MemBarNode *has_parent_membar(const Node *n, ProjNode *&c, ProjNode *&m)
1203   {
1204     Node *ctl = NULL;
1205     Node *mem = NULL;
1206     Node *membar = NULL;
1207 
1208     if (n->is_Load()) {
1209       ctl = n->lookup(LoadNode::Control);
1210       mem = n->lookup(LoadNode::Memory);
1211     } else if (n->is_MemBar()) {
1212       ctl = n->lookup(TypeFunc::Control);
1213       mem = n->lookup(TypeFunc::Memory);
1214     } else {
1215         return NULL;
1216     }
1217 
1218     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj())
1219       return NULL;
1220 
1221     c = ctl->as_Proj();
1222 
1223     membar = ctl->lookup(0);
1224 
1225     if (!membar || !membar->is_MemBar())
1226       return NULL;
1227 
1228     m = mem->as_Proj();
1229 
1230     if (mem->lookup(0) != membar)
1231       return NULL;
1232 
1233     return membar->as_MemBar();
1234   }
1235 
1236   // if n is linked to a child MemBarNode by intervening Control and
1237   // Memory ProjNodes return the MemBarNode otherwise return NULL.
1238   //
1239   // The ProjNode** arguments c and m are used to return pointers to
1240   // the relevant nodes. A null argument means don't don't return a
1241   // value.
1242 
1243   MemBarNode *has_child_membar(const MemBarNode *n, ProjNode *&c, ProjNode *&m)
1244   {
1245     ProjNode *ctl = n->proj_out(TypeFunc::Control);
1246     ProjNode *mem = n->proj_out(TypeFunc::Memory);
1247 
1248     // MemBar needs to have both a Ctl and Mem projection
1249     if (! ctl || ! mem)
1250       return NULL;
1251 
1252     c = ctl;
1253     m = mem;
1254 
1255     MemBarNode *child = NULL;
1256     Node *x;
1257 
1258     for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1259       x = ctl->fast_out(i);
1260       // if we see a membar we keep hold of it. we may also see a new
1261       // arena copy of the original but it will appear later
1262       if (x->is_MemBar()) {
1263           child = x->as_MemBar();
1264           break;
1265       }
1266     }
1267 
1268     if (child == NULL)
1269       return NULL;
1270 
1271     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1272       x = mem->fast_out(i);
1273       // if we see a membar we keep hold of it. we may also see a new
1274       // arena copy of the original but it will appear later
1275       if (x == child) {
1276         return child;
1277       }
1278     }
1279     return NULL;
1280   }
1281 
1282   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1283 
1284 bool unnecessary_acquire(const Node *barrier) {
1285   // assert barrier->is_MemBar();
1286   if (UseBarriersForVolatile)
1287     // we need to plant a dmb
1288     return false;
1289 
1290   // a volatile read derived from bytecode (or also from an inlined
1291   // SHA field read via LibraryCallKit::load_field_from_object)
1292   // manifests as a LoadX[mo_acquire] followed by an acquire membar
1293   // with a bogus read dependency on it's preceding load. so in those
1294   // cases we will find the load node at the PARMS offset of the
1295   // acquire membar.  n.b. there may be an intervening DecodeN node.
1296   //
1297   // a volatile load derived from an inlined unsafe field access
1298   // manifests as a cpuorder membar with Ctl and Mem projections
1299   // feeding both an acquire membar and a LoadX[mo_acquire]. The
1300   // acquire then feeds another cpuorder membar via Ctl and Mem
1301   // projections. The load has no output dependency on these trailing
1302   // membars because subsequent nodes inserted into the graph take
1303   // their control feed from the final membar cpuorder meaning they
1304   // are all ordered after the load.
1305 
1306   Node *x = barrier->lookup(TypeFunc::Parms);
1307   if (x) {
1308     // we are starting from an acquire and it has a fake dependency
1309     //
1310     // need to check for
1311     //
1312     //   LoadX[mo_acquire]
1313     //   {  |1   }
1314     //   {DecodeN}
1315     //      |Parms
1316     //   MemBarAcquire*
1317     //
1318     // where * tags node we were passed
1319     // and |k means input k
1320     if (x->is_DecodeNarrowPtr())
1321       x = x->in(1);
1322 
1323     return (x->is_Load() && x->as_Load()->is_acquire());
1324   }
1325   
1326   // only continue if we want to try to match unsafe volatile gets
1327   if (UseBarriersForUnsafeVolatileGet)
1328     return false;
1329 
1330   // need to check for
1331   //
1332   //     MemBarCPUOrder
1333   //        ||       \\
1334   //   MemBarAcquire* LoadX[mo_acquire]
1335   //        ||
1336   //   MemBarCPUOrder
1337   //
1338   // where * tags node we were passed
1339   // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes
1340 
1341   // check for a parent MemBarCPUOrder
1342   ProjNode *ctl;
1343   ProjNode *mem;
1344   MemBarNode *parent = has_parent_membar(barrier, ctl, mem);
1345   if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
1346     return false;
1347   // ensure the proj nodes both feed a LoadX[mo_acquire]
1348   LoadNode *ld = NULL;
1349   for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1350     x = ctl->fast_out(i);
1351     // if we see a load we keep hold of it and stop searching
1352     if (x->is_Load()) {
1353       ld = x->as_Load();
1354       break;
1355     }
1356   }
1357   // it must be an acquiring load
1358   if (! ld || ! ld->is_acquire())
1359     return false;
1360   for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1361     x = mem->fast_out(i);
1362     // if we see the same load we drop it and stop searching
1363     if (x == ld) {
1364       ld = NULL;
1365       break;
1366     }
1367   }
1368   // we must have dropped the load
1369   if (ld)
1370     return false;
1371   // check for a child cpuorder membar
1372   MemBarNode *child  = has_child_membar(barrier->as_MemBar(), ctl, mem);
1373   if (!child || child->Opcode() != Op_MemBarCPUOrder)
1374     return false;
1375 
1376   return true;
1377 }
1378 
1379 bool needs_acquiring_load(const Node *n)
1380 {
1381   // assert n->is_Load();
1382   if (UseBarriersForVolatile)
1383     // we use a normal load and a dmb
1384     return false;
1385 
1386   LoadNode *ld = n->as_Load();
1387 
1388   if (!ld->is_acquire())
1389     return false;
1390 
1391   // check if this load is feeding an acquire membar
1392   //
1393   //   LoadX[mo_acquire]
1394   //   {  |1   }
1395   //   {DecodeN}
1396   //      |Parms
1397   //   MemBarAcquire*
1398   //
1399   // where * tags node we were passed
1400   // and |k means input k
1401 
1402   Node *start = ld;
1403   Node *mbacq = NULL;
1404 
1405   // if we hit a DecodeNarrowPtr we reset the start node and restart
1406   // the search through the outputs
1407  restart:
1408 
1409   for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
1410     Node *x = start->fast_out(i);
1411     if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
1412       mbacq = x;
1413     } else if (!mbacq &&
1414                (x->is_DecodeNarrowPtr() ||
1415                 (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
1416       start = x;
1417       goto restart;
1418     }
1419   }
1420 
1421   if (mbacq) {
1422     return true;
1423   }
1424 
1425   // only continue if we want to try to match unsafe volatile gets
1426   if (UseBarriersForUnsafeVolatileGet)
1427     return false;
1428 
1429   // check if Ctl and Proj feed comes from a MemBarCPUOrder
1430   //
1431   //     MemBarCPUOrder
1432   //        ||       \\
1433   //   MemBarAcquire* LoadX[mo_acquire]
1434   //        ||
1435   //   MemBarCPUOrder
1436 
1437   MemBarNode *membar;
1438   ProjNode *ctl;
1439   ProjNode *mem;
1440 
1441   membar = has_parent_membar(ld, ctl, mem);
1442 
1443   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder)
1444     return false;
1445 
1446   // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
1447 
1448   membar = has_child_membar(membar, ctl, mem);
1449 
1450   if (!membar || !membar->Opcode() == Op_MemBarAcquire)
1451     return false;
1452 
1453   membar = has_child_membar(membar, ctl, mem);
1454   
1455   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder)
1456     return false;
1457 
1458   return true;
1459 }
1460 
1461 bool unnecessary_release(const Node *n) {
1462   // assert n->is_MemBar();
1463   if (UseBarriersForVolatile)
1464     // we need to plant a dmb
1465     return false;
1466 
1467   // ok, so we can omit this release barrier if it has been inserted
1468   // as part of a volatile store sequence
1469   //
1470   //   MemBarRelease
1471   //  {      ||      }
1472   //  {MemBarCPUOrder} -- optional
1473   //         ||     \\
1474   //         ||     StoreX[mo_release]
1475   //         | \     /
1476   //         | MergeMem
1477   //         | /
1478   //   MemBarVolatile
1479   //
1480   // where
1481   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1482   //  | \ and / indicate further routing of the Ctl and Mem feeds
1483   // 
1484   // so we need to check that
1485   //
1486   // ia) the release membar (or its dependent cpuorder membar) feeds
1487   // control to a store node (via a Control project node)
1488   //
1489   // ii) the store is ordered release
1490   //
1491   // iii) the release membar (or its dependent cpuorder membar) feeds
1492   // control to a volatile membar (via the same Control project node)
1493   //
1494   // iv) the release membar feeds memory to a merge mem and to the
1495   // same store (both via a single Memory proj node)
1496   //
1497   // v) the store outputs to the merge mem
1498   //
1499   // vi) the merge mem outputs to the same volatile membar
1500   //
1501   // n.b. if this is an inlined unsafe node then the release membar
1502   // may feed its control and memory links via an intervening cpuorder
1503   // membar. this case can be dealt with when we check the release
1504   // membar projections. if they both feed a single cpuorder membar
1505   // node continue to make the same checks as above but with the
1506   // cpuorder membar substituted for the release membar. if they don't
1507   // both feed a cpuorder membar then the check fails.
1508   //
1509   // n.b.b. for an inlined unsafe store of an object in the case where
1510   // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see
1511   // an embedded if then else where we expect the store. this is
1512   // needed to do the right type of store depending on whether
1513   // heap_base is NULL. We could check for that but for now we can
1514   // just take the hit of on inserting a redundant dmb for this
1515   // redundant volatile membar
1516 
1517   MemBarNode *barrier = n->as_MemBar();
1518   ProjNode *ctl;
1519   ProjNode *mem;
1520   // check for an intervening cpuorder membar
1521   MemBarNode *b = has_child_membar(barrier, ctl, mem);
1522   if (b && b->Opcode() == Op_MemBarCPUOrder) {
1523     // ok, so start form the dependent cpuorder barrier
1524     barrier = b;
1525   }
1526   // check the ctl and mem flow
1527   ctl = barrier->proj_out(TypeFunc::Control);
1528   mem = barrier->proj_out(TypeFunc::Memory);
1529 
1530   // the barrier needs to have both a Ctl and Mem projection
1531   if (! ctl || ! mem)
1532     return false;
1533 
1534   Node *x = NULL;
1535   Node *mbvol = NULL;
1536   StoreNode * st = NULL;
1537 
1538   // For a normal volatile write the Ctl ProjNode should have output
1539   // to a MemBarVolatile and a Store marked as releasing
1540   //
1541   // n.b. for an inlined unsafe store of an object in the case where
1542   // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see
1543   // an embedded if then else where we expect the store. this is
1544   // needed to do the right type of store depending on whether
1545   // heap_base is NULL. We could check for that case too but for now
1546   // we can just take the hit of inserting a dmb and a non-volatile
1547   // store to implement the volatile store
1548 
1549   for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1550     x = ctl->fast_out(i);
1551     if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
1552       if (mbvol) {
1553         return false;
1554       }
1555       mbvol = x;
1556     } else if (x->is_Store()) {
1557       st = x->as_Store();
1558       if (! st->is_release()) {
1559         return false;
1560       }
1561     } else if (!x->is_Mach()) {
1562       // we may see mach nodes added during matching but nothing else
1563       return false;
1564     }
1565   }
1566 
1567   if (!mbvol || !st)
1568     return false;
1569 
1570   // the Mem ProjNode should output to a MergeMem and the same Store
1571   Node *mm = NULL;
1572   for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1573     x = mem->fast_out(i);
1574     if (!mm && x->is_MergeMem()) {
1575       mm = x;
1576     } else if (x != st && !x->is_Mach()) {
1577       // we may see mach nodes added during matching but nothing else
1578       return false;
1579     }
1580   }
1581 
1582   if (!mm)
1583     return false;
1584 
1585   // the MergeMem should output to the MemBarVolatile
1586   for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
1587     x = mm->fast_out(i);
1588     if (x != mbvol && !x->is_Mach()) {
1589       // we may see mach nodes added during matching but nothing else
1590       return false;
1591     }
1592   }
1593 
1594   return true;
1595 }
1596 
1597 bool unnecessary_volatile(const Node *n) {
1598   // assert n->is_MemBar();
1599   if (UseBarriersForVolatile)
1600     // we need to plant a dmb
1601     return false;
1602 
1603   // ok, so we can omit this volatile barrier if it has been inserted
1604   // as part of a volatile store sequence
1605   //
1606   //   MemBarRelease
1607   //  {      ||      }
1608   //  {MemBarCPUOrder} -- optional
1609   //         ||     \\
1610   //         ||     StoreX[mo_release]
1611   //         | \     /
1612   //         | MergeMem
1613   //         | /
1614   //   MemBarVolatile
1615   //
1616   // where
1617   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1618   //  | \ and / indicate further routing of the Ctl and Mem feeds
1619   // 
1620   // we need to check that
1621   //
1622   // i) the volatile membar gets its control feed from a release
1623   // membar (or its dependent cpuorder membar) via a Control project
1624   // node
1625   //
1626   // ii) the release membar (or its dependent cpuorder membar) also
1627   // feeds control to a store node via the same proj node
1628   //
1629   // iii) the store is ordered release
1630   //
1631   // iv) the release membar (or its dependent cpuorder membar) feeds
1632   // memory to a merge mem and to the same store (both via a single
1633   // Memory proj node)
1634   //
1635   // v) the store outputs to the merge mem
1636   //
1637   // vi) the merge mem outputs to the volatile membar
1638   //
1639   // n.b. for an inlined unsafe store of an object in the case where
1640   // !TypePtr::NULL_PTR->higher_equal(type(heap_base_oop)) we may see
1641   // an embedded if then else where we expect the store. this is
1642   // needed to do the right type of store depending on whether
1643   // heap_base is NULL. We could check for that but for now we can
1644   // just take the hit of on inserting a redundant dmb for this
1645   // redundant volatile membar
1646 
1647   MemBarNode *mbvol = n->as_MemBar();
1648   Node *x = n->lookup(TypeFunc::Control);
1649 
1650   if (! x || !x->is_Proj())
1651     return false;
1652 
1653   ProjNode *proj = x->as_Proj();
1654 
1655   x = proj->lookup(0);
1656 
1657   if (!x || !x->is_MemBar())
1658     return false;
1659 
1660   MemBarNode *barrier = x->as_MemBar();
1661 
1662   // if the barrier is a release membar we have what we want. if it is
1663   // a cpuorder membar then we need to ensure that it is fed by a
1664   // release membar in which case we proceed to check the graph below
1665   // this cpuorder membar as the feed
1666 
1667   if (x->Opcode() != Op_MemBarRelease) {
1668     if (x->Opcode() != Op_MemBarCPUOrder)
1669       return false;
1670     ProjNode *ctl;
1671     ProjNode *mem;
1672     MemBarNode *b = has_parent_membar(x, ctl, mem);
1673     if (!b || !b->Opcode() == Op_MemBarRelease)
1674       return false;
1675   }
1676 
1677   ProjNode *ctl = barrier->proj_out(TypeFunc::Control);
1678   ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1679 
1680   // barrier needs to have both a Ctl and Mem projection
1681   // and we need to have reached it via the Ctl projection
1682   if (! ctl || ! mem || ctl != proj)
1683     return false;
1684 
1685   StoreNode * st = NULL;
1686 
1687   // The Ctl ProjNode should have output to a MemBarVolatile and
1688   // a Store marked as releasing
1689   for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1690     x = ctl->fast_out(i);
1691     if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
1692       if (x != mbvol) {
1693         return false;
1694       }
1695     } else if (x->is_Store()) {
1696       st = x->as_Store();
1697       if (! st->is_release()) {
1698         return false;
1699       }
1700     } else if (!x->is_Mach()){
1701       // we may see mach nodes added during matching but nothing else
1702       return false;
1703     }
1704   }
1705 
1706   if (!st)
1707     return false;
1708 
1709   // the Mem ProjNode should output to a MergeMem and the same Store
1710   Node *mm = NULL;
1711   for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1712     x = mem->fast_out(i);
1713     if (!mm && x->is_MergeMem()) {
1714       mm = x;
1715     } else if (x != st && !x->is_Mach()) {
1716       // we may see mach nodes added during matching but nothing else
1717       return false;
1718     }
1719   }
1720 
1721   if (!mm)
1722     return false;
1723 
1724   // the MergeMem should output to the MemBarVolatile
1725   for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
1726     x = mm->fast_out(i);
1727     if (x != mbvol && !x->is_Mach()) {
1728       // we may see mach nodes added during matching but nothing else
1729       return false;
1730     }
1731   }
1732 
1733   return true;
1734 }
1735 
1736 
1737 
1738 bool needs_releasing_store(const Node *n)
1739 {
1740   // assert n->is_Store();
1741   if (UseBarriersForVolatile)
1742     // we use a normal store and dmb combination
1743     return false;
1744 
1745   StoreNode *st = n->as_Store();
1746 
1747   if (!st->is_release())
1748     return false;
1749 
1750   // check if this store is bracketed by a release (or its dependent
1751   // cpuorder membar) and a volatile membar
1752   //
1753   //   MemBarRelease
1754   //  {      ||      }
1755   //  {MemBarCPUOrder} -- optional
1756   //         ||     \\
1757   //         ||     StoreX[mo_release]
1758   //         | \     /
1759   //         | MergeMem
1760   //         | /
1761   //   MemBarVolatile
1762   //
1763   // where
1764   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1765   //  | \ and / indicate further routing of the Ctl and Mem feeds
1766   // 
1767 
1768 
1769   Node *x = st->lookup(TypeFunc::Control);
1770 
1771   if (! x || !x->is_Proj())
1772     return false;
1773 
1774   ProjNode *proj = x->as_Proj();
1775 
1776   x = proj->lookup(0);
1777 
1778   if (!x || !x->is_MemBar())
1779     return false;
1780 
1781   MemBarNode *barrier = x->as_MemBar();
1782 
1783   // if the barrier is a release membar we have what we want. if it is
1784   // a cpuorder membar then we need to ensure that it is fed by a
1785   // release membar in which case we proceed to check the graph below
1786   // this cpuorder membar as the feed
1787 
1788   if (x->Opcode() != Op_MemBarRelease) {
1789     if (x->Opcode() != Op_MemBarCPUOrder)
1790       return false;
1791     Node *ctl = x->lookup(TypeFunc::Control);
1792     Node *mem = x->lookup(TypeFunc::Memory);
1793     if (!ctl || !ctl->is_Proj() || !mem || !mem->is_Proj())
1794       return false;
1795     x = ctl->lookup(0);
1796     if (!x || !x->is_MemBar() || !x->Opcode() == Op_MemBarRelease)
1797       return false;
1798     Node *y = mem->lookup(0);
1799     if (!y || y != x)
1800       return false;
1801   }
1802 
1803   ProjNode *ctl = barrier->proj_out(TypeFunc::Control);
1804   ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1805 
1806   // MemBarRelease needs to have both a Ctl and Mem projection
1807   // and we need to have reached it via the Ctl projection
1808   if (! ctl || ! mem || ctl != proj)
1809     return false;
1810 
1811   MemBarNode *mbvol = NULL;
1812 
1813   // The Ctl ProjNode should have output to a MemBarVolatile and
1814   // a Store marked as releasing
1815   for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1816     x = ctl->fast_out(i);
1817     if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
1818       mbvol = x->as_MemBar();
1819     } else if (x->is_Store()) {
1820       if (x != st) {
1821         return false;
1822       }
1823     } else if (!x->is_Mach()){
1824       return false;
1825     }
1826   }
1827 
1828   if (!mbvol)
1829     return false;
1830 
1831   // the Mem ProjNode should output to a MergeMem and the same Store
1832   Node *mm = NULL;
1833   for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1834     x = mem->fast_out(i);
1835     if (!mm && x->is_MergeMem()) {
1836       mm = x;
1837     } else if (x != st && !x->is_Mach()) {
1838       return false;
1839     }
1840   }
1841 
1842   if (!mm)
1843     return false;
1844 
1845   // the MergeMem should output to the MemBarVolatile
1846   for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
1847     x = mm->fast_out(i);
1848     if (x != mbvol && !x->is_Mach()) {
1849       return false;
1850     }
1851   }
1852 
1853   return true;
1854 }
1855 
1856 
1857 
1858 #define __ _masm.
1859 
1860 // advance declarations for helper functions to convert register
1861 // indices to register objects
1862 
1863 // the ad file has to provide implementations of certain methods
1864 // expected by the generic code
1865 //
1866 // REQUIRED FUNCTIONALITY
1867 
1868 //=============================================================================
1869 
1870 // !!!!! Special hack to get all types of calls to specify the byte offset
1871 //       from the start of the call to the point where the return address
1872 //       will point.
1873 
1874 int MachCallStaticJavaNode::ret_addr_offset()
1875 {
1876   // call should be a simple bl
1877   int off = 4;
1878   return off;
1879 }
1880 
1881 int MachCallDynamicJavaNode::ret_addr_offset()
1882 {
1883   return 16; // movz, movk, movk, bl
1884 }
1885 
1886 int MachCallRuntimeNode::ret_addr_offset() {
1887   // for generated stubs the call will be
1888   //   far_call(addr)
1889   // for real runtime callouts it will be six instructions
1890   // see aarch64_enc_java_to_runtime
1891   //   adr(rscratch2, retaddr)
1892   //   lea(rscratch1, RuntimeAddress(addr)
1893   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1894   //   blrt rscratch1
1895   CodeBlob *cb = CodeCache::find_blob(_entry_point);
1896   if (cb) {
1897     return MacroAssembler::far_branch_size();
1898   } else {
1899     return 6 * NativeInstruction::instruction_size;
1900   }
1901 }
1902 
1903 // Indicate if the safepoint node needs the polling page as an input
1904 
1905 // the shared code plants the oop data at the start of the generated
1906 // code for the safepoint node and that needs ot be at the load
1907 // instruction itself. so we cannot plant a mov of the safepoint poll
1908 // address followed by a load. setting this to true means the mov is
1909 // scheduled as a prior instruction. that's better for scheduling
1910 // anyway.
1911 
1912 bool SafePointNode::needs_polling_address_input()
1913 {
1914   return true;
1915 }
1916 
1917 //=============================================================================
1918 
1919 #ifndef PRODUCT
1920 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1921   st->print("BREAKPOINT");
1922 }
1923 #endif
1924 
1925 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1926   MacroAssembler _masm(&cbuf);
1927   __ brk(0);
1928 }
1929 
1930 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
1931   return MachNode::size(ra_);
1932 }
1933 
1934 //=============================================================================
1935 
1936 #ifndef PRODUCT
1937   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
1938     st->print("nop \t# %d bytes pad for loops and calls", _count);
1939   }
1940 #endif
1941 
1942   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
1943     MacroAssembler _masm(&cbuf);
1944     for (int i = 0; i < _count; i++) {
1945       __ nop();
1946     }
1947   }
1948 
1949   uint MachNopNode::size(PhaseRegAlloc*) const {
1950     return _count * NativeInstruction::instruction_size;
1951   }
1952 
1953 //=============================================================================
1954 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
1955 
1956 int Compile::ConstantTable::calculate_table_base_offset() const {
1957   return 0;  // absolute addressing, no offset
1958 }
1959 
1960 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1961 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1962   ShouldNotReachHere();
1963 }
1964 
1965 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
1966   // Empty encoding
1967 }
1968 
1969 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1970   return 0;
1971 }
1972 
1973 #ifndef PRODUCT
1974 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1975   st->print("-- \t// MachConstantBaseNode (empty encoding)");
1976 }
1977 #endif
1978 
1979 #ifndef PRODUCT
1980 void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1981   Compile* C = ra_->C;
1982 
1983   int framesize = C->frame_slots() << LogBytesPerInt;
1984 
1985   if (C->need_stack_bang(framesize))
1986     st->print("# stack bang size=%d\n\t", framesize);
1987 
1988   if (framesize < ((1 << 9) + 2 * wordSize)) {
1989     st->print("sub  sp, sp, #%d\n\t", framesize);
1990     st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
1991     if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
1992   } else {
1993     st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
1994     if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
1995     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
1996     st->print("sub  sp, sp, rscratch1");
1997   }
1998 }
1999 #endif
2000 
2001 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2002   Compile* C = ra_->C;
2003   MacroAssembler _masm(&cbuf);
2004 
2005   // n.b. frame size includes space for return pc and rfp
2006   const long framesize = C->frame_size_in_bytes();
2007   assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");
2008 
2009   // insert a nop at the start of the prolog so we can patch in a
2010   // branch if we need to invalidate the method later
2011   __ nop();
2012 
2013   int bangsize = C->bang_size_in_bytes();
2014   if (C->need_stack_bang(bangsize) && UseStackBanging)
2015     __ generate_stack_overflow_check(bangsize);
2016 
2017   __ build_frame(framesize);
2018 
2019   if (NotifySimulator) {
2020     __ notify(Assembler::method_entry);
2021   }
2022 
2023   if (VerifyStackAtCalls) {
2024     Unimplemented();
2025   }
2026 
2027   C->set_frame_complete(cbuf.insts_size());
2028 
2029   if (C->has_mach_constant_base_node()) {
2030     // NOTE: We set the table base offset here because users might be
2031     // emitted before MachConstantBaseNode.
2032     Compile::ConstantTable& constant_table = C->constant_table();
2033     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
2034   }
2035 }
2036 
2037 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
2038 {
2039   return MachNode::size(ra_); // too many variables; just compute it
2040                               // the hard way
2041 }
2042 
2043 int MachPrologNode::reloc() const
2044 {
2045   return 0;
2046 }
2047 
2048 //=============================================================================
2049 
2050 #ifndef PRODUCT
2051 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2052   Compile* C = ra_->C;
2053   int framesize = C->frame_slots() << LogBytesPerInt;
2054 
2055   st->print("# pop frame %d\n\t",framesize);
2056 
2057   if (framesize == 0) {
2058     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
2059   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
2060     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
2061     st->print("add  sp, sp, #%d\n\t", framesize);
2062   } else {
2063     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
2064     st->print("add  sp, sp, rscratch1\n\t");
2065     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
2066   }
2067 
2068   if (do_polling() && C->is_method_compilation()) {
2069     st->print("# touch polling page\n\t");
2070     st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
2071     st->print("ldr zr, [rscratch1]");
2072   }
2073 }
2074 #endif
2075 
2076 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2077   Compile* C = ra_->C;
2078   MacroAssembler _masm(&cbuf);
2079   int framesize = C->frame_slots() << LogBytesPerInt;
2080 
2081   __ remove_frame(framesize);
2082 
2083   if (NotifySimulator) {
2084     __ notify(Assembler::method_reentry);
2085   }
2086 
2087   if (do_polling() && C->is_method_compilation()) {
2088     __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
2089   }
2090 }
2091 
2092 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
2093   // Variable size. Determine dynamically.
2094   return MachNode::size(ra_);
2095 }
2096 
2097 int MachEpilogNode::reloc() const {
2098   // Return number of relocatable values contained in this instruction.
2099   return 1; // 1 for polling page.
2100 }
2101 
2102 const Pipeline * MachEpilogNode::pipeline() const {
2103   return MachNode::pipeline_class();
2104 }
2105 
2106 // This method seems to be obsolete. It is declared in machnode.hpp
2107 // and defined in all *.ad files, but it is never called. Should we
2108 // get rid of it?
2109 int MachEpilogNode::safepoint_offset() const {
2110   assert(do_polling(), "no return for this epilog node");
2111   return 4;
2112 }
2113 
2114 //=============================================================================
2115 
2116 // Figure out which register class each belongs in: rc_int, rc_float or
2117 // rc_stack.
2118 enum RC { rc_bad, rc_int, rc_float, rc_stack };
2119 
2120 static enum RC rc_class(OptoReg::Name reg) {
2121 
2122   if (reg == OptoReg::Bad) {
2123     return rc_bad;
2124   }
2125 
2126   // we have 30 int registers * 2 halves
2127   // (rscratch1 and rscratch2 are omitted)
2128 
2129   if (reg < 60) {
2130     return rc_int;
2131   }
2132 
2133   // we have 32 float register * 2 halves
2134   if (reg < 60 + 128) {
2135     return rc_float;
2136   }
2137 
2138   // Between float regs & stack is the flags regs.
2139   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
2140 
2141   return rc_stack;
2142 }
2143 
2144 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
2145   Compile* C = ra_->C;
2146 
2147   // Get registers to move.
2148   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
2149   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
2150   OptoReg::Name dst_hi = ra_->get_reg_second(this);
2151   OptoReg::Name dst_lo = ra_->get_reg_first(this);
2152 
2153   enum RC src_hi_rc = rc_class(src_hi);
2154   enum RC src_lo_rc = rc_class(src_lo);
2155   enum RC dst_hi_rc = rc_class(dst_hi);
2156   enum RC dst_lo_rc = rc_class(dst_lo);
2157 
2158   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
2159 
2160   if (src_hi != OptoReg::Bad) {
2161     assert((src_lo&1)==0 && src_lo+1==src_hi &&
2162            (dst_lo&1)==0 && dst_lo+1==dst_hi,
2163            "expected aligned-adjacent pairs");
2164   }
2165 
2166   if (src_lo == dst_lo && src_hi == dst_hi) {
2167     return 0;            // Self copy, no move.
2168   }
2169 
2170   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
2171               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
2172   int src_offset = ra_->reg2offset(src_lo);
2173   int dst_offset = ra_->reg2offset(dst_lo);
2174 
2175   if (bottom_type()->isa_vect() != NULL) {
2176     uint ireg = ideal_reg();
2177     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
2178     if (cbuf) {
2179       MacroAssembler _masm(cbuf);
2180       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
2181       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
2182         // stack->stack
2183         assert((src_offset & 7) && (dst_offset & 7), "unaligned stack offset");
2184         if (ireg == Op_VecD) {
2185           __ unspill(rscratch1, true, src_offset);
2186           __ spill(rscratch1, true, dst_offset);
2187         } else {
2188           __ spill_copy128(src_offset, dst_offset);
2189         }
2190       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
2191         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2192                ireg == Op_VecD ? __ T8B : __ T16B,
2193                as_FloatRegister(Matcher::_regEncode[src_lo]));
2194       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
2195         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
2196                        ireg == Op_VecD ? __ D : __ Q,
2197                        ra_->reg2offset(dst_lo));
2198       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
2199         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2200                        ireg == Op_VecD ? __ D : __ Q,
2201                        ra_->reg2offset(src_lo));
2202       } else {
2203         ShouldNotReachHere();
2204       }
2205     }
2206   } else if (cbuf) {
2207     MacroAssembler _masm(cbuf);
2208     switch (src_lo_rc) {
2209     case rc_int:
2210       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
2211         if (is64) {
2212             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
2213                    as_Register(Matcher::_regEncode[src_lo]));
2214         } else {
2215             MacroAssembler _masm(cbuf);
2216             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
2217                     as_Register(Matcher::_regEncode[src_lo]));
2218         }
2219       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
2220         if (is64) {
2221             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2222                      as_Register(Matcher::_regEncode[src_lo]));
2223         } else {
2224             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2225                      as_Register(Matcher::_regEncode[src_lo]));
2226         }
2227       } else {                    // gpr --> stack spill
2228         assert(dst_lo_rc == rc_stack, "spill to bad register class");
2229         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
2230       }
2231       break;
2232     case rc_float:
2233       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
2234         if (is64) {
2235             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
2236                      as_FloatRegister(Matcher::_regEncode[src_lo]));
2237         } else {
2238             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
2239                      as_FloatRegister(Matcher::_regEncode[src_lo]));
2240         }
2241       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
2242           if (cbuf) {
2243             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2244                      as_FloatRegister(Matcher::_regEncode[src_lo]));
2245         } else {
2246             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2247                      as_FloatRegister(Matcher::_regEncode[src_lo]));
2248         }
2249       } else {                    // fpr --> stack spill
2250         assert(dst_lo_rc == rc_stack, "spill to bad register class");
2251         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
2252                  is64 ? __ D : __ S, dst_offset);
2253       }
2254       break;
2255     case rc_stack:
2256       if (dst_lo_rc == rc_int) {  // stack --> gpr load
2257         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
2258       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
2259         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2260                    is64 ? __ D : __ S, src_offset);
2261       } else {                    // stack --> stack copy
2262         assert(dst_lo_rc == rc_stack, "spill to bad register class");
2263         __ unspill(rscratch1, is64, src_offset);
2264         __ spill(rscratch1, is64, dst_offset);
2265       }
2266       break;
2267     default:
2268       assert(false, "bad rc_class for spill");
2269       ShouldNotReachHere();
2270     }
2271   }
2272 
2273   if (st) {
2274     st->print("spill ");
2275     if (src_lo_rc == rc_stack) {
2276       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
2277     } else {
2278       st->print("%s -> ", Matcher::regName[src_lo]);
2279     }
2280     if (dst_lo_rc == rc_stack) {
2281       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
2282     } else {
2283       st->print("%s", Matcher::regName[dst_lo]);
2284     }
2285     if (bottom_type()->isa_vect() != NULL) {
2286       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
2287     } else {
2288       st->print("\t# spill size = %d", is64 ? 64:32);
2289     }
2290   }
2291 
2292   return 0;
2293 
2294 }
2295 
2296 #ifndef PRODUCT
2297 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2298   if (!ra_)
2299     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
2300   else
2301     implementation(NULL, ra_, false, st);
2302 }
2303 #endif
2304 
2305 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2306   implementation(&cbuf, ra_, false, NULL);
2307 }
2308 
2309 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2310   return MachNode::size(ra_);
2311 }
2312 
2313 //=============================================================================
2314 
2315 #ifndef PRODUCT
2316 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2317   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2318   int reg = ra_->get_reg_first(this);
2319   st->print("add %s, rsp, #%d]\t# box lock",
2320             Matcher::regName[reg], offset);
2321 }
2322 #endif
2323 
2324 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2325   MacroAssembler _masm(&cbuf);
2326 
2327   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2328   int reg    = ra_->get_encode(this);
2329 
2330   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
2331     __ add(as_Register(reg), sp, offset);
2332   } else {
2333     ShouldNotReachHere();
2334   }
2335 }
2336 
2337 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
2338   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
2339   return 4;
2340 }
2341 
2342 //=============================================================================
2343 
2344 #ifndef PRODUCT
2345 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2346 {
2347   st->print_cr("# MachUEPNode");
2348   if (UseCompressedClassPointers) {
2349     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2350     if (Universe::narrow_klass_shift() != 0) {
2351       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
2352     }
2353   } else {
2354    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2355   }
2356   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
2357   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
2358 }
2359 #endif
2360 
2361 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
2362 {
2363   // This is the unverified entry point.
2364   MacroAssembler _masm(&cbuf);
2365 
2366   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
2367   Label skip;
2368   // TODO
2369   // can we avoid this skip and still use a reloc?
2370   __ br(Assembler::EQ, skip);
2371   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
2372   __ bind(skip);
2373 }
2374 
2375 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2376 {
2377   return MachNode::size(ra_);
2378 }
2379 
2380 // REQUIRED EMIT CODE
2381 
2382 //=============================================================================
2383 
2384 // Emit exception handler code.
2385 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
2386 {
2387   // mov rscratch1 #exception_blob_entry_point
2388   // br rscratch1
2389   // Note that the code buffer's insts_mark is always relative to insts.
2390   // That's why we must use the macroassembler to generate a handler.
2391   MacroAssembler _masm(&cbuf);
2392   address base = __ start_a_stub(size_exception_handler());
2393   if (base == NULL) {
2394     ciEnv::current()->record_failure("CodeCache is full");
2395     return 0;  // CodeBuffer::expand failed
2396   }
2397   int offset = __ offset();
2398   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2399   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
2400   __ end_a_stub();
2401   return offset;
2402 }
2403 
2404 // Emit deopt handler code.
2405 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
2406 {
2407   // Note that the code buffer's insts_mark is always relative to insts.
2408   // That's why we must use the macroassembler to generate a handler.
2409   MacroAssembler _masm(&cbuf);
2410   address base = __ start_a_stub(size_deopt_handler());
2411   if (base == NULL) {
2412     ciEnv::current()->record_failure("CodeCache is full");
2413     return 0;  // CodeBuffer::expand failed
2414   }
2415   int offset = __ offset();
2416 
2417   __ adr(lr, __ pc());
2418   __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2419 
2420   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
2421   __ end_a_stub();
2422   return offset;
2423 }
2424 
2425 // REQUIRED MATCHER CODE
2426 
2427 //=============================================================================
2428 
2429 const bool Matcher::match_rule_supported(int opcode) {
2430 
2431   // TODO
2432   // identify extra cases that we might want to provide match rules for
2433   // e.g. Op_StrEquals and other intrinsics
2434   if (!has_match_rule(opcode)) {
2435     return false;
2436   }
2437 
2438   return true;  // Per default match rules are supported.
2439 }
2440 
2441 int Matcher::regnum_to_fpu_offset(int regnum)
2442 {
2443   Unimplemented();
2444   return 0;
2445 }
2446 
2447 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset)
2448 {
2449   Unimplemented();
2450   return false;
2451 }
2452 
2453 const bool Matcher::isSimpleConstant64(jlong value) {
2454   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
2455   // Probably always true, even if a temp register is required.
2456   return true;
2457 }
2458 
2459 // true just means we have fast l2f conversion
2460 const bool Matcher::convL2FSupported(void) {
2461   return true;
2462 }
2463 
2464 // Vector width in bytes.
2465 const int Matcher::vector_width_in_bytes(BasicType bt) {
2466   int size = MIN2(16,(int)MaxVectorSize);
2467   // Minimum 2 values in vector
2468   if (size < 2*type2aelembytes(bt)) size = 0;
2469   // But never < 4
2470   if (size < 4) size = 0;
2471   return size;
2472 }
2473 
2474 // Limits on vector size (number of elements) loaded into vector.
2475 const int Matcher::max_vector_size(const BasicType bt) {
2476   return vector_width_in_bytes(bt)/type2aelembytes(bt);
2477 }
2478 const int Matcher::min_vector_size(const BasicType bt) {
2479 //  For the moment limit the vector size to 8 bytes
2480     int size = 8 / type2aelembytes(bt);
2481     if (size < 2) size = 2;
2482     return size;
2483 }
2484 
2485 // Vector ideal reg.
2486 const int Matcher::vector_ideal_reg(int len) {
2487   switch(len) {
2488     case  8: return Op_VecD;
2489     case 16: return Op_VecX;
2490   }
2491   ShouldNotReachHere();
2492   return 0;
2493 }
2494 
2495 const int Matcher::vector_shift_count_ideal_reg(int size) {
2496   return Op_VecX;
2497 }
2498 
2499 // AES support not yet implemented
2500 const bool Matcher::pass_original_key_for_aes() {
2501   return false;
2502 }
2503 
2504 // x86 supports misaligned vectors store/load.
2505 const bool Matcher::misaligned_vectors_ok() {
2506   return !AlignVector; // can be changed by flag
2507 }
2508 
2509 // false => size gets scaled to BytesPerLong, ok.
2510 const bool Matcher::init_array_count_is_in_bytes = false;
2511 
2512 // Threshold size for cleararray.
2513 const int Matcher::init_array_short_size = 18 * BytesPerLong;
2514 
2515 // Use conditional move (CMOVL)
2516 const int Matcher::long_cmove_cost() {
2517   // long cmoves are no more expensive than int cmoves
2518   return 0;
2519 }
2520 
2521 const int Matcher::float_cmove_cost() {
2522   // float cmoves are no more expensive than int cmoves
2523   return 0;
2524 }
2525 
2526 // Does the CPU require late expand (see block.cpp for description of late expand)?
2527 const bool Matcher::require_postalloc_expand = false;
2528 
2529 // Should the Matcher clone shifts on addressing modes, expecting them
2530 // to be subsumed into complex addressing expressions or compute them
2531 // into registers?  True for Intel but false for most RISCs
2532 const bool Matcher::clone_shift_expressions = false;
2533 
2534 // Do we need to mask the count passed to shift instructions or does
2535 // the cpu only look at the lower 5/6 bits anyway?
2536 const bool Matcher::need_masked_shift_count = false;
2537 
2538 // This affects two different things:
2539 //  - how Decode nodes are matched
2540 //  - how ImplicitNullCheck opportunities are recognized
2541 // If true, the matcher will try to remove all Decodes and match them
2542 // (as operands) into nodes. NullChecks are not prepared to deal with
2543 // Decodes by final_graph_reshaping().
2544 // If false, final_graph_reshaping() forces the decode behind the Cmp
2545 // for a NullCheck. The matcher matches the Decode node into a register.
2546 // Implicit_null_check optimization moves the Decode along with the
2547 // memory operation back up before the NullCheck.
2548 bool Matcher::narrow_oop_use_complex_address() {
2549   return Universe::narrow_oop_shift() == 0;
2550 }
2551 
2552 bool Matcher::narrow_klass_use_complex_address() {
2553 // TODO
2554 // decide whether we need to set this to true
2555   return false;
2556 }
2557 
2558 // Is it better to copy float constants, or load them directly from
2559 // memory?  Intel can load a float constant from a direct address,
2560 // requiring no extra registers.  Most RISCs will have to materialize
2561 // an address into a register first, so they would do better to copy
2562 // the constant from stack.
2563 const bool Matcher::rematerialize_float_constants = false;
2564 
2565 // If CPU can load and store mis-aligned doubles directly then no
2566 // fixup is needed.  Else we split the double into 2 integer pieces
2567 // and move it piece-by-piece.  Only happens when passing doubles into
2568 // C code as the Java calling convention forces doubles to be aligned.
2569 const bool Matcher::misaligned_doubles_ok = true;
2570 
2571 // No-op on amd64
2572 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
2573   Unimplemented();
2574 }
2575 
2576 // Advertise here if the CPU requires explicit rounding operations to
2577 // implement the UseStrictFP mode.
2578 const bool Matcher::strict_fp_requires_explicit_rounding = false;
2579 
2580 // Are floats converted to double when stored to stack during
2581 // deoptimization?
2582 bool Matcher::float_in_double() { return true; }
2583 
2584 // Do ints take an entire long register or just half?
2585 // The relevant question is how the int is callee-saved:
2586 // the whole long is written but de-opt'ing will have to extract
2587 // the relevant 32 bits.
2588 const bool Matcher::int_in_long = true;
2589 
2590 // Return whether or not this register is ever used as an argument.
2591 // This function is used on startup to build the trampoline stubs in
2592 // generateOptoStub.  Registers not mentioned will be killed by the VM
2593 // call in the trampoline, and arguments in those registers not be
2594 // available to the callee.
2595 bool Matcher::can_be_java_arg(int reg)
2596 {
2597   return
2598     reg ==  R0_num || reg == R0_H_num ||
2599     reg ==  R1_num || reg == R1_H_num ||
2600     reg ==  R2_num || reg == R2_H_num ||
2601     reg ==  R3_num || reg == R3_H_num ||
2602     reg ==  R4_num || reg == R4_H_num ||
2603     reg ==  R5_num || reg == R5_H_num ||
2604     reg ==  R6_num || reg == R6_H_num ||
2605     reg ==  R7_num || reg == R7_H_num ||
2606     reg ==  V0_num || reg == V0_H_num ||
2607     reg ==  V1_num || reg == V1_H_num ||
2608     reg ==  V2_num || reg == V2_H_num ||
2609     reg ==  V3_num || reg == V3_H_num ||
2610     reg ==  V4_num || reg == V4_H_num ||
2611     reg ==  V5_num || reg == V5_H_num ||
2612     reg ==  V6_num || reg == V6_H_num ||
2613     reg ==  V7_num || reg == V7_H_num;
2614 }
2615 
2616 bool Matcher::is_spillable_arg(int reg)
2617 {
2618   return can_be_java_arg(reg);
2619 }
2620 
2621 bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
2622   return false;
2623 }
2624 
2625 RegMask Matcher::divI_proj_mask() {
2626   ShouldNotReachHere();
2627   return RegMask();
2628 }
2629 
2630 // Register for MODI projection of divmodI.
2631 RegMask Matcher::modI_proj_mask() {
2632   ShouldNotReachHere();
2633   return RegMask();
2634 }
2635 
2636 // Register for DIVL projection of divmodL.
2637 RegMask Matcher::divL_proj_mask() {
2638   ShouldNotReachHere();
2639   return RegMask();
2640 }
2641 
2642 // Register for MODL projection of divmodL.
2643 RegMask Matcher::modL_proj_mask() {
2644   ShouldNotReachHere();
2645   return RegMask();
2646 }
2647 
2648 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
2649   return FP_REG_mask();
2650 }
2651 
2652 // helper for encoding java_to_runtime calls on sim
2653 //
2654 // this is needed to compute the extra arguments required when
2655 // planting a call to the simulator blrt instruction. the TypeFunc
2656 // can be queried to identify the counts for integral, and floating
2657 // arguments and the return type
2658 
2659 static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
2660 {
2661   int gps = 0;
2662   int fps = 0;
2663   const TypeTuple *domain = tf->domain();
2664   int max = domain->cnt();
2665   for (int i = TypeFunc::Parms; i < max; i++) {
2666     const Type *t = domain->field_at(i);
2667     switch(t->basic_type()) {
2668     case T_FLOAT:
2669     case T_DOUBLE:
2670       fps++;
2671     default:
2672       gps++;
2673     }
2674   }
2675   gpcnt = gps;
2676   fpcnt = fps;
2677   BasicType rt = tf->return_type();
2678   switch (rt) {
2679   case T_VOID:
2680     rtype = MacroAssembler::ret_type_void;
2681     break;
2682   default:
2683     rtype = MacroAssembler::ret_type_integral;
2684     break;
2685   case T_FLOAT:
2686     rtype = MacroAssembler::ret_type_float;
2687     break;
2688   case T_DOUBLE:
2689     rtype = MacroAssembler::ret_type_double;
2690     break;
2691   }
2692 }
2693 
// Emit the volatile access INSN on REG, addressing memory through the
// plain base register BASE.  The expansion declares _masm in the
// caller's scope (statements following the macro, such as the sign
// extension after ldarb, keep using it) and guarantees that the memory
// operand carries no index, no displacement and no scale.  SCRATCH is
// accepted but does not appear in the expansion.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)  \
  MacroAssembler _masm(&cbuf);                                      \
  {                                                                 \
    guarantee(INDEX == -1, "mode not permitted for volatile");      \
    guarantee(DISP == 0, "mode not permitted for volatile");        \
    guarantee(SCALE == 0, "mode not permitted for volatile");       \
    __ INSN(REG, as_Register(BASE));                                \
  }
2702 
2703 typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
2704 typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
2705 typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
2706                                   MacroAssembler::SIMD_RegVariant T, const Address &adr);
2707 
2708   // Used for all non-volatile memory accesses.  The use of
2709   // $mem->opcode() to discover whether this pattern uses sign-extended
2710   // offsets is something of a kludge.
2711   static void loadStore(MacroAssembler masm, mem_insn insn,
2712                          Register reg, int opcode,
2713                          Register base, int index, int size, int disp)
2714   {
2715     Address::extend scale;
2716 
2717     // Hooboy, this is fugly.  We need a way to communicate to the
2718     // encoder that the index needs to be sign extended, so we have to
2719     // enumerate all the cases.
2720     switch (opcode) {
2721     case INDINDEXSCALEDOFFSETI2L:
2722     case INDINDEXSCALEDI2L:
2723     case INDINDEXSCALEDOFFSETI2LN:
2724     case INDINDEXSCALEDI2LN:
2725     case INDINDEXOFFSETI2L:
2726     case INDINDEXOFFSETI2LN:
2727       scale = Address::sxtw(size);
2728       break;
2729     default:
2730       scale = Address::lsl(size);
2731     }
2732 
2733     if (index == -1) {
2734       (masm.*insn)(reg, Address(base, disp));
2735     } else {
2736       if (disp == 0) {
2737         (masm.*insn)(reg, Address(base, as_Register(index), scale));
2738       } else {
2739         masm.lea(rscratch1, Address(base, disp));
2740         (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
2741       }
2742     }
2743   }
2744 
2745   static void loadStore(MacroAssembler masm, mem_float_insn insn,
2746                          FloatRegister reg, int opcode,
2747                          Register base, int index, int size, int disp)
2748   {
2749     Address::extend scale;
2750 
2751     switch (opcode) {
2752     case INDINDEXSCALEDOFFSETI2L:
2753     case INDINDEXSCALEDI2L:
2754     case INDINDEXSCALEDOFFSETI2LN:
2755     case INDINDEXSCALEDI2LN:
2756       scale = Address::sxtw(size);
2757       break;
2758     default:
2759       scale = Address::lsl(size);
2760     }
2761 
2762      if (index == -1) {
2763       (masm.*insn)(reg, Address(base, disp));
2764     } else {
2765       if (disp == 0) {
2766         (masm.*insn)(reg, Address(base, as_Register(index), scale));
2767       } else {
2768         masm.lea(rscratch1, Address(base, disp));
2769         (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
2770       }
2771     }
2772   }
2773 
2774   static void loadStore(MacroAssembler masm, mem_vector_insn insn,
2775                          FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
2776                          int opcode, Register base, int index, int size, int disp)
2777   {
2778     if (index == -1) {
2779       (masm.*insn)(reg, T, Address(base, disp));
2780     } else {
2781       assert(disp == 0, "unsupported address mode");
2782       (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
2783     }
2784   }
2785 
2786 %}
2787 
2788 
2789 
2790 //----------ENCODING BLOCK-----------------------------------------------------
2791 // This block specifies the encoding classes used by the compiler to
2792 // output byte streams.  Encoding classes are parameterized macros
2793 // used by Machine Instruction Nodes in order to generate the bit
2794 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
2798 // which returns its register number when queried.  CONST_INTER causes
2799 // an operand to generate a function which returns the value of the
2800 // constant when queried.  MEMORY_INTER causes an operand to generate
2801 // four functions which return the Base Register, the Index Register,
2802 // the Scale Value, and the Offset Value of the operand when queried.
2803 // COND_INTER causes an operand to generate six functions which return
2804 // the encoding code (ie - encoding bits for the instruction)
2805 // associated with each basic boolean condition for a conditional
2806 // instruction.
2807 //
2808 // Instructions specify two basic values for encoding.  Again, a
2809 // function is available to check if the constant displacement is an
2810 // oop. They use the ins_encode keyword to specify their encoding
2811 // classes (which must be a sequence of enc_class names, and their
2812 // parameters, specified in the encoding block), and they use the
2813 // opcode keyword to specify, in order, their primary, secondary, and
2814 // tertiary opcode.  Only the opcode sections which a particular
2815 // instruction needs for encoding need to be specified.
2816 encode %{
2817   // Build emit functions for each basic byte or larger field in the
2818   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2819   // from C++ code in the enc_class source block.  Emit functions will
2820   // live in the main source block for now.  In future, we can
2821   // generalize this by adding a syntax that specifies the sizes of
2822   // fields in an order, so that the adlc can build the emit functions
2823   // automagically
2824 
2825   // catch all for unimplemented encodings
2826   enc_class enc_unimplemented %{
2827     MacroAssembler _masm(&cbuf);
2828     __ unimplemented("C2 catch all");
2829   %}
2830 
2831   // BEGIN Non-volatile memory access
2832 
2833   enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
2834     Register dst_reg = as_Register($dst$$reg);
2835     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
2836                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2837   %}
2838 
2839   enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
2840     Register dst_reg = as_Register($dst$$reg);
2841     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
2842                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2843   %}
2844 
2845   enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
2846     Register dst_reg = as_Register($dst$$reg);
2847     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
2848                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2849   %}
2850 
2851   enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
2852     Register dst_reg = as_Register($dst$$reg);
2853     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
2854                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2855   %}
2856 
2857   enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
2858     Register dst_reg = as_Register($dst$$reg);
2859     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
2860                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2861   %}
2862 
2863   enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
2864     Register dst_reg = as_Register($dst$$reg);
2865     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
2866                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2867   %}
2868 
2869   enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
2870     Register dst_reg = as_Register($dst$$reg);
2871     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
2872                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2873   %}
2874 
2875   enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
2876     Register dst_reg = as_Register($dst$$reg);
2877     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
2878                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2879   %}
2880 
2881   enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
2882     Register dst_reg = as_Register($dst$$reg);
2883     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
2884                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2885   %}
2886 
2887   enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
2888     Register dst_reg = as_Register($dst$$reg);
2889     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
2890                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2891   %}
2892 
2893   enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
2894     Register dst_reg = as_Register($dst$$reg);
2895     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
2896                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2897   %}
2898 
2899   enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
2900     Register dst_reg = as_Register($dst$$reg);
2901     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
2902                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2903   %}
2904 
2905   enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
2906     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2907     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
2908                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2909   %}
2910 
2911   enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
2912     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2913     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
2914                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2915   %}
2916 
2917   enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
2918     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2919     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
2920        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2921   %}
2922 
2923   enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
2924     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2925     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
2926        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2927   %}
2928 
2929   enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
2930     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2931     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
2932        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2933   %}
2934 
2935   enc_class aarch64_enc_strb(iRegI src, memory mem) %{
2936     Register src_reg = as_Register($src$$reg);
2937     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
2938                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2939   %}
2940 
2941   enc_class aarch64_enc_strb0(memory mem) %{
2942     MacroAssembler _masm(&cbuf);
2943     loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
2944                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2945   %}
2946 
2947   enc_class aarch64_enc_strh(iRegI src, memory mem) %{
2948     Register src_reg = as_Register($src$$reg);
2949     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
2950                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2951   %}
2952 
2953   enc_class aarch64_enc_strh0(memory mem) %{
2954     MacroAssembler _masm(&cbuf);
2955     loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
2956                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2957   %}
2958 
2959   enc_class aarch64_enc_strw(iRegI src, memory mem) %{
2960     Register src_reg = as_Register($src$$reg);
2961     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
2962                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2963   %}
2964 
2965   enc_class aarch64_enc_strw0(memory mem) %{
2966     MacroAssembler _masm(&cbuf);
2967     loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
2968                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2969   %}
2970 
2971   enc_class aarch64_enc_str(iRegL src, memory mem) %{
2972     Register src_reg = as_Register($src$$reg);
2973     // we sometimes get asked to store the stack pointer into the
2974     // current thread -- we cannot do that directly on AArch64
2975     if (src_reg == r31_sp) {
2976       MacroAssembler _masm(&cbuf);
2977       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
2978       __ mov(rscratch2, sp);
2979       src_reg = rscratch2;
2980     }
2981     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
2982                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2983   %}
2984 
2985   enc_class aarch64_enc_str0(memory mem) %{
2986     MacroAssembler _masm(&cbuf);
2987     loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
2988                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2989   %}
2990 
2991   enc_class aarch64_enc_strs(vRegF src, memory mem) %{
2992     FloatRegister src_reg = as_FloatRegister($src$$reg);
2993     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
2994                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2995   %}
2996 
2997   enc_class aarch64_enc_strd(vRegD src, memory mem) %{
2998     FloatRegister src_reg = as_FloatRegister($src$$reg);
2999     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
3000                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3001   %}
3002 
3003   enc_class aarch64_enc_strvS(vecD src, memory mem) %{
3004     FloatRegister src_reg = as_FloatRegister($src$$reg);
3005     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
3006        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3007   %}
3008 
3009   enc_class aarch64_enc_strvD(vecD src, memory mem) %{
3010     FloatRegister src_reg = as_FloatRegister($src$$reg);
3011     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
3012        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3013   %}
3014 
3015   enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
3016     FloatRegister src_reg = as_FloatRegister($src$$reg);
3017     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
3018        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3019   %}
3020 
3021   // END Non-volatile memory access
3022 
3023   // volatile loads and stores
3024 
3025   enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
3026     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3027                  rscratch1, stlrb);
3028   %}
3029 
3030   enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
3031     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3032                  rscratch1, stlrh);
3033   %}
3034 
3035   enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
3036     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3037                  rscratch1, stlrw);
3038   %}
3039 
3040 
3041   enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
3042     Register dst_reg = as_Register($dst$$reg);
3043     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3044              rscratch1, ldarb);
3045     __ sxtbw(dst_reg, dst_reg);
3046   %}
3047 
3048   enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
3049     Register dst_reg = as_Register($dst$$reg);
3050     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3051              rscratch1, ldarb);
3052     __ sxtb(dst_reg, dst_reg);
3053   %}
3054 
3055   enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
3056     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3057              rscratch1, ldarb);
3058   %}
3059 
3060   enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
3061     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3062              rscratch1, ldarb);
3063   %}
3064 
3065   enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
3066     Register dst_reg = as_Register($dst$$reg);
3067     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3068              rscratch1, ldarh);
3069     __ sxthw(dst_reg, dst_reg);
3070   %}
3071 
3072   enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
3073     Register dst_reg = as_Register($dst$$reg);
3074     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3075              rscratch1, ldarh);
3076     __ sxth(dst_reg, dst_reg);
3077   %}
3078 
3079   enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
3080     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3081              rscratch1, ldarh);
3082   %}
3083 
3084   enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
3085     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3086              rscratch1, ldarh);
3087   %}
3088 
3089   enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
3090     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3091              rscratch1, ldarw);
3092   %}
3093 
3094   enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
3095     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3096              rscratch1, ldarw);
3097   %}
3098 
3099   enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
3100     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3101              rscratch1, ldar);
3102   %}
3103 
3104   enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
3105     MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3106              rscratch1, ldarw);
3107     __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
3108   %}
3109 
3110   enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
3111     MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3112              rscratch1, ldar);
3113     __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
3114   %}
3115 
3116   enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
3117     Register src_reg = as_Register($src$$reg);
3118     // we sometimes get asked to store the stack pointer into the
3119     // current thread -- we cannot do that directly on AArch64
3120     if (src_reg == r31_sp) {
3121         MacroAssembler _masm(&cbuf);
3122       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
3123       __ mov(rscratch2, sp);
3124       src_reg = rscratch2;
3125     }
3126     MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3127                  rscratch1, stlr);
3128   %}
3129 
3130   enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
3131     {
3132       MacroAssembler _masm(&cbuf);
3133       FloatRegister src_reg = as_FloatRegister($src$$reg);
3134       __ fmovs(rscratch2, src_reg);
3135     }
3136     MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3137                  rscratch1, stlrw);
3138   %}
3139 
3140   enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
3141     {
3142       MacroAssembler _masm(&cbuf);
3143       FloatRegister src_reg = as_FloatRegister($src$$reg);
3144       __ fmovd(rscratch2, src_reg);
3145     }
3146     MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3147                  rscratch1, stlr);
3148   %}
3149 
3150   // synchronized read/update encodings
3151 
3152   enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
3153     MacroAssembler _masm(&cbuf);
3154     Register dst_reg = as_Register($dst$$reg);
3155     Register base = as_Register($mem$$base);
3156     int index = $mem$$index;
3157     int scale = $mem$$scale;
3158     int disp = $mem$$disp;
3159     if (index == -1) {
3160        if (disp != 0) {
3161         __ lea(rscratch1, Address(base, disp));
3162         __ ldaxr(dst_reg, rscratch1);
3163       } else {
3164         // TODO
3165         // should we ever get anything other than this case?
3166         __ ldaxr(dst_reg, base);
3167       }
3168     } else {
3169       Register index_reg = as_Register(index);
3170       if (disp == 0) {
3171         __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
3172         __ ldaxr(dst_reg, rscratch1);
3173       } else {
3174         __ lea(rscratch1, Address(base, disp));
3175         __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
3176         __ ldaxr(dst_reg, rscratch1);
3177       }
3178     }
3179   %}
3180 
3181   enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
3182     MacroAssembler _masm(&cbuf);
3183     Register src_reg = as_Register($src$$reg);
3184     Register base = as_Register($mem$$base);
3185     int index = $mem$$index;
3186     int scale = $mem$$scale;
3187     int disp = $mem$$disp;
3188     if (index == -1) {
3189        if (disp != 0) {
3190         __ lea(rscratch2, Address(base, disp));
3191         __ stlxr(rscratch1, src_reg, rscratch2);
3192       } else {
3193         // TODO
3194         // should we ever get anything other than this case?
3195         __ stlxr(rscratch1, src_reg, base);
3196       }
3197     } else {
3198       Register index_reg = as_Register(index);
3199       if (disp == 0) {
3200         __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
3201         __ stlxr(rscratch1, src_reg, rscratch2);
3202       } else {
3203         __ lea(rscratch2, Address(base, disp));
3204         __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
3205         __ stlxr(rscratch1, src_reg, rscratch2);
3206       }
3207     }
3208     __ cmpw(rscratch1, zr);
3209   %}
3210 
3211   enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
3212     MacroAssembler _masm(&cbuf);
3213     Register old_reg = as_Register($oldval$$reg);
3214     Register new_reg = as_Register($newval$$reg);
3215     Register base = as_Register($mem$$base);
3216     Register addr_reg;
3217     int index = $mem$$index;
3218     int scale = $mem$$scale;
3219     int disp = $mem$$disp;
3220     if (index == -1) {
3221        if (disp != 0) {
3222         __ lea(rscratch2, Address(base, disp));
3223         addr_reg = rscratch2;
3224       } else {
3225         // TODO
3226         // should we ever get anything other than this case?
3227         addr_reg = base;
3228       }
3229     } else {
3230       Register index_reg = as_Register(index);
3231       if (disp == 0) {
3232         __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
3233         addr_reg = rscratch2;
3234       } else {
3235         __ lea(rscratch2, Address(base, disp));
3236         __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
3237         addr_reg = rscratch2;
3238       }
3239     }
3240     Label retry_load, done;
3241     __ bind(retry_load);
3242     __ ldxr(rscratch1, addr_reg);
3243     __ cmp(rscratch1, old_reg);
3244     __ br(Assembler::NE, done);
3245     __ stlxr(rscratch1, new_reg, addr_reg);
3246     __ cbnzw(rscratch1, retry_load);
3247     __ bind(done);
3248   %}
3249 
3250   enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
3251     MacroAssembler _masm(&cbuf);
3252     Register old_reg = as_Register($oldval$$reg);
3253     Register new_reg = as_Register($newval$$reg);
3254     Register base = as_Register($mem$$base);
3255     Register addr_reg;
3256     int index = $mem$$index;
3257     int scale = $mem$$scale;
3258     int disp = $mem$$disp;
3259     if (index == -1) {
3260        if (disp != 0) {
3261         __ lea(rscratch2, Address(base, disp));
3262         addr_reg = rscratch2;
3263       } else {
3264         // TODO
3265         // should we ever get anything other than this case?
3266         addr_reg = base;
3267       }
3268     } else {
3269       Register index_reg = as_Register(index);
3270       if (disp == 0) {
3271         __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
3272         addr_reg = rscratch2;
3273       } else {
3274         __ lea(rscratch2, Address(base, disp));
3275         __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
3276         addr_reg = rscratch2;
3277       }
3278     }
3279     Label retry_load, done;
3280     __ bind(retry_load);
3281     __ ldxrw(rscratch1, addr_reg);
3282     __ cmpw(rscratch1, old_reg);
3283     __ br(Assembler::NE, done);
3284     __ stlxrw(rscratch1, new_reg, addr_reg);
3285     __ cbnzw(rscratch1, retry_load);
3286     __ bind(done);
3287   %}
3288 
3289   // auxiliary used for CompareAndSwapX to set result register
3290   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
3291     MacroAssembler _masm(&cbuf);
3292     Register res_reg = as_Register($res$$reg);
3293     __ cset(res_reg, Assembler::EQ);
3294   %}
3295 
3296   // prefetch encodings
3297 
3298   enc_class aarch64_enc_prefetchw(memory mem) %{
3299     MacroAssembler _masm(&cbuf);
3300     Register base = as_Register($mem$$base);
3301     int index = $mem$$index;
3302     int scale = $mem$$scale;
3303     int disp = $mem$$disp;
3304     if (index == -1) {
3305       __ prfm(Address(base, disp), PSTL1KEEP);
3306       __ nop();
3307     } else {
3308       Register index_reg = as_Register(index);
3309       if (disp == 0) {
3310         __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
3311       } else {
3312         __ lea(rscratch1, Address(base, disp));
3313         __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
3314       }
3315     }
3316   %}
3317 
3318   enc_class aarch64_enc_clear_array_reg_reg(iRegL_R11 cnt, iRegP_R10 base) %{
3319     MacroAssembler _masm(&cbuf);
3320     Register cnt_reg = as_Register($cnt$$reg);
3321     Register base_reg = as_Register($base$$reg);
3322     // base is word aligned
3323     // cnt is count of words
3324 
3325     Label loop;
3326     Label entry;
3327 
3328 //  Algorithm:
3329 //
3330 //    scratch1 = cnt & 7;
3331 //    cnt -= scratch1;
3332 //    p += scratch1;
3333 //    switch (scratch1) {
3334 //      do {
3335 //        cnt -= 8;
3336 //          p[-8] = 0;
3337 //        case 7:
3338 //          p[-7] = 0;
3339 //        case 6:
3340 //          p[-6] = 0;
3341 //          // ...
3342 //        case 1:
3343 //          p[-1] = 0;
3344 //        case 0:
3345 //          p += 8;
3346 //      } while (cnt);
3347 //    }
3348 
3349     const int unroll = 8; // Number of str(zr) instructions we'll unroll
3350 
3351     __ andr(rscratch1, cnt_reg, unroll - 1);  // tmp1 = cnt % unroll
3352     __ sub(cnt_reg, cnt_reg, rscratch1);      // cnt -= unroll
3353     // base_reg always points to the end of the region we're about to zero
3354     __ add(base_reg, base_reg, rscratch1, Assembler::LSL, exact_log2(wordSize));
3355     __ adr(rscratch2, entry);
3356     __ sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
3357     __ br(rscratch2);
3358     __ bind(loop);
3359     __ sub(cnt_reg, cnt_reg, unroll);
3360     for (int i = -unroll; i < 0; i++)
3361       __ str(zr, Address(base_reg, i * wordSize));
3362     __ bind(entry);
3363     __ add(base_reg, base_reg, unroll * wordSize);
3364     __ cbnz(cnt_reg, loop);
3365   %}
3366 
  // mov encodings
3368 
3369   enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
3370     MacroAssembler _masm(&cbuf);
3371     u_int32_t con = (u_int32_t)$src$$constant;
3372     Register dst_reg = as_Register($dst$$reg);
3373     if (con == 0) {
3374       __ movw(dst_reg, zr);
3375     } else {
3376       __ movw(dst_reg, con);
3377     }
3378   %}
3379 
3380   enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
3381     MacroAssembler _masm(&cbuf);
3382     Register dst_reg = as_Register($dst$$reg);
3383     u_int64_t con = (u_int64_t)$src$$constant;
3384     if (con == 0) {
3385       __ mov(dst_reg, zr);
3386     } else {
3387       __ mov(dst_reg, con);
3388     }
3389   %}
3390 
3391   enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
3392     MacroAssembler _masm(&cbuf);
3393     Register dst_reg = as_Register($dst$$reg);
3394     address con = (address)$src$$constant;
3395     if (con == NULL || con == (address)1) {
3396       ShouldNotReachHere();
3397     } else {
3398       relocInfo::relocType rtype = $src->constant_reloc();
3399       if (rtype == relocInfo::oop_type) {
3400         __ movoop(dst_reg, (jobject)con, /*immediate*/true);
3401       } else if (rtype == relocInfo::metadata_type) {
3402         __ mov_metadata(dst_reg, (Metadata*)con);
3403       } else {
3404         assert(rtype == relocInfo::none, "unexpected reloc type");
3405         if (con < (address)(uintptr_t)os::vm_page_size()) {
3406           __ mov(dst_reg, con);
3407         } else {
3408           unsigned long offset;
3409           __ adrp(dst_reg, con, offset);
3410           __ add(dst_reg, dst_reg, offset);
3411         }
3412       }
3413     }
3414   %}
3415 
3416   enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
3417     MacroAssembler _masm(&cbuf);
3418     Register dst_reg = as_Register($dst$$reg);
3419     __ mov(dst_reg, zr);
3420   %}
3421 
3422   enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
3423     MacroAssembler _masm(&cbuf);
3424     Register dst_reg = as_Register($dst$$reg);
3425     __ mov(dst_reg, (u_int64_t)1);
3426   %}
3427 
3428   enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
3429     MacroAssembler _masm(&cbuf);
3430     address page = (address)$src$$constant;
3431     Register dst_reg = as_Register($dst$$reg);
3432     unsigned long off;
3433     __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
3434     assert(off == 0, "assumed offset == 0");
3435   %}
3436 
3437   enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
3438     // adrp on the byte-map-base constant wrapped as an ExternalAddress;
3439     // the offset adrp hands back through its last argument must be zero.
3440     MacroAssembler _masm(&cbuf);
3441     unsigned long page_offset;
3442     __ adrp(as_Register($dst$$reg), ExternalAddress((address)$src$$constant), page_offset);
3443     assert(page_offset == 0, "assumed offset == 0");
3444   %}
3445 
3446   enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
3447     // Non-null narrow oop constant; a NULL constant must not get here.
3448     MacroAssembler _masm(&cbuf);
3449     address oop_con = (address)$src$$constant;
3450     if (oop_con != NULL) {
3451       relocInfo::relocType reloc = $src->constant_reloc();
3452       assert(reloc == relocInfo::oop_type, "unexpected reloc type");
3453       __ set_narrow_oop(as_Register($dst$$reg), (jobject)oop_con);
3454     } else {
3455       ShouldNotReachHere();
3456     }
3457   %}
3458 
3459   enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
3460     // Narrow null constant: copy the zero register into dst.
3461     MacroAssembler _masm(&cbuf);
3462     __ mov(as_Register($dst$$reg), zr);
3463   %}
3464 
3465   enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
3466     // Non-null narrow klass constant; a NULL constant must not get here.
3467     MacroAssembler _masm(&cbuf);
3468     address klass_con = (address)$src$$constant;
3469     if (klass_con != NULL) {
3470       relocInfo::relocType reloc = $src->constant_reloc();
3471       assert(reloc == relocInfo::metadata_type, "unexpected reloc type");
3472       __ set_narrow_klass(as_Register($dst$$reg), (Klass *)klass_con);
3473     } else {
3474       ShouldNotReachHere();
3475     }
3476   %}
3477 
3478   // arithmetic encodings
3479 
3480   enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
3481     // add has primary == 0, subtract has primary == 1; a negative
3482     // effective addend is emitted as subw of its magnitude.
3483     MacroAssembler _masm(&cbuf);
3484     Register rd = as_Register($dst$$reg), rn = as_Register($src1$$reg);
3485     int32_t addend = (int32_t)$src2$$constant;
3486     if ($primary) { addend = -addend; }
3487     if (addend >= 0) {
3488       __ addw(rd, rn, addend);
3489     } else {
3490       __ subw(rd, rn, -addend);
3491     }
3492   %}
3493 
3494   enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
3495     // add has primary == 0, subtract has primary == 1; a negative
3496     // effective addend is emitted as sub of its magnitude.
3497     MacroAssembler _masm(&cbuf);
3498     Register rd = as_Register($dst$$reg), rn = as_Register($src1$$reg);
3499     int32_t addend = (int32_t)$src2$$constant;
3500     if ($primary) { addend = -addend; }
3501     if (addend >= 0) {
3502       __ add(rd, rn, addend);
3503     } else {
3504       __ sub(rd, rn, -addend);
3505     }
3506   %}
3507 
3508   enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
3509     // 32 bit divide via corrected_idivl; flag false == not the remainder.
3510     MacroAssembler _masm(&cbuf);
3511     Register result = as_Register($dst$$reg);
3512     Register dividend = as_Register($src1$$reg);
3513     __ corrected_idivl(result, dividend, as_Register($src2$$reg), false, rscratch1);
3514   %}
3515 
3516   enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
3517     // Divide via corrected_idivq. NOTE(review): operands are declared iRegI - confirm.
3518     MacroAssembler _masm(&cbuf);
3519     Register result = as_Register($dst$$reg);
3520     Register dividend = as_Register($src1$$reg);
3521     __ corrected_idivq(result, dividend, as_Register($src2$$reg), false, rscratch1);
3522   %}
3523 
3524   enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
3525     // 32 bit modulo via corrected_idivl; flag true == deliver the remainder.
3526     MacroAssembler _masm(&cbuf);
3527     Register result = as_Register($dst$$reg);
3528     Register dividend = as_Register($src1$$reg);
3529     __ corrected_idivl(result, dividend, as_Register($src2$$reg), true, rscratch1);
3530   %}
3531 
3532   enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
3533     // Modulo via corrected_idivq. NOTE(review): operands are declared iRegI - confirm.
3534     MacroAssembler _masm(&cbuf);
3535     Register result = as_Register($dst$$reg);
3536     Register dividend = as_Register($src1$$reg);
3537     __ corrected_idivq(result, dividend, as_Register($src2$$reg), true, rscratch1);
3538   %}
3539 
3540   // compare instruction encodings
3541 
3542   enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
3543     // 32 bit compare of two registers.
3544     MacroAssembler _masm(&cbuf);
3545     Register lhs = as_Register($src1$$reg);
3546     __ cmpw(lhs, as_Register($src2$$reg));
3547   %}
3548 
3549   enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
3550     // Compare by subsw into zr; a negative immediate uses addsw of its magnitude.
3551     MacroAssembler _masm(&cbuf);
3552     int32_t imm = $src2$$constant;
3553     if (imm < 0) {
3554       __ addsw(zr, as_Register($src1$$reg), -imm);
3555     } else {
3556       __ subsw(zr, as_Register($src1$$reg), imm);
3557     }
3558   %}
3559 
3560   enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
3561     // Arbitrary 32 bit immediate: put it in rscratch1, then compare.
3562     MacroAssembler _masm(&cbuf);
3563     u_int32_t imm = (u_int32_t)$src2$$constant;
3564     __ movw(rscratch1, imm);
3565     __ cmpw(as_Register($src1$$reg), rscratch1);
3566   %}
3567 
3568   enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
3569     // Compare of two long registers.
3570     MacroAssembler _masm(&cbuf);
3571     Register lhs = as_Register($src1$$reg);
3572     __ cmp(lhs, as_Register($src2$$reg));
3573   %}
3574 
3575   enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
3576     // Compare with immediate; Long.MIN_VALUE (its own negation) goes via rscratch1.
3577     MacroAssembler _masm(&cbuf);
3578     Register lhs = as_Register($src1$$reg);
3579     int64_t imm = $src2$$constant;
3580     if (imm >= 0) {
3581       __ subs(zr, lhs, imm);
3582     } else if (imm == -imm) {
3583       __ orr(rscratch1, zr, (u_int64_t)imm);
3584       __ subs(zr, lhs, rscratch1);
3585     } else {
3586       __ adds(zr, lhs, -imm);
3587     }
3588   %}
3589 
3590   enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
3591     // Arbitrary 64 bit immediate: put it in rscratch1, then compare.
3592     MacroAssembler _masm(&cbuf);
3593     u_int64_t imm = (u_int64_t)$src2$$constant;
3594     __ mov(rscratch1, imm);
3595     __ cmp(as_Register($src1$$reg), rscratch1);
3596   %}
3597 
3598   enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
3599     // Compare of two pointer registers.
3600     MacroAssembler _masm(&cbuf);
3601     Register lhs = as_Register($src1$$reg);
3602     __ cmp(lhs, as_Register($src2$$reg));
3603   %}
3604 
3605   enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
3606     // Compare of two narrow-oop registers, done with the 32 bit cmpw.
3607     MacroAssembler _masm(&cbuf);
3608     Register lhs = as_Register($src1$$reg);
3609     __ cmpw(lhs, as_Register($src2$$reg));
3610   %}
3611 
3612   enc_class aarch64_enc_testp(iRegP src) %{
3613     // Null test of a pointer register: compare it with zr.
3614     MacroAssembler _masm(&cbuf);
3615     __ cmp(as_Register($src$$reg), zr);
3616   %}
3617 
3618   enc_class aarch64_enc_testn(iRegN src) %{
3619     // Null test of a narrow-oop register: 32 bit compare with zr.
3620     MacroAssembler _masm(&cbuf);
3621     __ cmpw(as_Register($src$$reg), zr);
3622   %}
3623 
3624   enc_class aarch64_enc_b(label lbl) %{
3625     // Unconditional branch to the label operand.
3626     MacroAssembler _masm(&cbuf);
3627     __ b(*($lbl$$label));
3628   %}
3629 
3630   enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
3631     // Conditional branch; the condition is the cmpOp operand's cmpcode.
3632     MacroAssembler _masm(&cbuf);
3633     __ br((Assembler::Condition)$cmp$$cmpcode, *($lbl$$label));
3634   %}
3635 
3636   enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
3637     // Conditional branch; the condition is the cmpOpU operand's cmpcode.
3638     MacroAssembler _masm(&cbuf);
3639     __ br((Assembler::Condition)$cmp$$cmpcode, *($lbl$$label));
3640   %}
3641 
3642   enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
3643   %{
3644      // Slow-path subtype check. No success label is supplied (NULL); the
3645      // label handed to the helper is bound at the very end, after the
3646      // optional zeroing of the result register that $primary requests.
3647      MacroAssembler _masm(&cbuf);
3648      Register res = as_Register($result$$reg);
3649      Label not_subtype;
3650 
3651      __ check_klass_subtype_slow_path(as_Register($sub$$reg), as_Register($super$$reg),
3652                                      as_Register($temp$$reg), res,
3653                                      NULL, &not_subtype, /*set_cond_codes:*/ true);
3654      if ($primary) {
3655        __ mov(res, zr);
3656      }
3657      __ bind(not_subtype);
3658   %}
3659 
3660   enc_class aarch64_enc_java_static_call(method meth) %{
3661     // Emits a trampoline call to $meth. The relocation type depends on
3662     // what is being called: no _method means a runtime wrapper (e.g. new,
3663     // new_typeArray_Java, uncommon_trap), otherwise an optimized virtual
3664     // or a plain static Java call. Java targets also get a to-interpreter
3665     // stub. Either emission returning NULL is recorded as a compile failure.
3666     MacroAssembler _masm(&cbuf);
3667 
3668     relocInfo::relocType call_reloc = relocInfo::static_call_type;
3669     if (!_method) {
3670       call_reloc = relocInfo::runtime_call_type;
3671     } else if (_optimized_virtual) {
3672       call_reloc = relocInfo::opt_virtual_call_type;
3673     }
3674 
3675     address call = __ trampoline_call(Address((address)$meth$$method, call_reloc), &cbuf);
3676     if (call == NULL) {
3677       ciEnv::current()->record_failure("CodeCache is full");
3678       return;
3679     }
3680 
3681     // Emit stub for static call
3682     if (_method && CompiledStaticCall::emit_to_interp_stub(cbuf) == NULL) {
3683       ciEnv::current()->record_failure("CodeCache is full");
3684       return;
3685     }
3686   %}
3687 
3688   enc_class aarch64_enc_java_dynamic_call(method meth) %{
3689     // Inline-cache call to $meth; a NULL result is recorded as a compile failure.
3690     MacroAssembler _masm(&cbuf);
3691     if (__ ic_call((address)$meth$$method) == NULL) {
3692       ciEnv::current()->record_failure("CodeCache is full");
3693       return;
3694     }
3695   %}
3696 
3697   enc_class aarch64_enc_call_epilog() %{
3698     // Stack depth check (find majik cookie on stack) is unimplemented here.
3699     MacroAssembler _masm(&cbuf);
3700     if (VerifyStackAtCalls) {
3701       __ call_Unimplemented();
3702     }
3703   %}
3704 
3705   enc_class aarch64_enc_java_to_runtime(method meth) %{
3706     // C2 schedules some calls to generated routines (arraycopy code) as
3707     // runtime calls. A target that CodeCache::find_blob() locates is
3708     // called through a trampoline call; any other target is called with
3709     // blrt, which takes the absolute address in a register.
3710     MacroAssembler _masm(&cbuf);
3711     address target = (address)$meth$$method;
3712 
3713     if (CodeCache::find_blob(target) == NULL) {
3714       // No code blob for the target: go through rscratch1 with blrt.
3715       int gp_args;
3716       int fp_args;
3717       int ret_type;
3718       getCallInfo(tf(), gp_args, fp_args, ret_type);
3719       Label after_call;
3720       __ adr(rscratch2, after_call);
3721       __ lea(rscratch1, RuntimeAddress(target));
3722       // Leave a breadcrumb for JavaThread::pd_last_frame().
3723       __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
3724       __ blrt(rscratch1, gp_args, fp_args, ret_type);
3725       __ bind(after_call);
3726       __ add(sp, sp, 2 * wordSize);
3727     } else {
3728       address call = __ trampoline_call(Address(target, relocInfo::runtime_call_type));
3729       if (call == NULL) {
3730         ciEnv::current()->record_failure("CodeCache is full");
3731         return;
3732       }
3733     }
3734   %}
3735 
3736   enc_class aarch64_enc_rethrow() %{
         // Far jump to the address returned by OptoRuntime::rethrow_stub().
3737     MacroAssembler _masm(&cbuf);
3738     __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
3739   %}
3740 
3741   enc_class aarch64_enc_ret() %{
         // Return through the link register.
3742     MacroAssembler _masm(&cbuf);
3743     __ ret(lr);
3744   %}
3745 
3746   enc_class aarch64_enc_tail_call(iRegP jump_target) %{
3747     // Tail call: branch to the address held in jump_target.
3748     MacroAssembler _masm(&cbuf);
3749     __ br(as_Register($jump_target$$reg));
3750   %}
3751 
3752   enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
3753     MacroAssembler _masm(&cbuf);
3754     Register target = as_Register($jump_target$$reg);
3755     // On entry the exception oop should be in r0 and the return address
3756     // has already been popped into lr. The callee expects that address
3757     // in r3, so copy it across before branching.
3758     __ mov(r3, lr);
3759     __ br(target);
3760   %}
3761 
3762   enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
         // Inline fast path for monitor enter.
         //   object - the oop being locked
         //   box    - the lock box whose displaced-header slot is written
         //   tmp    - scratch, used below as disp_hdr (the displaced header)
         //   tmp2   - scratch, used below as tmp
         // The outcome is left in the condition flags (see the end of the
         // block): EQ indicates success, NE indicates failure.
         //
         // EmitSync bit 0x01: "always do locking in runtime" - after the mark
         //   load only a compare of oop against zr is emitted.
         // EmitSync bit 0x02: when set, no inline handling of an existing
         //   monitor is emitted (neither the bit test nor the monitor path).
3763     MacroAssembler _masm(&cbuf);
3764     Register oop = as_Register($object$$reg);
3765     Register box = as_Register($box$$reg);
3766     Register disp_hdr = as_Register($tmp$$reg);
3767     Register tmp = as_Register($tmp2$$reg);
3768     Label cont;
3769     Label object_has_monitor;
3770     Label cas_failed;
3771 
3772     assert_different_registers(oop, box, tmp, disp_hdr);
3773 
3774     // Load markOop from object into displaced_header.
3775     __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
3776 
3777     // Always do locking in runtime.
3778     if (EmitSync & 0x01) {
3779       __ cmp(oop, zr);
3780       return;
3781     }
3782 
3783     if (UseBiasedLocking) {
3784       __ biased_locking_enter(disp_hdr, oop, box, tmp, true, cont);
3785     }
3786 
3787     // Handle existing monitor
         // The branch to object_has_monitor must be emitted under exactly the
         // condition that binds the label further down ((EmitSync & 0x02) == 0);
         // otherwise the monitor path is unreachable, or the branch targets a
         // label that is never bound.
3788     if ((EmitSync & 0x02) == 0) {
3789       // we can use AArch64's bit test and branch here but
3790       // markoopDesc does not define a bit index just the bit value
3791       // so assert in case the bit pos changes
3792 #     define __monitor_value_log2 1
3793       assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
3794       __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
3795 #     undef __monitor_value_log2
3796     }
3797 
3798     // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
3799     __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);
3800 
3801     // Load Compare Value application register.
3802 
3803     // Initialize the box. (Must happen before we update the object mark!)
3804     __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3805 
3806     // Compare object markOop with mark and if equal exchange scratch1
3807     // with object markOop.
3808     // Note that this is simply a CAS: it does not generate any
3809     // barriers.  These are separately generated by
3810     // membar_acquire_lock().
3811     {
3812       Label retry_load;
3813       __ bind(retry_load);
3814       __ ldxr(tmp, oop);
3815       __ cmp(tmp, disp_hdr);
3816       __ br(Assembler::NE, cas_failed);
3817       // use stlxr to ensure update is immediately visible
3818       __ stlxr(tmp, box, oop);
3819       __ cbzw(tmp, cont);
3820       __ b(retry_load);
3821     }
3822 
3823     // Formerly:
3824     // __ cmpxchgptr(/*oldv=*/disp_hdr,
3825     //               /*newv=*/box,
3826     //               /*addr=*/oop,
3827     //               /*tmp=*/tmp,
3828     //               cont,
3829     //               /*fail*/NULL);
3830 
3831     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
3832 
3833     // If the compare-and-exchange succeeded, then we found an unlocked
3834     // object, will have now locked it will continue at label cont
3835 
3836     __ bind(cas_failed);
3837     // We did not see an unlocked object so try the fast recursive case.
3838 
3839     // Check if the owner is self by comparing the value in the
3840     // markOop of object (disp_hdr) with the stack pointer.
3841     __ mov(rscratch1, sp);
3842     __ sub(disp_hdr, disp_hdr, rscratch1);
3843     __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
3844     // If condition is true we are cont and hence we can store 0 as the
3845     // displaced header in the box, which indicates that it is a recursive lock.
3846     __ ands(tmp/*==0?*/, disp_hdr, tmp);
3847     __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3848 
3849     // Handle existing monitor.
3850     if ((EmitSync & 0x02) == 0) {
3851       __ b(cont);
3852 
3853       __ bind(object_has_monitor);
3854       // The object's monitor m is unlocked iff m->owner == NULL,
3855       // otherwise m->owner may contain a thread or a stack address.
3856       //
3857       // Try to CAS m->owner from NULL to current thread.
3858       __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
3859       __ mov(disp_hdr, zr);
3860 
3861       {
3862         Label retry_load, fail;
3863         __ bind(retry_load);
3864         __ ldxr(rscratch1, tmp);
3865         __ cmp(disp_hdr, rscratch1);
3866         __ br(Assembler::NE, fail);
3867         // use stlxr to ensure update is immediately visible
3868         __ stlxr(rscratch1, rthread, tmp);
3869         __ cbnzw(rscratch1, retry_load);
3870         __ bind(fail);
3871       }
3872 
3873       // Label next;
3874       // __ cmpxchgptr(/*oldv=*/disp_hdr,
3875       //               /*newv=*/rthread,
3876       //               /*addr=*/tmp,
3877       //               /*tmp=*/rscratch1,
3878       //               /*succeed*/next,
3879       //               /*fail*/NULL);
3880       // __ bind(next);
3881 
3882       // store a non-null value into the box.
3883       __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3884 
3885       // PPC port checks the following invariants
3886       // #ifdef ASSERT
3887       // bne(flag, cont);
3888       // We have acquired the monitor, check some invariants.
3889       // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
3890       // Invariant 1: _recursions should be 0.
3891       // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
3892       // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
3893       //                        "monitor->_recursions should be 0", -1);
3894       // Invariant 2: OwnerIsThread shouldn't be 0.
3895       // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
3896       //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
3897       //                           "monitor->OwnerIsThread shouldn't be 0", -1);
3898       // #endif
3899     }
3900 
3901     __ bind(cont);
3902     // flag == EQ indicates success
3903     // flag == NE indicates failure
3904 
3905   %}
3906 
3907   // TODO
3908   // reimplement this with custom cmpxchgptr code
3909   // which avoids some of the unnecessary branching
3910   enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
         // Inline fast path for monitor exit.
         //   object - the oop being unlocked
         //   box    - the lock box holding the displaced header
         //   tmp    - scratch, used below as disp_hdr
         //   tmp2   - scratch, used below as tmp
         // The outcome is left in the condition flags (see the end of the
         // block): EQ indicates success, NE indicates failure.
         // EmitSync bit 0x01 reduces the block to a compare of oop against zr;
         // bit 0x02, when set, leaves out all inline monitor handling.
3911     MacroAssembler _masm(&cbuf);
3912     Register oop = as_Register($object$$reg);
3913     Register box = as_Register($box$$reg);
3914     Register disp_hdr = as_Register($tmp$$reg);
3915     Register tmp = as_Register($tmp2$$reg);
3916     Label cont;
3917     Label object_has_monitor;
3918     Label cas_failed;
3919 
3920     assert_different_registers(oop, box, tmp, disp_hdr);
3921 
3922     // Always do locking in runtime.
3923     if (EmitSync & 0x01) {
3924       __ cmp(oop, zr); // Oop can't be 0 here => always false.
3925       return;
3926     }
3927 
3928     if (UseBiasedLocking) {
3929       __ biased_locking_exit(oop, tmp, cont);
3930     }
3931 
3932     // Find the lock address and load the displaced header from the stack.
3933     __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3934 
3935     // If the displaced header is 0, we have a recursive unlock.
3936     __ cmp(disp_hdr, zr);
3937     __ br(Assembler::EQ, cont);
3938 
3939 
3940     // Handle existing monitor.
         // NOTE(review): the bit test below is on disp_hdr (the value loaded
         // from the box), whereas tmp holds the mark word just loaded from the
         // object and is what object_has_monitor treats as the tagged monitor
         // pointer - confirm that disp_hdr is the intended register.
3941     if ((EmitSync & 0x02) == 0) {
3942       __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
3943       __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
3944     }
3945 
3946     // Check if it is still a light weight lock, this is true if we
3947     // see the stack address of the basicLock in the markOop of the
3948     // object.
3949 
3950       {
3951         Label retry_load;
3952         __ bind(retry_load);
3953         __ ldxr(tmp, oop);
3954         __ cmp(box, tmp);
3955         __ br(Assembler::NE, cas_failed);
3956         // use stlxr to ensure update is immediately visible
3957         __ stlxr(tmp, disp_hdr, oop);
3958         __ cbzw(tmp, cont);
3959         __ b(retry_load);
3960       }
3961 
3962     // __ cmpxchgptr(/*compare_value=*/box,
3963     //               /*exchange_value=*/disp_hdr,
3964     //               /*where=*/oop,
3965     //               /*result=*/tmp,
3966     //               cont,
3967     //               /*cas_failed*/NULL);
3968     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
3969 
3970     __ bind(cas_failed);
3971 
3972     // Handle existing monitor.
3973     if ((EmitSync & 0x02) == 0) {
3974       __ b(cont);
3975 
3976       __ bind(object_has_monitor);
3977       __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
3978       __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
3979       __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
3980       __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
3981       __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
3982       __ cmp(rscratch1, zr);
3983       __ br(Assembler::NE, cont);
3984 
3985       __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
3986       __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
3987       __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
           // The compare sets the flags that are still in effect at cont,
           // whichever way the cbnz below goes.
3988       __ cmp(rscratch1, zr);
3989       __ cbnz(rscratch1, cont);
3990       // need a release store here
3991       __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
3992       __ stlr(rscratch1, tmp); // rscratch1 is zero
3993     }
3994 
3995     __ bind(cont);
3996     // flag == EQ indicates success
3997     // flag == NE indicates failure
3998   %}
3999 
4000 %}
4001 
4002 //----------FRAME--------------------------------------------------------------
4003 // Definition of frame structure and management information.
4004 //
4005 //  S T A C K   L A Y O U T    Allocators stack-slot number
4006 //                             |   (to get allocators register number
4007 //  G  Owned by    |        |  v    add OptoReg::stack0())
4008 //  r   CALLER     |        |
4009 //  o     |        +--------+      pad to even-align allocators stack-slot
4010 //  w     V        |  pad0  |        numbers; owned by CALLER
4011 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4012 //  h     ^        |   in   |  5
4013 //        |        |  args  |  4   Holes in incoming args owned by SELF
4014 //  |     |        |        |  3
4015 //  |     |        +--------+
4016 //  V     |        | old out|      Empty on Intel, window on Sparc
4017 //        |    old |preserve|      Must be even aligned.
4018 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
4019 //        |        |   in   |  3   area for Intel ret address
4020 //     Owned by    |preserve|      Empty on Sparc.
4021 //       SELF      +--------+
4022 //        |        |  pad2  |  2   pad to align old SP
4023 //        |        +--------+  1
4024 //        |        | locks  |  0
4025 //        |        +--------+----> OptoReg::stack0(), even aligned
4026 //        |        |  pad1  | 11   pad to align new SP
4027 //        |        +--------+
4028 //        |        |        | 10
4029 //        |        | spills |  9   spills
4030 //        V        |        |  8   (pad0 slot for callee)
4031 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4032 //        ^        |  out   |  7
4033 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4034 //     Owned by    +--------+
4035 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4036 //        |    new |preserve|      Must be even-aligned.
4037 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
4038 //        |        |        |
4039 //
4040 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4041 //         known from SELF's arguments and the Java calling convention.
4042 //         Region 6-7 is determined per call site.
4043 // Note 2: If the calling convention leaves holes in the incoming argument
4044 //         area, those holes are owned by SELF.  Holes in the outgoing area
4045 //         are owned by the CALLEE.  Holes should not be necessary in the
4046 //         incoming area, as the Java calling convention is completely under
4047 //         the control of the AD file.  Doubles can be sorted and packed to
4048 //         avoid holes.  Holes in the outgoing arguments may be necessary for
4049 //         varargs C calling conventions.
4050 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4051 //         even aligned with pad0 as needed.
4052 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
4053 //           (the latter is true on Intel but is it false on AArch64?)
4054 //         region 6-11 is even aligned; it may be padded out more so that
4055 //         the region from SP to FP meets the minimum stack alignment.
4056 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4057 //         alignment.  Region 11, pad1, may be dynamically extended so that
4058 //         SP meets the minimum alignment.
4059 
4060 frame %{
       // Frame layout and calling-convention description: stack direction,
       // the registers shared with the interpreter, stack-slot bookkeeping,
       // and where arguments and return values live.

4061   // What direction does stack grow in (assumed to be same for C & Java)
4062   stack_direction(TOWARDS_LOW);
4063 
4064   // These three registers define part of the calling convention
4065   // between compiled code and the interpreter.
4066 
4067   // Inline Cache Register or methodOop for I2C.
4068   inline_cache_reg(R12);
4069 
4070   // Method Oop Register when calling interpreter.
4071   interpreter_method_oop_reg(R12);
4072 
4073   // Number of stack slots consumed by locking an object
4074   sync_stack_slots(2);
4075 
4076   // Compiled code's Frame Pointer
4077   frame_pointer(R31);
4078 
4079   // Interpreter stores its frame pointer in a register which is
4080   // stored to the stack by I2CAdaptors.
4081   // I2CAdaptors convert from interpreted java to compiled java.
4082   interpreter_frame_pointer(R29);
4083 
4084   // Stack alignment requirement
4085   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4086 
4087   // Number of stack slots between incoming argument block and the start of
4088   // a new frame.  The PROLOG must add this many slots to the stack.  The
4089   // EPILOG must remove this many slots. aarch64 needs two slots for
4090   // return address and fp.
       // NOTE(review): the value below is 4 although the text says "two
       // slots" - presumably two 64 bit words counted as four 32 bit stack
       // slots; confirm.
4091   // TODO think this is correct but check
4092   in_preserve_stack_slots(4);
4093 
4094   // Number of outgoing stack slots killed above the out_preserve_stack_slots
4095   // for calls to C.  Supports the var-args backing area for register parms.
4096   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4097 
4098   // The after-PROLOG location of the return address.  Location of
4099   // return address specifies a type (REG or STACK) and a number
4100   // representing the register number (i.e. - use a register name) or
4101   // stack slot.
4102   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4103   // Otherwise, it is above the locks and verification slot and alignment word
4104   // TODO this may well be correct but need to check why that - 2 is there
4105   // ppc port uses 0 but we definitely need to allow for fixed_slots
4106   // which folds in the space used for monitors
4107   return_addr(STACK - 2 +
4108               round_to((Compile::current()->in_preserve_stack_slots() +
4109                         Compile::current()->fixed_slots()),
4110                        stack_alignment_in_slots()));
4111 
4112   // Body of function which returns an integer array locating
4113   // arguments either in registers or in stack slots.  Passed an array
4114   // of ideal registers called "sig" and a "length" count.  Stack-slot
4115   // offsets are based on outgoing arguments, i.e. a CALLER setting up
4116   // arguments for a CALLEE.  Incoming stack arguments are
4117   // automatically biased by the preserve_stack_slots field above.
4118 
4119   calling_convention
4120   %{
4121     // No difference between ingoing/outgoing just pass false
4122     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4123   %}
4124 
4125   c_calling_convention
4126   %{
4127     // This is obviously always outgoing
4128     (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
4129   %}
4130 
4131   // Location of compiled Java return values.  Same as C for now.
4132   return_value
4133   %{
4134     // TODO do we allow ideal_reg == Op_RegN???
4135     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4136            "only return normal values");
4137 
         // Both tables are indexed by ideal_reg. lo[] names the register
         // holding the low half of the value: R0 for the integer and pointer
         // kinds, V0 for float and double.
4138     static const int lo[Op_RegL + 1] = { // enum name
4139       0,                                 // Op_Node
4140       0,                                 // Op_Set
4141       R0_num,                            // Op_RegN
4142       R0_num,                            // Op_RegI
4143       R0_num,                            // Op_RegP
4144       V0_num,                            // Op_RegF
4145       V0_num,                            // Op_RegD
4146       R0_num                             // Op_RegL
4147     };
4148 
         // hi[] names the upper half; OptoReg::Bad marks the kinds listed
         // without one (RegN, RegI, RegF).
4149     static const int hi[Op_RegL + 1] = { // enum name
4150       0,                                 // Op_Node
4151       0,                                 // Op_Set
4152       OptoReg::Bad,                       // Op_RegN
4153       OptoReg::Bad,                      // Op_RegI
4154       R0_H_num,                          // Op_RegP
4155       OptoReg::Bad,                      // Op_RegF
4156       V0_H_num,                          // Op_RegD
4157       R0_H_num                           // Op_RegL
4158     };
4159 
4160     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4161   %}
4162 %}
4163 
4164 //----------ATTRIBUTES---------------------------------------------------------
4165 //----------Operand Attributes-------------------------------------------------
4166 op_attrib op_cost(1);        // Required cost attribute (declared as 1; the simple operands below set op_cost(0))
4167 
4168 //----------Instruction Attributes---------------------------------------------
4169 ins_attrib ins_cost(INSN_COST); // Required cost attribute
4170 ins_attrib ins_size(32);        // Required size attribute (in bits)
4171 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4172                                 // a non-matching short branch variant
4173                                 // of some long branch?
4174 ins_attrib ins_alignment(4);    // Required alignment attribute (must
4175                                 // be a power of 2) specifies the
4176                                 // alignment that some part of the
4177                                 // instruction (not necessarily the
4178                                 // start) requires.  If > 1, a
4179                                 // compute_padding() function must be
4180                                 // provided for the instruction
4181 
4182 //----------OPERANDS-----------------------------------------------------------
4183 // Operand definitions must precede instruction definitions for correct parsing
4184 // in the ADLC because operands constitute user defined types which are used in
4185 // instruction definitions.
4186 
4187 //----------Simple Operands----------------------------------------------------
4188 
4189 // Integer operands 32 bit
4190 // 32 bit immediate
4191 operand immI()
4192 %{
       // No predicate: every ConI node is accepted.
4193   match(ConI);
4194 
4195   op_cost(0);
4196   format %{ %}
4197   interface(CONST_INTER);
4198 %}
4199 
4200 // 32 bit zero
4201 operand immI0()
4202 %{
       // Accepts only the ConI whose value is 0.
4203   predicate(n->get_int() == 0);
4204   match(ConI);
4205 
4206   op_cost(0);
4207   format %{ %}
4208   interface(CONST_INTER);
4209 %}
4210 
4211 // 32 bit unit increment
4212 operand immI_1()
4213 %{
       // Accepts only the ConI whose value is 1.
4214   predicate(n->get_int() == 1);
4215   match(ConI);
4216 
4217   op_cost(0);
4218   format %{ %}
4219   interface(CONST_INTER);
4220 %}
4221 
4222 // 32 bit unit decrement
4223 operand immI_M1()
4224 %{
       // Accepts only the ConI whose value is -1.
4225   predicate(n->get_int() == -1);
4226   match(ConI);
4227 
4228   op_cost(0);
4229   format %{ %}
4230   interface(CONST_INTER);
4231 %}
4232 
4233 operand immI_le_4()
4234 %{
4235   predicate(n->get_int() <= 4); // upper bound only: negative ints also pass -- NOTE(review): confirm intended
4236   match(ConI);
4237 
4238   op_cost(0);
4239   format %{ %}
4240   interface(CONST_INTER);
4241 %}
4242 
4243 operand immI_31()
4244 %{
4245   predicate(n->get_int() == 31); // accepts only the int constant 31
4246   match(ConI);
4247 
4248   op_cost(0);
4249   format %{ %}
4250   interface(CONST_INTER);
4251 %}
4252 
4253 operand immI_8()
4254 %{
4255   predicate(n->get_int() == 8); // accepts only the int constant 8
4256   match(ConI);
4257 
4258   op_cost(0);
4259   format %{ %}
4260   interface(CONST_INTER);
4261 %}
4262 
4263 operand immI_16()
4264 %{
4265   predicate(n->get_int() == 16); // accepts only the int constant 16
4266   match(ConI);
4267 
4268   op_cost(0);
4269   format %{ %}
4270   interface(CONST_INTER);
4271 %}
4272 
4273 operand immI_24()
4274 %{
4275   predicate(n->get_int() == 24); // accepts only the int constant 24
4276   match(ConI);
4277 
4278   op_cost(0);
4279   format %{ %}
4280   interface(CONST_INTER);
4281 %}
4282 
4283 operand immI_32()
4284 %{
4285   predicate(n->get_int() == 32); // accepts only the int constant 32
4286   match(ConI);
4287 
4288   op_cost(0);
4289   format %{ %}
4290   interface(CONST_INTER);
4291 %}
4292 
4293 operand immI_48()
4294 %{
4295   predicate(n->get_int() == 48); // accepts only the int constant 48
4296   match(ConI);
4297 
4298   op_cost(0);
4299   format %{ %}
4300   interface(CONST_INTER);
4301 %}
4302 
4303 operand immI_56()
4304 %{
4305   predicate(n->get_int() == 56); // accepts only the int constant 56
4306   match(ConI);
4307 
4308   op_cost(0);
4309   format %{ %}
4310   interface(CONST_INTER);
4311 %}
4312 
4313 operand immI_64()
4314 %{
4315   predicate(n->get_int() == 64); // accepts only the int constant 64
4316   match(ConI);
4317 
4318   op_cost(0);
4319   format %{ %}
4320   interface(CONST_INTER);
4321 %}
4322 
4323 operand immI_255()
4324 %{
4325   predicate(n->get_int() == 255); // accepts only the int constant 255 (0xff)
4326   match(ConI);
4327 
4328   op_cost(0);
4329   format %{ %}
4330   interface(CONST_INTER);
4331 %}
4332 
4333 operand immI_65535()
4334 %{
4335   predicate(n->get_int() == 65535); // accepts only the int constant 65535 (0xffff)
4336   match(ConI);
4337 
4338   op_cost(0);
4339   format %{ %}
4340   interface(CONST_INTER);
4341 %}
4342 
4343 operand immL_63()
4344 %{
4345   predicate(n->get_int() == 63); // NOTE(review): immL_ name, but this tests an int value
4346   match(ConI);                   // and matches ConI, not ConL -- presumably a shift count; confirm
4347 
4348   op_cost(0);
4349   format %{ %}
4350   interface(CONST_INTER);
4351 %}
4352 
4353 operand immL_255()
4354 %{
4355   predicate(n->get_long() == 255L); // accepts only the long constant 255 (0xff)
4356   match(ConL);                      // long constant, consistent with immL_65535 and immL_4294967295
4357 
4358   op_cost(0);
4359   format %{ %}
4360   interface(CONST_INTER);
4361 %}
4362 
4363 operand immL_65535()
4364 %{
4365   predicate(n->get_long() == 65535L); // accepts only the long constant 65535 (0xffff)
4366   match(ConL);
4367 
4368   op_cost(0);
4369   format %{ %}
4370   interface(CONST_INTER);
4371 %}
4372 
4373 operand immL_4294967295()
4374 %{
4375   predicate(n->get_long() == 4294967295L); // long constant 0xffffffff; same value immL_32bits tests
4376   match(ConL);
4377 
4378   op_cost(0);
4379   format %{ %}
4380   interface(CONST_INTER);
4381 %}
4382 
4383 operand immL_bitmask()
4384 %{
4385   predicate(((n->get_long() & 0xc000000000000000l) == 0)
4386             && is_power_of_2(n->get_long() + 1)); // top two bits clear and value + 1 a power of 2 (per is_power_of_2)
4387   match(ConL);
4388 
4389   op_cost(0);
4390   format %{ %}
4391   interface(CONST_INTER);
4392 %}
4393 
4394 operand immI_bitmask()
4395 %{
4396   predicate(((n->get_int() & 0xc0000000) == 0)
4397             && is_power_of_2(n->get_int() + 1)); // top two bits clear and value + 1 a power of 2 (per is_power_of_2)
4398   match(ConI);
4399 
4400   op_cost(0);
4401   format %{ %}
4402   interface(CONST_INTER);
4403 %}
4404 
4405 // Scale values for scaled offset addressing modes (up to long but not quad): int constant in [0, 3]
4406 operand immIScale()
4407 %{
4408   predicate(0 <= n->get_int() && (n->get_int() <= 3));
4409   match(ConI);
4410 
4411   op_cost(0);
4412   format %{ %}
4413   interface(CONST_INTER);
4414 %}
4415 
4416 // 26 bit signed offset -- for pc-relative branches: int constant in [-2^25, 2^25)
4417 operand immI26()
4418 %{
4419   predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
4420   match(ConI);
4421 
4422   op_cost(0);
4423   format %{ %}
4424   interface(CONST_INTER);
4425 %}
4426 
4427 // 19 bit signed offset -- for pc-relative loads: int constant in [-2^18, 2^18)
4428 operand immI19()
4429 %{
4430   predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
4431   match(ConI);
4432 
4433   op_cost(0);
4434   format %{ %}
4435   interface(CONST_INTER);
4436 %}
4437 
4438 // 12 bit unsigned offset -- for base plus immediate loads: int constant in [0, 4096)
4439 operand immIU12()
4440 %{
4441   predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
4442   match(ConI);
4443 
4444   op_cost(0);
4445   format %{ %}
4446   interface(CONST_INTER);
4447 %}
4448 
4449 operand immLU12()
4450 %{
4451   predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12))); // long counterpart of immIU12: value in [0, 4096)
4452   match(ConL);
4453 
4454   op_cost(0);
4455   format %{ %}
4456   interface(CONST_INTER);
4457 %}
4458 
4459 // Offset for scaled or unscaled immediate loads and stores: int constant accepted by Address::offset_ok_for_immed
4460 operand immIOffset()
4461 %{
4462   predicate(Address::offset_ok_for_immed(n->get_int()));
4463   match(ConI);
4464 
4465   op_cost(0);
4466   format %{ %}
4467   interface(CONST_INTER);
4468 %}
4469 
4470 operand immLoffset()
4471 %{
4472   predicate(Address::offset_ok_for_immed(n->get_long())); // long counterpart of immIOffset
4473   match(ConL);
4474 
4475   op_cost(0);
4476   format %{ %}
4477   interface(CONST_INTER);
4478 %}
4479 
4480 // 32 bit integer valid for add sub immediate (value is widened to long before the validity check)
4481 operand immIAddSub()
4482 %{
4483   predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
4484   match(ConI);
4485   op_cost(0);
4486   format %{ %}
4487   interface(CONST_INTER);
4488 %}
4489 
4490 // 32 bit unsigned integer valid for logical immediate (checked with is32 == true)
4491 // TODO -- check this is right when e.g the mask is 0x80000000: the (unsigned long) cast sign-extends a negative int
4492 operand immILog()
4493 %{
4494   predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
4495   match(ConI);
4496 
4497   op_cost(0);
4498   format %{ %}
4499   interface(CONST_INTER);
4500 %}
4501 
4502 // Integer operands 64 bit
4503 // 64 bit immediate: matches any long constant (ConL), no value restriction
4504 operand immL()
4505 %{
4506   match(ConL);
4507 
4508   op_cost(0);
4509   format %{ %}
4510   interface(CONST_INTER);
4511 %}
4512 
4513 // 64 bit zero: long constant (ConL) whose value is 0
4514 operand immL0()
4515 %{
4516   predicate(n->get_long() == 0);
4517   match(ConL);
4518 
4519   op_cost(0);
4520   format %{ %}
4521   interface(CONST_INTER);
4522 %}
4523 
4524 // 64 bit unit increment: long constant (ConL) whose value is 1
4525 operand immL_1()
4526 %{
4527   predicate(n->get_long() == 1);
4528   match(ConL);
4529 
4530   op_cost(0);
4531   format %{ %}
4532   interface(CONST_INTER);
4533 %}
4534 
4535 // 64 bit unit decrement: long constant (ConL) whose value is -1
4536 operand immL_M1()
4537 %{
4538   predicate(n->get_long() == -1);
4539   match(ConL);
4540 
4541   op_cost(0);
4542   format %{ %}
4543   interface(CONST_INTER);
4544 %}
4545 
4546 // long constant (ConL) equal to the byte offset of last_Java_pc within the thread's frame anchor
4547 
4548 operand immL_pc_off()
4549 %{
4550   predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
4551                              in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
4552   match(ConL);
4553 
4554   op_cost(0);
4555   format %{ %}
4556   interface(CONST_INTER);
4557 %}
4558 
4559 // 64 bit integer valid for add sub immediate (per Assembler::operand_valid_for_add_sub_immediate)
4560 operand immLAddSub()
4561 %{
4562   predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
4563   match(ConL);
4564   op_cost(0);
4565   format %{ %}
4566   interface(CONST_INTER);
4567 %}
4568 
4569 // 64 bit integer valid for logical immediate (checked with is32 == false)
4570 operand immLLog()
4571 %{
4572   predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
4573   match(ConL);
4574   op_cost(0);
4575   format %{ %}
4576   interface(CONST_INTER);
4577 %}
4578 
4579 // Long Immediate: low 32-bit mask, i.e. exactly 0xFFFFFFFF (same value immL_4294967295 tests)
4580 operand immL_32bits()
4581 %{
4582   predicate(n->get_long() == 0xFFFFFFFFL);
4583   match(ConL);
4584   op_cost(0);
4585   format %{ %}
4586   interface(CONST_INTER);
4587 %}
4588 
4589 // Pointer operands
4590 // Pointer Immediate: matches any pointer constant (ConP), no value restriction
4591 operand immP()
4592 %{
4593   match(ConP);
4594 
4595   op_cost(0);
4596   format %{ %}
4597   interface(CONST_INTER);
4598 %}
4599 
4600 // NULL Pointer Immediate: pointer constant (ConP) whose value is 0
4601 operand immP0()
4602 %{
4603   predicate(n->get_ptr() == 0);
4604   match(ConP);
4605 
4606   op_cost(0);
4607   format %{ %}
4608   interface(CONST_INTER);
4609 %}
4610 
4611 // Pointer Immediate One: pointer constant (ConP) whose value is 1
4612 // this is used in object initialization (initial object header)
4613 operand immP_1()
4614 %{
4615   predicate(n->get_ptr() == 1);
4616   match(ConP);
4617 
4618   op_cost(0);
4619   format %{ %}
4620   interface(CONST_INTER);
4621 %}
4622 
4623 // Polling Page Pointer Immediate: pointer constant equal to os::get_polling_page()
4624 operand immPollPage()
4625 %{
4626   predicate((address)n->get_ptr() == os::get_polling_page());
4627   match(ConP);
4628 
4629   op_cost(0);
4630   format %{ %}
4631   interface(CONST_INTER);
4632 %}
4633 
4634 // Card Table Byte Map Base: pointer constant equal to the barrier set's byte_map_base
4635 operand immByteMapBase()
4636 %{
4637   // Get base of card map; the heap's barrier set is cast to CardTableModRefBS without a type check
4638   predicate((jbyte*)n->get_ptr() ==
4639         ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
4640   match(ConP);
4641 
4642   op_cost(0);
4643   format %{ %}
4644   interface(CONST_INTER);
4645 %}
4646 
4647 // Pointer Immediate Minus One: pointer constant (ConP) whose value is -1
4648 // this is used when we want to write the current PC to the thread anchor
4649 operand immP_M1()
4650 %{
4651   predicate(n->get_ptr() == -1);
4652   match(ConP);
4653 
4654   op_cost(0);
4655   format %{ %}
4656   interface(CONST_INTER);
4657 %}
4658 
4659 // Pointer Immediate Minus Two: pointer constant (ConP) whose value is -2
4660 // this is used when we want to write the current PC to the thread anchor (NOTE(review): same text as immP_M1 -- confirm)
4661 operand immP_M2()
4662 %{
4663   predicate(n->get_ptr() == -2);
4664   match(ConP);
4665 
4666   op_cost(0);
4667   format %{ %}
4668   interface(CONST_INTER);
4669 %}
4670 
4671 // Float and Double operands
4672 // Double Immediate: matches any double constant (ConD), no value restriction
4673 operand immD()
4674 %{
4675   match(ConD);
4676   op_cost(0);
4677   format %{ %}
4678   interface(CONST_INTER);
4679 %}
4680 
4681 // Double Immediate: +0.0d (tests jlong_cast of the value against 0, so presumably -0.0d is excluded)
4682 operand immD0()
4683 %{
4684   predicate(jlong_cast(n->getd()) == 0);
4685   match(ConD);
4686 
4687   op_cost(0);
4688   format %{ %}
4689   interface(CONST_INTER);
4690 %}
4691 
4692 // Double Immediate accepted by Assembler::operand_valid_for_float_immediate (presumably encodable in an fmov immediate -- confirm)
4693 operand immDPacked()
4694 %{
4695   predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
4696   match(ConD);
4697   op_cost(0);
4698   format %{ %}
4699   interface(CONST_INTER);
4700 %}
4701 
4702 // Float Immediate: matches any float constant (ConF), no value restriction
4703 operand immF()
4704 %{
4705   match(ConF);
4706   op_cost(0);
4707   format %{ %}
4708   interface(CONST_INTER);
4709 %}
4710 
4711 // Float Immediate: +0.0f (tests jint_cast of the value against 0, so presumably -0.0f is excluded)
4712 operand immF0()
4713 %{
4714   predicate(jint_cast(n->getf()) == 0);
4715   match(ConF);
4716 
4717   op_cost(0);
4718   format %{ %}
4719   interface(CONST_INTER);
4720 %}
4721 
4722 // Float Immediate accepted by Assembler::operand_valid_for_float_immediate after widening to double
4723 operand immFPacked()
4724 %{
4725   predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
4726   match(ConF);
4727   op_cost(0);
4728   format %{ %}
4729   interface(CONST_INTER);
4730 %}
4731 
4732 // Narrow pointer operands
4733 // Narrow Pointer Immediate: matches any narrow pointer constant (ConN), no value restriction
4734 operand immN()
4735 %{
4736   match(ConN);
4737 
4738   op_cost(0);
4739   format %{ %}
4740   interface(CONST_INTER);
4741 %}
4742 
4743 // Narrow NULL Pointer Immediate: narrow constant (ConN) whose get_narrowcon() value is 0
4744 operand immN0()
4745 %{
4746   predicate(n->get_narrowcon() == 0);
4747   match(ConN);
4748 
4749   op_cost(0);
4750   format %{ %}
4751   interface(CONST_INTER);
4752 %}
4753 
4754 operand immNKlass()
4755 %{
4756   match(ConNKlass); // matches any ConNKlass constant, no value restriction
4757 
4758   op_cost(0);
4759   format %{ %}
4760   interface(CONST_INTER);
4761 %}
4762 
4763 // Integer 32 bit Register Operands
4764 // Integer 32 bit Register (excludes SP); allocated in any_reg32, also accepts an iRegINoSp
4765 operand iRegI()
4766 %{
4767   constraint(ALLOC_IN_RC(any_reg32));
4768   match(RegI);
4769   match(iRegINoSp);
4770   op_cost(0);
4771   format %{ %}
4772   interface(REG_INTER);
4773 %}
4774 
4775 // Integer 32 bit Register not Special; allocated in no_special_reg32
4776 operand iRegINoSp()
4777 %{
4778   constraint(ALLOC_IN_RC(no_special_reg32));
4779   match(RegI);
4780   op_cost(0);
4781   format %{ %}
4782   interface(REG_INTER);
4783 %}
4784 
4785 // Integer 64 bit Register Operands
4786 // Integer 64 bit Register (includes SP); allocated in any_reg, also accepts an iRegLNoSp
4787 operand iRegL()
4788 %{
4789   constraint(ALLOC_IN_RC(any_reg));
4790   match(RegL);
4791   match(iRegLNoSp);
4792   op_cost(0);
4793   format %{ %}
4794   interface(REG_INTER);
4795 %}
4796 
4797 // Integer 64 bit Register not Special; allocated in no_special_reg
4798 operand iRegLNoSp()
4799 %{
4800   constraint(ALLOC_IN_RC(no_special_reg));
4801   match(RegL); // NOTE(review): no op_cost here, unlike iRegINoSp's op_cost(0) -- confirm the default is intended
4802   format %{ %}
4803   interface(REG_INTER);
4804 %}
4805 
4806 // Pointer Register Operands
4807 // Pointer Register; allocated in ptr_reg, also accepts iRegPNoSp, iRegP_R0 and thread_RegP
4808 operand iRegP()
4809 %{
4810   constraint(ALLOC_IN_RC(ptr_reg));
4811   match(RegP);
4812   match(iRegPNoSp);
4813   match(iRegP_R0);
4814   //match(iRegP_R2);
4815   //match(iRegP_R4);
4816   //match(iRegP_R5);
4817   match(thread_RegP);
4818   op_cost(0);
4819   format %{ %}
4820   interface(REG_INTER);
4821 %}
4822 
4823 // Pointer 64 bit Register not Special; allocated in no_special_ptr_reg, only RegP is matched
4824 operand iRegPNoSp()
4825 %{
4826   constraint(ALLOC_IN_RC(no_special_ptr_reg));
4827   match(RegP);
4828   // match(iRegP);
4829   // match(iRegP_R0);
4830   // match(iRegP_R2);
4831   // match(iRegP_R4);
4832   // match(iRegP_R5);
4833   // match(thread_RegP);
4834   op_cost(0);
4835   format %{ %}
4836   interface(REG_INTER);
4837 %}
4838 
4839 // Pointer 64 bit Register R0 only (register class r0_reg); also accepts an iRegPNoSp
4840 operand iRegP_R0()
4841 %{
4842   constraint(ALLOC_IN_RC(r0_reg));
4843   match(RegP);
4844   // match(iRegP);
4845   match(iRegPNoSp);
4846   op_cost(0);
4847   format %{ %}
4848   interface(REG_INTER);
4849 %}
4850 
4851 // Pointer 64 bit Register R1 only (register class r1_reg); also accepts an iRegPNoSp
4852 operand iRegP_R1()
4853 %{
4854   constraint(ALLOC_IN_RC(r1_reg));
4855   match(RegP);
4856   // match(iRegP);
4857   match(iRegPNoSp);
4858   op_cost(0);
4859   format %{ %}
4860   interface(REG_INTER);
4861 %}
4862 
4863 // Pointer 64 bit Register R2 only (register class r2_reg); also accepts an iRegPNoSp
4864 operand iRegP_R2()
4865 %{
4866   constraint(ALLOC_IN_RC(r2_reg));
4867   match(RegP);
4868   // match(iRegP);
4869   match(iRegPNoSp);
4870   op_cost(0);
4871   format %{ %}
4872   interface(REG_INTER);
4873 %}
4874 
4875 // Pointer 64 bit Register R3 only (register class r3_reg); also accepts an iRegPNoSp
4876 operand iRegP_R3()
4877 %{
4878   constraint(ALLOC_IN_RC(r3_reg));
4879   match(RegP);
4880   // match(iRegP);
4881   match(iRegPNoSp);
4882   op_cost(0);
4883   format %{ %}
4884   interface(REG_INTER);
4885 %}
4886 
4887 // Pointer 64 bit Register R4 only (register class r4_reg); also accepts an iRegPNoSp
4888 operand iRegP_R4()
4889 %{
4890   constraint(ALLOC_IN_RC(r4_reg));
4891   match(RegP);
4892   // match(iRegP);
4893   match(iRegPNoSp);
4894   op_cost(0);
4895   format %{ %}
4896   interface(REG_INTER);
4897 %}
4898 
4899 // Pointer 64 bit Register R5 only (register class r5_reg); also accepts an iRegPNoSp
4900 operand iRegP_R5()
4901 %{
4902   constraint(ALLOC_IN_RC(r5_reg));
4903   match(RegP);
4904   // match(iRegP);
4905   match(iRegPNoSp);
4906   op_cost(0);
4907   format %{ %}
4908   interface(REG_INTER);
4909 %}
4910 
4911 // Pointer 64 bit Register R10 only (register class r10_reg); also accepts an iRegPNoSp
4912 operand iRegP_R10()
4913 %{
4914   constraint(ALLOC_IN_RC(r10_reg));
4915   match(RegP);
4916   // match(iRegP);
4917   match(iRegPNoSp);
4918   op_cost(0);
4919   format %{ %}
4920   interface(REG_INTER);
4921 %}
4922 
4923 // Long 64 bit Register R11 only (register class r11_reg); also accepts an iRegLNoSp
4924 operand iRegL_R11()
4925 %{
4926   constraint(ALLOC_IN_RC(r11_reg));
4927   match(RegL);
4928   match(iRegLNoSp);
4929   op_cost(0);
4930   format %{ %}
4931   interface(REG_INTER);
4932 %}
4933 
4934 // Pointer 64 bit Register FP only (register class fp_reg); unlike the Rn variants it does not accept iRegPNoSp
4935 operand iRegP_FP()
4936 %{
4937   constraint(ALLOC_IN_RC(fp_reg));
4938   match(RegP);
4939   // match(iRegP);
4940   op_cost(0);
4941   format %{ %}
4942   interface(REG_INTER);
4943 %}
4944 
4945 // Integer 32 bit Register R0 only (register class int_r0_reg); also accepts an iRegINoSp
4946 operand iRegI_R0()
4947 %{
4948   constraint(ALLOC_IN_RC(int_r0_reg));
4949   match(RegI);
4950   match(iRegINoSp);
4951   op_cost(0);
4952   format %{ %}
4953   interface(REG_INTER);
4954 %}
4955 
4956 // Integer 32 bit Register R2 only (register class int_r2_reg); also accepts an iRegINoSp
4957 operand iRegI_R2()
4958 %{
4959   constraint(ALLOC_IN_RC(int_r2_reg));
4960   match(RegI);
4961   match(iRegINoSp);
4962   op_cost(0);
4963   format %{ %}
4964   interface(REG_INTER);
4965 %}
4966 
4967 // Integer 32 bit Register R3 only (register class int_r3_reg); also accepts an iRegINoSp
4968 operand iRegI_R3()
4969 %{
4970   constraint(ALLOC_IN_RC(int_r3_reg));
4971   match(RegI);
4972   match(iRegINoSp);
4973   op_cost(0);
4974   format %{ %}
4975   interface(REG_INTER);
4976 %}
4977 
4978 
4979 // Integer 32 bit Register R4 only (register class int_r4_reg); also accepts an iRegINoSp
4980 operand iRegI_R4()
4981 %{
4982   constraint(ALLOC_IN_RC(int_r4_reg));
4983   match(RegI);
4984   match(iRegINoSp);
4985   op_cost(0);
4986   format %{ %}
4987   interface(REG_INTER);
4988 %}
4989 
4990 
4991 // Pointer Register Operands
4992 // Narrow Pointer Register; allocated in any_reg32, also accepts an iRegNNoSp
4993 operand iRegN()
4994 %{
4995   constraint(ALLOC_IN_RC(any_reg32));
4996   match(RegN);
4997   match(iRegNNoSp);
4998   op_cost(0);
4999   format %{ %}
5000   interface(REG_INTER);
5001 %}
5002 
5003 // Narrow Pointer Register not Special; allocated in no_special_reg32
5004 operand iRegNNoSp()
5005 %{
5006   constraint(ALLOC_IN_RC(no_special_reg32));
5007   match(RegN);
5008   op_cost(0);
5009   format %{ %}
5010   interface(REG_INTER);
5011 %}
5012 
5013 // heap base register -- used for encoding immN0; a RegI operand allocated in heapbase_reg
5014 
5015 operand iRegIHeapbase()
5016 %{
5017   constraint(ALLOC_IN_RC(heapbase_reg));
5018   match(RegI);
5019   op_cost(0);
5020   format %{ %}
5021   interface(REG_INTER);
5022 %}
5023 
5024 // Float Register
5025 // Float register operands: RegF values allocated in float_reg
5026 operand vRegF()
5027 %{
5028   constraint(ALLOC_IN_RC(float_reg));
5029   match(RegF);
5030 
5031   op_cost(0);
5032   format %{ %}
5033   interface(REG_INTER);
5034 %}
5035 
5036 // Double Register
5037 // Double register operands: RegD values allocated in double_reg
5038 operand vRegD()
5039 %{
5040   constraint(ALLOC_IN_RC(double_reg));
5041   match(RegD);
5042 
5043   op_cost(0);
5044   format %{ %}
5045   interface(REG_INTER);
5046 %}
5047 
5048 operand vecD()
5049 %{
5050   constraint(ALLOC_IN_RC(vectord_reg)); // VecD vector values live in the vectord_reg class
5051   match(VecD);
5052 
5053   op_cost(0);
5054   format %{ %}
5055   interface(REG_INTER);
5056 %}
5057 
5058 operand vecX()
5059 %{
5060   constraint(ALLOC_IN_RC(vectorx_reg)); // VecX vector values live in the vectorx_reg class
5061   match(VecX);
5062 
5063   op_cost(0);
5064   format %{ %}
5065   interface(REG_INTER);
5066 %}
5067 
5068 operand vRegD_V0()
5069 %{
5070   constraint(ALLOC_IN_RC(v0_reg)); // double value pinned to the v0_reg class
5071   match(RegD);
5072   op_cost(0);
5073   format %{ %}
5074   interface(REG_INTER);
5075 %}
5076 
5077 operand vRegD_V1()
5078 %{
5079   constraint(ALLOC_IN_RC(v1_reg)); // double value pinned to the v1_reg class
5080   match(RegD);
5081   op_cost(0);
5082   format %{ %}
5083   interface(REG_INTER);
5084 %}
5085 
5086 operand vRegD_V2()
5087 %{
5088   constraint(ALLOC_IN_RC(v2_reg)); // double value pinned to the v2_reg class
5089   match(RegD);
5090   op_cost(0);
5091   format %{ %}
5092   interface(REG_INTER);
5093 %}
5094 
5095 operand vRegD_V3()
5096 %{
5097   constraint(ALLOC_IN_RC(v3_reg)); // double value pinned to the v3_reg class
5098   match(RegD);
5099   op_cost(0);
5100   format %{ %}
5101   interface(REG_INTER);
5102 %}
5103 
5104 // Flags register, used as output of signed compare instructions
5105 
5106 // note that on AArch64 we also use this register as the output
5107 // for floating point compare instructions (CmpF CmpD). this ensures
5108 // that ordered inequality tests use GT, GE, LT or LE none of which
5109 // pass through cases where the result is unordered i.e. one or both
5110 // inputs to the compare is a NaN. this means that the ideal code can
5111 // replace e.g. a GT with an LE and not end up capturing the NaN case
5112 // (where the comparison should always fail). EQ and NE tests are
5113 // always generated in ideal code so that unordered folds into the NE
5114 // case, matching the behaviour of AArch64 NE.
5115 //
5116 // This differs from x86 where the outputs of FP compares use a
5117 // special FP flags registers and where compares based on this
5118 // register are distinguished into ordered inequalities (cmpOpUCF) and
5119 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
5120 // to explicitly handle the unordered case in branches. x86 also has
5121 // to include extra CMoveX rules to accept a cmpOpUCF input.
5122 
5123 operand rFlagsReg()
5124 %{
5125   constraint(ALLOC_IN_RC(int_flags)); // same register class as rFlagsRegU; the operand type tells the two apart
5126   match(RegFlags);
5127 
5128   op_cost(0);
5129   format %{ "RFLAGS" %}
5130   interface(REG_INTER);
5131 %}
5132 
5133 // Flags register, used as output of unsigned compare instructions (same int_flags class as rFlagsReg)
5134 operand rFlagsRegU()
5135 %{
5136   constraint(ALLOC_IN_RC(int_flags));
5137   match(RegFlags);
5138 
5139   op_cost(0);
5140   format %{ "RFLAGSU" %}
5141   interface(REG_INTER);
5142 %}
5143 
5144 // Special Registers
5145 
5146 // Method Register: pointer operand pinned to method_reg, used as the inline cache register
5147 operand inline_cache_RegP(iRegP reg)
5148 %{
5149   constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
5150   match(reg);
5151   match(iRegPNoSp);
5152   op_cost(0);
5153   format %{ %}
5154   interface(REG_INTER);
5155 %}
5156 
5157 operand interpreter_method_oop_RegP(iRegP reg)
5158 %{
5159   constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg; same method_reg class as inline_cache_RegP
5160   match(reg);
5161   match(iRegPNoSp);
5162   op_cost(0);
5163   format %{ %}
5164   interface(REG_INTER);
5165 %}
5166 
5167 // Thread Register: pointer operand pinned to the thread_reg class
5168 operand thread_RegP(iRegP reg)
5169 %{
5170   constraint(ALLOC_IN_RC(thread_reg)); // thread_reg
5171   match(reg);
5172   op_cost(0);
5173   format %{ %}
5174   interface(REG_INTER);
5175 %}
5176 
5177 operand lr_RegP(iRegP reg)
5178 %{
5179   constraint(ALLOC_IN_RC(lr_reg)); // link_reg: pointer operand pinned to the lr_reg class
5180   match(reg);
5181   op_cost(0);
5182   format %{ %}
5183   interface(REG_INTER);
5184 %}
5185 
5186 //----------Memory Operands----------------------------------------------------
5187 
5188 operand indirect(iRegP reg)
5189 %{
5190   constraint(ALLOC_IN_RC(ptr_reg)); // address = reg, no index and no displacement
5191   match(reg);
5192   op_cost(0);
5193   format %{ "[$reg]" %}
5194   interface(MEMORY_INTER) %{
5195     base($reg);
5196     index(0xffffffff); // presumably the "no index register" marker -- confirm against the memory encoders
5197     scale(0x0);
5198     disp(0x0);
5199   %}
5200 %}
5201 
5202 operand indIndexScaledOffsetI(iRegP reg, iRegL lreg, immIScale scale, immIU12 off)
5203 %{
5204   constraint(ALLOC_IN_RC(ptr_reg)); // address = reg + (lreg << scale) + off, int offset in [0, 4096)
5205   match(AddP (AddP reg (LShiftL lreg scale)) off);
5206   op_cost(INSN_COST);
5207   format %{ "$reg, $lreg lsl($scale), $off" %}
5208   interface(MEMORY_INTER) %{
5209     base($reg);
5210     index($lreg);
5211     scale($scale);
5212     disp($off);
5213   %}
5214 %}
5215 
5216 operand indIndexScaledOffsetL(iRegP reg, iRegL lreg, immIScale scale, immLU12 off)
5217 %{
5218   constraint(ALLOC_IN_RC(ptr_reg)); // address = reg + (lreg << scale) + off, long offset in [0, 4096)
5219   match(AddP (AddP reg (LShiftL lreg scale)) off);
5220   op_cost(INSN_COST);
5221   format %{ "$reg, $lreg lsl($scale), $off" %}
5222   interface(MEMORY_INTER) %{
5223     base($reg);
5224     index($lreg);
5225     scale($scale);
5226     disp($off);
5227   %}
5228 %}
5229 
5230 operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off)
5231 %{
5232   constraint(ALLOC_IN_RC(ptr_reg)); // address = reg + ConvI2L(ireg) + off, long offset in [0, 4096)
5233   match(AddP (AddP reg (ConvI2L ireg)) off);
5234   op_cost(INSN_COST);
5235   format %{ "$reg, $ireg, $off I2L" %}
5236   interface(MEMORY_INTER) %{
5237     base($reg);
5238     index($ireg);
5239     scale(0x0);
5240     disp($off);
5241   %}
5242 %}
5243 
5244 operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off)
5245 %{
5246   constraint(ALLOC_IN_RC(ptr_reg)); // address = reg + (ConvI2L(ireg) << scale) + off, long offset in [0, 4096)
5247   match(AddP (AddP reg (LShiftL (ConvI2L ireg) scale)) off);
5248   op_cost(INSN_COST);
5249   format %{ "$reg, $ireg sxtw($scale), $off I2L" %}
5250   interface(MEMORY_INTER) %{
5251     base($reg);
5252     index($ireg);
5253     scale($scale);
5254     disp($off);
5255   %}
5256 %}
5257 
5258 operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
5259 %{
5260   constraint(ALLOC_IN_RC(ptr_reg)); // address = reg + (ConvI2L(ireg) << scale), no displacement
5261   match(AddP reg (LShiftL (ConvI2L ireg) scale));
5262   op_cost(0);
5263   format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
5264   interface(MEMORY_INTER) %{
5265     base($reg);
5266     index($ireg);
5267     scale($scale);
5268     disp(0x0);
5269   %}
5270 %}
5271 
5272 operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
5273 %{
5274   constraint(ALLOC_IN_RC(ptr_reg)); // address = reg + (lreg << scale), no displacement
5275   match(AddP reg (LShiftL lreg scale));
5276   op_cost(0);
5277   format %{ "$reg, $lreg lsl($scale)" %}
5278   interface(MEMORY_INTER) %{
5279     base($reg);
5280     index($lreg);
5281     scale($scale);
5282     disp(0x0);
5283   %}
5284 %}
5285 
5286 operand indIndex(iRegP reg, iRegL lreg)
5287 %{
5288   constraint(ALLOC_IN_RC(ptr_reg)); // address = reg + lreg, unscaled and without displacement
5289   match(AddP reg lreg);
5290   op_cost(0);
5291   format %{ "$reg, $lreg" %}
5292   interface(MEMORY_INTER) %{
5293     base($reg);
5294     index($lreg);
5295     scale(0x0);
5296     disp(0x0);
5297   %}
5298 %}
5299 
5300 operand indOffI(iRegP reg, immIOffset off)
5301 %{
5302   constraint(ALLOC_IN_RC(ptr_reg)); // address = reg + off, int offset accepted by immIOffset
5303   match(AddP reg off);
5304   op_cost(0);
5305   format %{ "[$reg, $off]" %}
5306   interface(MEMORY_INTER) %{
5307     base($reg);
5308     index(0xffffffff); // presumably the "no index register" marker -- confirm against the memory encoders
5309     scale(0x0);
5310     disp($off);
5311   %}
5312 %}
5313 
5314 operand indOffL(iRegP reg, immLoffset off)
5315 %{
5316   constraint(ALLOC_IN_RC(ptr_reg)); // address = reg + off, long offset accepted by immLoffset
5317   match(AddP reg off);
5318   op_cost(0);
5319   format %{ "[$reg, $off]" %}
5320   interface(MEMORY_INTER) %{
5321     base($reg);
5322     index(0xffffffff); // presumably the "no index register" marker -- confirm against the memory encoders
5323     scale(0x0);
5324     disp($off);
5325   %}
5326 %}
5327 
5328 
5329 operand indirectN(iRegN reg)
5330 %{
5331   predicate(Universe::narrow_oop_shift() == 0); // only usable when the narrow oop shift is zero
5332   constraint(ALLOC_IN_RC(ptr_reg)); // address = DecodeN(reg), no index and no displacement
5333   match(DecodeN reg);
5334   op_cost(0);
5335   format %{ "[$reg]\t# narrow" %}
5336   interface(MEMORY_INTER) %{
5337     base($reg);
5338     index(0xffffffff); // presumably the "no index register" marker -- confirm against the memory encoders
5339     scale(0x0);
5340     disp(0x0);
5341   %}
5342 %}
5343 
5344 operand indIndexScaledOffsetIN(iRegN reg, iRegL lreg, immIScale scale, immIU12 off)
5345 %{
5346   predicate(Universe::narrow_oop_shift() == 0); // only usable when the narrow oop shift is zero
5347   constraint(ALLOC_IN_RC(ptr_reg)); // address = DecodeN(reg) + (lreg << scale) + off, int offset in [0, 4096)
5348   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5349   op_cost(INSN_COST); // same cost as indIndexScaledOffsetI/L/LN, which have the same base + scaled index + offset shape
5350   format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
5351   interface(MEMORY_INTER) %{
5352     base($reg);
5353     index($lreg);
5354     scale($scale);
5355     disp($off);
5356   %}
5357 %}
5358 
5359 operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLU12 off)
5360 %{
5361   predicate(Universe::narrow_oop_shift() == 0); // only usable when the narrow oop shift is zero
5362   constraint(ALLOC_IN_RC(ptr_reg)); // address = DecodeN(reg) + (lreg << scale) + off, long offset in [0, 4096)
5363   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5364   op_cost(INSN_COST);
5365   format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
5366   interface(MEMORY_INTER) %{
5367     base($reg);
5368     index($lreg);
5369     scale($scale);
5370     disp($off);
5371   %}
5372 %}
5373 
5374 operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off)
5375 %{
5376   predicate(Universe::narrow_oop_shift() == 0); // only usable when the narrow oop shift is zero
5377   constraint(ALLOC_IN_RC(ptr_reg)); // address = DecodeN(reg) + ConvI2L(ireg) + off, long offset in [0, 4096)
5378   match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off);
5379   op_cost(INSN_COST);
5380   format %{ "$reg, $ireg, $off I2L\t# narrow" %}
5381   interface(MEMORY_INTER) %{
5382     base($reg);
5383     index($ireg);
5384     scale(0x0);
5385     disp($off);
5386   %}
5387 %}
5388 
5389 operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off)
5390 %{
5391   predicate(Universe::narrow_oop_shift() == 0); // only usable when the narrow oop shift is zero
5392   constraint(ALLOC_IN_RC(ptr_reg)); // address = DecodeN(reg) + (ConvI2L(ireg) << scale) + off, long offset in [0, 4096)
5393   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)) off);
5394   op_cost(INSN_COST);
5395   format %{ "$reg, $ireg sxtw($scale), $off I2L\t# narrow" %}
5396   interface(MEMORY_INTER) %{
5397     base($reg);
5398     index($ireg);
5399     scale($scale);
5400     disp($off);
5401   %}
5402 %}
5403 
5404 operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
5405 %{
5406   predicate(Universe::narrow_oop_shift() == 0); // only usable when the narrow oop shift is zero
5407   constraint(ALLOC_IN_RC(ptr_reg)); // address = DecodeN(reg) + (ConvI2L(ireg) << scale), no displacement
5408   match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
5409   op_cost(0);
5410   format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
5411   interface(MEMORY_INTER) %{
5412     base($reg);
5413     index($ireg);
5414     scale($scale);
5415     disp(0x0);
5416   %}
5417 %}
5418 
5419 operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
5420 %{
5421   predicate(Universe::narrow_oop_shift() == 0); // only usable when the narrow oop shift is zero
5422   constraint(ALLOC_IN_RC(ptr_reg)); // address = DecodeN(reg) + (lreg << scale), no displacement
5423   match(AddP (DecodeN reg) (LShiftL lreg scale));
5424   op_cost(0);
5425   format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
5426   interface(MEMORY_INTER) %{
5427     base($reg);
5428     index($lreg);
5429     scale($scale);
5430     disp(0x0);
5431   %}
5432 %}
5433 
5434 operand indIndexN(iRegN reg, iRegL lreg)
5435 %{
5436   predicate(Universe::narrow_oop_shift() == 0); // only usable when the narrow oop shift is zero
5437   constraint(ALLOC_IN_RC(ptr_reg)); // address = DecodeN(reg) + lreg, unscaled and without displacement
5438   match(AddP (DecodeN reg) lreg);
5439   op_cost(0);
5440   format %{ "$reg, $lreg\t# narrow" %}
5441   interface(MEMORY_INTER) %{
5442     base($reg);
5443     index($lreg);
5444     scale(0x0);
5445     disp(0x0);
5446   %}
5447 %}
5448 
5449 operand indOffIN(iRegN reg, immIOffset off)
5450 %{
5451   predicate(Universe::narrow_oop_shift() == 0); // only usable when the narrow oop shift is zero
5452   constraint(ALLOC_IN_RC(ptr_reg)); // address = DecodeN(reg) + off, int offset accepted by immIOffset
5453   match(AddP (DecodeN reg) off);
5454   op_cost(0);
5455   format %{ "[$reg, $off]\t# narrow" %}
5456   interface(MEMORY_INTER) %{
5457     base($reg);
5458     index(0xffffffff); // presumably the "no index register" marker -- confirm against the memory encoders
5459     scale(0x0);
5460     disp($off);
5461   %}
5462 %}
5463 
5464 operand indOffLN(iRegN reg, immLoffset off)
5465 %{
5466   predicate(Universe::narrow_oop_shift() == 0); // only usable when the narrow oop shift is zero
5467   constraint(ALLOC_IN_RC(ptr_reg)); // address = DecodeN(reg) + off, long offset accepted by immLoffset
5468   match(AddP (DecodeN reg) off);
5469   op_cost(0);
5470   format %{ "[$reg, $off]\t# narrow" %}
5471   interface(MEMORY_INTER) %{
5472     base($reg);
5473     index(0xffffffff); // presumably the "no index register" marker -- confirm against the memory encoders
5474     scale(0x0);
5475     disp($off);
5476   %}
5477 %}
5478 
5479 
5480 
5481 // AArch64 opto stubs need to write to the pc slot in the thread anchor: address = thread register + immL_pc_off
5482 operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
5483 %{
5484   constraint(ALLOC_IN_RC(ptr_reg));
5485   match(AddP reg off);
5486   op_cost(0);
5487   format %{ "[$reg, $off]" %}
5488   interface(MEMORY_INTER) %{
5489     base($reg);
5490     index(0xffffffff); // presumably the "no index register" marker -- confirm against the memory encoders
5491     scale(0x0);
5492     disp($off);
5493   %}
5494 %}
5495 
5496 //----------Special Memory Operands--------------------------------------------
5497 // Stack Slot Operand - This operand is used for loading and storing temporary
5498 //                      values on the stack where a match requires a value to
5499 //                      flow through memory.
5500 operand stackSlotP(sRegP reg)
5501 %{
5502   constraint(ALLOC_IN_RC(stack_slots)); // pointer-typed stack slot
5503   op_cost(100); // NOTE(review): only stackSlotP sets a cost; the I/F/D/L variants omit op_cost -- confirm intended
5504   // No match rule because this operand is only generated in matching
5505   // match(RegP);
5506   format %{ "[$reg]" %}
5507   interface(MEMORY_INTER) %{
5508     base(0x1e);  // RSP (x86 name; NOTE(review): confirm which AArch64 register encoding 0x1e denotes)
5509     index(0x0);  // No Index
5510     scale(0x0);  // No Scale
5511     disp($reg);  // Stack Offset
5512   %}
5513 %}
5514 
5515 operand stackSlotI(sRegI reg)
5516 %{
5517   constraint(ALLOC_IN_RC(stack_slots)); // int-typed stack slot
5518   // No match rule because this operand is only generated in matching
5519   // match(RegI);
5520   format %{ "[$reg]" %}
5521   interface(MEMORY_INTER) %{
5522     base(0x1e);  // RSP (x86 name; NOTE(review): confirm which AArch64 register encoding 0x1e denotes)
5523     index(0x0);  // No Index
5524     scale(0x0);  // No Scale
5525     disp($reg);  // Stack Offset
5526   %}
5527 %}
5528 
5529 operand stackSlotF(sRegF reg)
5530 %{
5531   constraint(ALLOC_IN_RC(stack_slots)); // float-typed stack slot
5532   // No match rule because this operand is only generated in matching
5533   // match(RegF);
5534   format %{ "[$reg]" %}
5535   interface(MEMORY_INTER) %{
5536     base(0x1e);  // RSP (x86 name; NOTE(review): confirm which AArch64 register encoding 0x1e denotes)
5537     index(0x0);  // No Index
5538     scale(0x0);  // No Scale
5539     disp($reg);  // Stack Offset
5540   %}
5541 %}
5542 
5543 operand stackSlotD(sRegD reg)
5544 %{
5545   constraint(ALLOC_IN_RC(stack_slots)); // double-typed stack slot
5546   // No match rule because this operand is only generated in matching
5547   // match(RegD);
5548   format %{ "[$reg]" %}
5549   interface(MEMORY_INTER) %{
5550     base(0x1e);  // RSP (x86 name; NOTE(review): confirm which AArch64 register encoding 0x1e denotes)
5551     index(0x0);  // No Index
5552     scale(0x0);  // No Scale
5553     disp($reg);  // Stack Offset
5554   %}
5555 %}
5556 
5557 operand stackSlotL(sRegL reg)
5558 %{
5559   constraint(ALLOC_IN_RC(stack_slots)); // long-typed stack slot
5560   // No match rule because this operand is only generated in matching
5561   // match(RegL);
5562   format %{ "[$reg]" %}
5563   interface(MEMORY_INTER) %{
5564     base(0x1e);  // RSP (x86 name; NOTE(review): confirm which AArch64 register encoding 0x1e denotes)
5565     index(0x0);  // No Index
5566     scale(0x0);  // No Scale
5567     disp($reg);  // Stack Offset
5568   %}
5569 %}
5570 
5571 // Operands for expressing Control Flow
5572 // NOTE: Label is a predefined operand which should not be redefined in
5573 //       the AD file. It is generically handled within the ADLC.
5574 
5575 //----------Conditional Branch Operands----------------------------------------
5576 // Comparison Op  - This is the operation of the comparison, and is limited to
5577 //                  the following set of codes:
5578 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5579 //
5580 // Other attributes of the comparison, such as unsignedness, are specified
5581 // by the comparison instruction that sets a condition code flags register.
5582 // That result is represented by a flags operand whose subtype is appropriate
5583 // to the unsignedness (etc.) of the comparison.
5584 //
5585 // Later, the instruction which matches both the Comparison Op (a Bool) and
5586 // the flags (produced by the Cmp) specifies the coding of the comparison op
5587 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5588 
5589 // Condition operand used for signed integral comparisons and fp comparisons.
5590 
5591 operand cmpOp()
5592 %{
5593   match(Bool);  // matches the ideal Bool node that carries the comparison test
5594 
5595   format %{ "" %}
5596   interface(COND_INTER) %{
5597     equal(0x0, "eq");  // each entry pairs a condition encoding with the mnemonic printed for it
5598     not_equal(0x1, "ne");
5599     less(0xb, "lt");           // signed forms: lt / ge / le / gt
5600     greater_equal(0xa, "ge");
5601     less_equal(0xd, "le");
5602     greater(0xc, "gt");
5603     overflow(0x6, "vs");
5604     no_overflow(0x7, "vc");
5605   %}
5606 %}
5607 
5608 // Condition operand used for unsigned integral comparisons.
5609 
5610 operand cmpOpU()
5611 %{
5612   match(Bool);  // matches the ideal Bool node that carries the comparison test
5613 
5614   format %{ "" %}
5615   interface(COND_INTER) %{
5616     equal(0x0, "eq");  // eq/ne/vs/vc entries are the same as in cmpOp
5617     not_equal(0x1, "ne");
5618     less(0x3, "lo");           // unsigned forms: lo / hs / ls / hi replace lt / ge / le / gt
5619     greater_equal(0x2, "hs");
5620     less_equal(0x9, "ls");
5621     greater(0x8, "hi");
5622     overflow(0x6, "vs");
5623     no_overflow(0x7, "vc");
5624   %}
5625 %}
5626 
5627 // Special operand allowing long args to int ops to be truncated for free:
5628   // it matches (ConvL2I reg) directly on a long register, at zero operand cost.
5629 operand iRegL2I(iRegL reg) %{
5630 
5631   op_cost(0);  // the conversion itself is not charged
5632 
5633   match(ConvL2I reg);
5634 
5635   format %{ "l2i($reg)" %}
5636 
5637   interface(REG_INTER)
5638 %}
5639 
5640 opclass vmem(indirect, indIndex, indOffI, indOffL);  // subset of the memory operand forms; NOTE(review): presumably for vector load/store rules -- confirm against its users
5641 
5642 //----------OPERAND CLASSES----------------------------------------------------
5643 // Operand Classes are groups of operands that are used to simplify
5644 // instruction definitions by not requiring the AD writer to specify
5645 // separate instructions for every form of operand when the
5646 // instruction accepts multiple operand types with the same basic
5647 // encoding and format. The classic case of this is memory operands.
5648 
5649 // memory is used to define the read/write location for load/store
5650 // instruction defs. We can turn a memory op into an Address. The first
5651 // line lists the plain operand forms, the second the matching forms with an N suffix.
5652 opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
5653                indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
5654 
5655 
5656 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
5657 // operations. It allows the src to be either an iRegI or a (ConvL2I
5658 // iRegL). In the latter case the l2i normally planted for a ConvL2I
5659 // can be elided because the 32-bit instruction will just employ the
5660 // lower 32 bits anyway.
5661 //
5662 // n.b. this does not elide all L2I conversions. If the truncated
5663 // value is consumed by more than one operation then the ConvL2I
5664 // cannot be bundled into the consuming nodes, so an l2i gets planted
5665 // (actually a movw $dst $src) and the downstream instructions consume
5666 // the result of the l2i as an iRegI input. That's a shame since the
5667 // movw is actually redundant, but it's not too costly.
5668 
5669 opclass iRegIorL2I(iRegI, iRegL2I);
5670 
5671 //----------PIPELINE-----------------------------------------------------------
5672 // Rules which define the behavior of the target architecture's pipeline.
5673 // Integer ALU reg operation
5674 pipeline %{
5675 
5676 attributes %{
5677   // ARM instructions are of fixed length
5678   fixed_size_instructions;        // Fixed size instructions (the original note ended in an unfinished "TODO does")
5679   max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4; the A53 value is the one used here
5680   // ARM instructions come in 32-bit word units
5681   instruction_unit_size = 4;         // An instruction is 4 bytes long
5682   instruction_fetch_unit_size = 64;  // The processor fetches one line
5683   instruction_fetch_units = 1;       // of 64 bytes
5684 
5685   // List of nop instructions
5686   nops( MachNop );
5687 %}
5688 
5689 // We don't use an actual pipeline model so don't care about resources
5690 // or description. we do use pipeline classes to introduce fixed
5691 // latencies
5692 
5693 //----------RESOURCES----------------------------------------------------------
5694 // Resources are the functional units available to the machine
5695 
5696 resources( INS0, INS1, INS01 = INS0 | INS1,
5697            ALU0, ALU1, ALU = ALU0 | ALU1,
5698            MAC,
5699            DIV,
5700            BRANCH,
5701            LDST,
5702            NEON_FP);  // INS01 and ALU are unions: either issue slot / either ALU satisfies them
5703 
5704 //----------PIPELINE DESCRIPTION-----------------------------------------------
5705 // Pipeline Description specifies the stages in the machine's pipeline
5706 
5707 pipe_desc(ISS, EX1, EX2, WR);  // the only stage names the pipe_class entries below may use
5708 
5709 //----------PIPELINE CLASSES---------------------------------------------------
5710 // Pipeline Classes describe the stages in which input and output are
5711 // referenced by the hardware pipeline.
5712 
5713 //------- Integer ALU operations --------------------------
5714 
5715 // Integer ALU reg-reg operation
5716 // Operands needed in EX1, result generated in EX2
5717 // Eg.  ADD     x0, x1, x2
5718 pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
5719 %{
5720   single_instruction;
5721   dst    : EX2(write);  // result written in EX2
5722   src1   : EX1(read);   // both sources read in EX1
5723   src2   : EX1(read);
5724   INS01  : ISS; // Dual issue as instruction 0 or 1
5725   ALU    : EX2;  // occupies an ALU (ALU0 or ALU1) in EX2
5726 %}
5727 
5728 // Integer ALU reg-reg operation with constant shift
5729 // Shifted register (src2) must be available in ISS instead of EX1 (pipe_desc declares no separate LATE_ISS stage)
5730 // Eg.  ADD     x0, x1, x2, LSL #2
5731 pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
5732 %{
5733   single_instruction;
5734   dst    : EX2(write);
5735   src1   : EX1(read);
5736   src2   : ISS(read);  // the operand being shifted is read one stage earlier than src1
5737   INS01  : ISS;
5738   ALU    : EX2;
5739 %}
5740 
5741 // Integer ALU reg operation with constant shift
5742 // Eg.  LSL     x0, x1, #shift
5743 pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
5744 %{
5745   single_instruction;
5746   dst    : EX2(write);
5747   src1   : ISS(read);  // shifted source is read at issue
5748   INS01  : ISS;        // may issue in either slot
5749   ALU    : EX2;
5750 %}
5751 
5752 // Integer ALU reg-reg operation with variable shift
5753 // Both operands must be available in ISS instead of EX1 (pipe_desc declares no separate LATE_ISS stage)
5754 // Result is available in EX1 instead of EX2
5755 // Eg.  LSLV    x0, x1, x2
5756 pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
5757 %{
5758   single_instruction;
5759   dst    : EX1(write);  // result one stage earlier than the plain reg-reg class
5760   src1   : ISS(read);
5761   src2   : ISS(read);
5762   INS01  : ISS;
5763   ALU    : EX1;
5764 %}
5765 
5766 // Integer ALU reg-reg operation with extract
5767 // As for _vshift above, but result generated in EX2
5768 // Eg.  EXTR    x0, x1, x2, #N
5769 pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
5770 %{
5771   single_instruction;
5772   dst    : EX2(write);
5773   src1   : ISS(read);
5774   src2   : ISS(read);
5775   INS1   : ISS; // Can only dual issue as Instruction 1
5776   ALU    : EX1;  // NOTE(review): ALU is claimed in EX1 although dst is written in EX2 -- confirm this is intended
5777 %}
5778 
5779 // Integer ALU reg operation (single source register)
5780 // Eg.  NEG     x0, x1
5781 pipe_class ialu_reg(iRegI dst, iRegI src)
5782 %{
5783   single_instruction;
5784   dst    : EX2(write);  // result written in EX2
5785   src    : EX1(read);   // source read in EX1
5786   INS01  : ISS;         // may issue in either slot
5787   ALU    : EX2;
5788 %}
5789 
5790 // Integer ALU reg immediate operation
5791 // Eg.  ADD     x0, x1, #N
5792 pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
5793 %{
5794   single_instruction;
5795   dst    : EX2(write);  // result written in EX2
5796   src1   : EX1(read);   // register source read in EX1; the immediate is not a pipeline operand
5797   INS01  : ISS;
5798   ALU    : EX2;
5799 %}
5800 
5801 // Integer ALU immediate operation (no source operands)
5802 // Eg.  MOV     x0, #N
5803 pipe_class ialu_imm(iRegI dst)
5804 %{
5805   single_instruction;
5806   dst    : EX1(write);  // with no register inputs the result is written in EX1
5807   INS01  : ISS;
5808   ALU    : EX1;
5809 %}
5810 
5811 //------- Compare operation -------------------------------
5812 
5813 // Compare reg-reg
5814 // Eg.  CMP     x0, x1
5815 pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
5816 %{
5817   single_instruction;
5818 //  fixed_latency(16);
5819   cr     : EX2(write);  // flags register written in EX2
5820   op1    : EX1(read);
5821   op2    : EX1(read);
5822   INS01  : ISS;
5823   ALU    : EX2;
5824 %}
5825 
5826 // Compare reg-imm
5827 // Eg.  CMP     x0, #N
5828 pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
5829 %{
5830   single_instruction;
5831 //  fixed_latency(16);
5832   cr     : EX2(write);  // flags register written in EX2
5833   op1    : EX1(read);
5834   INS01  : ISS;
5835   ALU    : EX2;
5836 %}
5837 
5838 //------- Conditional instructions ------------------------
5839 
5840 // Conditional no operands (only the flags are read)
5841 // Eg.  CSINC   x0, zr, zr, <cond>
5842 pipe_class icond_none(iRegI dst, rFlagsReg cr)
5843 %{
5844   single_instruction;
5845   cr     : EX1(read);   // flags read in EX1
5846   dst    : EX2(write);  // result written in EX2
5847   INS01  : ISS;
5848   ALU    : EX2;
5849 %}
5850 
5851 // Conditional 2 operand
5852 // EG.  CSEL    X0, X1, X2, <cond>
5853 pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
5854 %{
5855   single_instruction;
5856   cr     : EX1(read);   // flags and both sources are read in EX1
5857   src1   : EX1(read);
5858   src2   : EX1(read);
5859   dst    : EX2(write);
5860   INS01  : ISS;
5861   ALU    : EX2;
5862 %}
5863 
5864 // Conditional 1 operand (one register source plus the flags)
5865 // EG.  CSEL    X0, X1, X2, <cond>  -- NOTE(review): example copied from the 2 operand class; presumably one source is zr here
5866 pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
5867 %{
5868   single_instruction;
5869   cr     : EX1(read);
5870   src    : EX1(read);
5871   dst    : EX2(write);
5872   INS01  : ISS;
5873   ALU    : EX2;
5874 %}
5875 
5876 //------- Multiply pipeline operations --------------------
5877 
5878 // Multiply reg-reg
5879 // Eg.  MUL     w0, w1, w2
5880 pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
5881 %{
5882   single_instruction;
5883   dst    : WR(write);  // result only available in the final WR stage
5884   src1   : ISS(read);  // sources read at issue
5885   src2   : ISS(read);
5886   INS01  : ISS;
5887   MAC    : WR;         // uses the MAC resource rather than an ALU
5888 %}
5889 
5890 // Multiply accumulate
5891 // Eg.  MADD    w0, w1, w2, w3
5892 pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
5893 %{
5894   single_instruction;
5895   dst    : WR(write);  // result only available in the final WR stage
5896   src1   : ISS(read);  // all three sources read at issue
5897   src2   : ISS(read);
5898   src3   : ISS(read);
5899   INS01  : ISS;
5900   MAC    : WR;
5901 %}
5902 
5903 // Multiply reg-reg, 64 bit.  Eg.  MUL     x0, x1, x2
5904 pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
5905 %{
5906   single_instruction;
5907   fixed_latency(3); // Maximum latency for 64 bit mul
5908   dst    : WR(write);
5909   src1   : ISS(read);
5910   src2   : ISS(read);
5911   INS01  : ISS;
5912   MAC    : WR;  // same stage/resource layout as imul_reg_reg, plus the fixed latency
5913 %}
5914 
5915 // Multiply accumulate, 64 bit
5916 // Eg.  MADD    x0, x1, x2, x3
5917 pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
5918 %{
5919   single_instruction;
5920   fixed_latency(3); // Maximum latency for 64 bit mul
5921   dst    : WR(write);
5922   src1   : ISS(read);
5923   src2   : ISS(read);
5924   src3   : ISS(read);
5925   INS01  : ISS;
5926   MAC    : WR;  // same stage/resource layout as imac_reg_reg, plus the fixed latency
5927 %}
5928 
5929 //------- Divide pipeline operations --------------------
5930 
5931 // Divide reg-reg, 32 bit.  Eg.  SDIV    w0, w1, w2
5932 pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
5933 %{
5934   single_instruction;
5935   fixed_latency(8); // Maximum latency for 32 bit divide
5936   dst    : WR(write);
5937   src1   : ISS(read);
5938   src2   : ISS(read);
5939   INS0   : ISS; // Can only dual issue as instruction 0
5940   DIV    : WR;  // uses the dedicated DIV resource
5941 %}
5942 
5943 // Divide reg-reg, 64 bit.  Eg.  SDIV    x0, x1, x2
5944 pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
5945 %{
5946   single_instruction;
5947   fixed_latency(16); // Maximum latency for 64 bit divide
5948   dst    : WR(write);
5949   src1   : ISS(read);
5950   src2   : ISS(read);
5951   INS0   : ISS; // Can only dual issue as instruction 0
5952   DIV    : WR;  // uses the dedicated DIV resource
5953 %}
5954 
5955 //------- Load pipeline operations ------------------------
5956 
5957 // Load - prefetch (no destination register)
5958 // Eg.  PRFM    <mem>
5959 pipe_class iload_prefetch(memory mem)
5960 %{
5961   single_instruction;
5962   mem    : ISS(read);  // address operand read at issue
5963   INS01  : ISS;
5964   LDST   : WR;         // uses the load/store resource
5965 %}
5966 
5967 // Load - reg, mem
5968 // Eg.  LDR     x0, <mem>
5969 pipe_class iload_reg_mem(iRegI dst, memory mem)
5970 %{
5971   single_instruction;
5972   dst    : WR(write);  // loaded value available in WR
5973   mem    : ISS(read);  // address operand read at issue
5974   INS01  : ISS;
5975   LDST   : WR;
5976 %}
5977 
5978 // Load - reg, reg
5979 // Eg.  LDR     x0, [sp, x1]
5980 pipe_class iload_reg_reg(iRegI dst, iRegI src)
5981 %{
5982   single_instruction;
5983   dst    : WR(write);  // loaded value available in WR
5984   src    : ISS(read);  // register used in the address is read at issue
5985   INS01  : ISS;
5986   LDST   : WR;
5987 %}
5988 
5989 //------- Store pipeline operations -----------------------
5990 
5991 // Store - zr, mem (no source register operand)
5992 // Eg.  STR     zr, <mem>
5993 pipe_class istore_mem(memory mem)
5994 %{
5995   single_instruction;
5996   mem    : ISS(read);  // address operand read at issue
5997   INS01  : ISS;
5998   LDST   : WR;
5999 %}
6000 
6001 // Store - reg, mem
6002 // Eg.  STR     x0, <mem>
6003 pipe_class istore_reg_mem(iRegI src, memory mem)
6004 %{
6005   single_instruction;
6006   mem    : ISS(read);  // address operand read at issue
6007   src    : EX2(read);  // the value being stored is not read until EX2
6008   INS01  : ISS;
6009   LDST   : WR;
6010 %}
6011 
6012 // Store - reg, reg
6013 // Eg. STR      x0, [sp, x1]
6014 pipe_class istore_reg_reg(iRegI dst, iRegI src)
6015 %{
6016   single_instruction;
6017   dst    : ISS(read);  // dst is read, not written, at issue -- NOTE(review): presumably the address register; confirm
6018   src    : EX2(read);  // the value being stored is not read until EX2
6019   INS01  : ISS;
6020   LDST   : WR;
6021 %}
6022 
6023 //------- Branch pipeline operations ----------------------
6024 
6025 // Branch (unconditional: no operands are read)
6026 pipe_class pipe_branch()
6027 %{
6028   single_instruction;
6029   INS01  : ISS;   // may issue in either slot
6030   BRANCH : EX1;   // uses the BRANCH resource in EX1
6031 %}
6032 
6033 // Conditional branch (reads the flags register)
6034 pipe_class pipe_branch_cond(rFlagsReg cr)
6035 %{
6036   single_instruction;
6037   cr     : EX1(read);  // flags read in EX1
6038   INS01  : ISS;
6039   BRANCH : EX1;
6040 %}
6041 
6042 // Compare & Branch (reads a register instead of the flags)
6043 // EG.  CBZ/CBNZ
6044 pipe_class pipe_cmp_branch(iRegI op1)
6045 %{
6046   single_instruction;
6047   op1    : EX1(read);  // register under test read in EX1
6048   INS01  : ISS;
6049   BRANCH : EX1;
6050 %}
6051 
6052 //------- Synchronisation operations ----------------------
6053 
6054 // Any operation requiring serialization.
6055 // EG.  DMB/Atomic Ops/Load Acquire/Str Release
6056 pipe_class pipe_serial()
6057 %{
6058   single_instruction;
6059   force_serialization;
6060   fixed_latency(16);
6061   INS01  : ISS(2); // Cannot dual issue with any other instruction
6062   LDST   : WR;     // uses the load/store resource
6063 %}
6064 
6065 // Generic big/slow expanded idiom - also serialized
6066 pipe_class pipe_slow()
6067 %{
6068   instruction_count(10);  // modelled as 10 instructions
6069   multiple_bundles;       // and as spanning more than one bundle
6070   force_serialization;
6071   fixed_latency(16);
6072   INS01  : ISS(2); // Cannot dual issue with any other instruction
6073   LDST   : WR;
6074 %}
6075 
6076 // Empty pipeline class: zero latency, no resources or operands; it is the class bound to MachNop in the define section.
6077 pipe_class pipe_class_empty()
6078 %{
6079   single_instruction;
6080   fixed_latency(0);
6081 %}
6082 
6083 // Default pipeline class: a fixed latency of 2 with no resource or operand modelling.
6084 pipe_class pipe_class_default()
6085 %{
6086   single_instruction;
6087   fixed_latency(2);
6088 %}
6089 
6090 // Pipeline class for compares: a fixed latency of 16 with no resource or operand modelling.
6091 pipe_class pipe_class_compare()
6092 %{
6093   single_instruction;
6094   fixed_latency(16);
6095 %}
6096 
6097 // Pipeline class for memory operations: a fixed latency of 16 with no resource or operand modelling.
6098 pipe_class pipe_class_memory()
6099 %{
6100   single_instruction;
6101   fixed_latency(16);
6102 %}
6103 
6104 // Pipeline class for call: a fixed latency of 100 with no resource or operand modelling.
6105 pipe_class pipe_class_call()
6106 %{
6107   single_instruction;
6108   fixed_latency(100);
6109 %}
6110 
6111 // Define the class for the Nop node: MachNop (named in the attributes nops list) uses the empty pipeline class.
6112 define %{
6113    MachNop = pipe_class_empty;
6114 %}
6115 
6116 %}
6117 //----------INSTRUCTIONS-------------------------------------------------------
6118 //
6119 // match      -- States which machine-independent subtree may be replaced
6120 //               by this instruction.
6121 // ins_cost   -- The estimated cost of this instruction is used by instruction
6122 //               selection to identify a minimum cost tree of machine
6123 //               instructions that matches a tree of machine-independent
6124 //               instructions.
6125 // format     -- A string providing the disassembly for this instruction.
6126 //               The value of an instruction's operand may be inserted
6127 //               by referring to it with a '$' prefix.
6128 // opcode     -- Three instruction opcodes may be provided.  These are referred
6129 //               to within an encode class as $primary, $secondary, and $tertiary
6130 //               respectively.  The primary opcode is commonly used to
6131 //               indicate the type of machine instruction, while secondary
6132 //               and tertiary are often used for prefix options or addressing
6133 //               modes.
6134 // ins_encode -- A list of encode classes with parameters. The encode class
6135 //               name must have been defined in an 'enc_class' specification
6136 //               in the encode section of the architecture description.
6137 
6138 // ============================================================================
6139 // Memory (Load/Store) Instructions
6140 
6141 // Load Instructions
6142 
6143 // Load Byte (8 bit signed): matches an ideal LoadB into an int register, encoded via aarch64_enc_ldrsbw.
6144 instruct loadB(iRegINoSp dst, memory mem)
6145 %{
6146   match(Set dst (LoadB mem));
6147   predicate(!needs_acquiring_load(n));  // n is the LoadB; rule applies only when needs_acquiring_load() (defined elsewhere) is false
6148 
6149   ins_cost(4 * INSN_COST);
6150   format %{ "ldrsbw  $dst, $mem\t# byte" %}
6151 
6152   ins_encode(aarch64_enc_ldrsbw(dst, mem));
6153 
6154   ins_pipe(iload_reg_mem);  // scheduled with the reg <- mem load class
6155 %}
6156 
6157 // Load Byte (8 bit signed) into long: folds the ConvI2L into the load, encoded via aarch64_enc_ldrsb.
6158 instruct loadB2L(iRegLNoSp dst, memory mem)
6159 %{
6160   match(Set dst (ConvI2L (LoadB mem)));
6161   predicate(!needs_acquiring_load(n->in(1)));  // n is the ConvI2L, so n->in(1) is the LoadB being tested
6162 
6163   ins_cost(4 * INSN_COST);
6164   format %{ "ldrsb  $dst, $mem\t# byte" %}
6165 
6166   ins_encode(aarch64_enc_ldrsb(dst, mem));
6167 
6168   ins_pipe(iload_reg_mem);
6169 %}
6170 
6171 // Load Byte (8 bit unsigned): matches an ideal LoadUB into an int register, encoded via aarch64_enc_ldrb.
6172 instruct loadUB(iRegINoSp dst, memory mem)
6173 %{
6174   match(Set dst (LoadUB mem));
6175   predicate(!needs_acquiring_load(n));  // n is the LoadUB; rule applies only when needs_acquiring_load() is false
6176 
6177   ins_cost(4 * INSN_COST);
6178   format %{ "ldrbw  $dst, $mem\t# byte" %}
6179 
6180   ins_encode(aarch64_enc_ldrb(dst, mem));  // note: format prints "ldrbw" while the encoding class is named ldrb
6181 
6182   ins_pipe(iload_reg_mem);
6183 %}
6184 
6185 // Load Byte (8 bit unsigned) into long: folds the ConvI2L into the load, using the same encoding class as loadUB.
6186 instruct loadUB2L(iRegLNoSp dst, memory mem)
6187 %{
6188   match(Set dst (ConvI2L (LoadUB mem)));
6189   predicate(!needs_acquiring_load(n->in(1)));  // n is the ConvI2L, so n->in(1) is the LoadUB being tested
6190 
6191   ins_cost(4 * INSN_COST);
6192   format %{ "ldrb  $dst, $mem\t# byte" %}
6193 
6194   ins_encode(aarch64_enc_ldrb(dst, mem));
6195 
6196   ins_pipe(iload_reg_mem);
6197 %}
6198 
6199 // Load Short (16 bit signed): matches an ideal LoadS into an int register, encoded via aarch64_enc_ldrshw.
6200 instruct loadS(iRegINoSp dst, memory mem)
6201 %{
6202   match(Set dst (LoadS mem));
6203   predicate(!needs_acquiring_load(n));  // n is the LoadS; rule applies only when needs_acquiring_load() is false
6204 
6205   ins_cost(4 * INSN_COST);
6206   format %{ "ldrshw  $dst, $mem\t# short" %}
6207 
6208   ins_encode(aarch64_enc_ldrshw(dst, mem));
6209 
6210   ins_pipe(iload_reg_mem);
6211 %}
6212 
6213 // Load Short (16 bit signed) into long: folds the ConvI2L into the load, encoded via aarch64_enc_ldrsh.
6214 instruct loadS2L(iRegLNoSp dst, memory mem)
6215 %{
6216   match(Set dst (ConvI2L (LoadS mem)));
6217   predicate(!needs_acquiring_load(n->in(1)));  // n is the ConvI2L, so n->in(1) is the LoadS being tested
6218 
6219   ins_cost(4 * INSN_COST);
6220   format %{ "ldrsh  $dst, $mem\t# short" %}
6221 
6222   ins_encode(aarch64_enc_ldrsh(dst, mem));
6223 
6224   ins_pipe(iload_reg_mem);
6225 %}
6226 
6227 // Load Char (16 bit unsigned): matches an ideal LoadUS into an int register, encoded via aarch64_enc_ldrh.
6228 instruct loadUS(iRegINoSp dst, memory mem)
6229 %{
6230   match(Set dst (LoadUS mem));
6231   predicate(!needs_acquiring_load(n));  // n is the LoadUS; rule applies only when needs_acquiring_load() is false
6232 
6233   ins_cost(4 * INSN_COST);
6234   format %{ "ldrh  $dst, $mem\t# short" %}
6235 
6236   ins_encode(aarch64_enc_ldrh(dst, mem));
6237 
6238   ins_pipe(iload_reg_mem);
6239 %}
6240 
6241 // Load Short/Char (16 bit unsigned) into long: folds the ConvI2L into the load, using the same encoding class as loadUS.
6242 instruct loadUS2L(iRegLNoSp dst, memory mem)
6243 %{
6244   match(Set dst (ConvI2L (LoadUS mem)));
6245   predicate(!needs_acquiring_load(n->in(1)));  // n is the ConvI2L, so n->in(1) is the LoadUS being tested
6246 
6247   ins_cost(4 * INSN_COST);
6248   format %{ "ldrh  $dst, $mem\t# short" %}
6249 
6250   ins_encode(aarch64_enc_ldrh(dst, mem));
6251 
6252   ins_pipe(iload_reg_mem);
6253 %}
6254 
6255 // Load Integer (32 bit signed): matches an ideal LoadI into an int register, encoded via aarch64_enc_ldrw.
6256 instruct loadI(iRegINoSp dst, memory mem)
6257 %{
6258   match(Set dst (LoadI mem));
6259   predicate(!needs_acquiring_load(n));  // n is the LoadI; rule applies only when needs_acquiring_load() is false
6260 
6261   ins_cost(4 * INSN_COST);
6262   format %{ "ldrw  $dst, $mem\t# int" %}
6263 
6264   ins_encode(aarch64_enc_ldrw(dst, mem));
6265 
6266   ins_pipe(iload_reg_mem);
6267 %}
6268 
6269 // Load Integer (32 bit signed) into long: folds the ConvI2L into the load, encoded via aarch64_enc_ldrsw.
6270 instruct loadI2L(iRegLNoSp dst, memory mem)
6271 %{
6272   match(Set dst (ConvI2L (LoadI mem)));
6273   predicate(!needs_acquiring_load(n->in(1)));  // n is the ConvI2L, so n->in(1) is the LoadI being tested
6274 
6275   ins_cost(4 * INSN_COST);
6276   format %{ "ldrsw  $dst, $mem\t# int" %}
6277 
6278   ins_encode(aarch64_enc_ldrsw(dst, mem));
6279 
6280   ins_pipe(iload_reg_mem);
6281 %}
6282 
6283 // Load Integer (32 bit unsigned) into long: folds (AndL (ConvI2L (LoadI mem)) mask) into one load, using the same encoding class as loadI.
6284 instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
6285 %{
6286   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
6287   predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));  // n is the AndL; in(1) is the ConvI2L and its in(1) is the LoadI
6288 
6289   ins_cost(4 * INSN_COST);
6290   format %{ "ldrw  $dst, $mem\t# int" %}
6291 
6292   ins_encode(aarch64_enc_ldrw(dst, mem));
6293 
6294   ins_pipe(iload_reg_mem);
6295 %}
6296 
6297 // Load Long (64 bit signed): matches an ideal LoadL into a long register, encoded via aarch64_enc_ldr.
6298 instruct loadL(iRegLNoSp dst, memory mem)
6299 %{
6300   match(Set dst (LoadL mem));
6301   predicate(!needs_acquiring_load(n));  // n is the LoadL; rule applies only when needs_acquiring_load() is false
6302 
6303   ins_cost(4 * INSN_COST);
6304   format %{ "ldr  $dst, $mem\t# long" %}
6305 
6306   ins_encode(aarch64_enc_ldr(dst, mem));
6307 
6308   ins_pipe(iload_reg_mem);
6309 %}
6310 
6311 // Load Range: matches an ideal LoadRange into an int register, using the same encoding class as loadI.
6312 instruct loadRange(iRegINoSp dst, memory mem)
6313 %{
6314   match(Set dst (LoadRange mem));  // unlike the other loads here, this rule has no needs_acquiring_load predicate
6315 
6316   ins_cost(4 * INSN_COST);
6317   format %{ "ldrw  $dst, $mem\t# range" %}
6318 
6319   ins_encode(aarch64_enc_ldrw(dst, mem));
6320 
6321   ins_pipe(iload_reg_mem);
6322 %}
6323 
6324 // Load Pointer: matches an ideal LoadP into a pointer register, encoded via aarch64_enc_ldr.
6325 instruct loadP(iRegPNoSp dst, memory mem)
6326 %{
6327   match(Set dst (LoadP mem));
6328   predicate(!needs_acquiring_load(n));  // n is the LoadP; rule applies only when needs_acquiring_load() is false
6329 
6330   ins_cost(4 * INSN_COST);
6331   format %{ "ldr  $dst, $mem\t# ptr" %}
6332 
6333   ins_encode(aarch64_enc_ldr(dst, mem));
6334 
6335   ins_pipe(iload_reg_mem);
6336 %}
6337 
6338 // Load Compressed Pointer: matches an ideal LoadN into a narrow-oop register, encoded via aarch64_enc_ldrw.
6339 instruct loadN(iRegNNoSp dst, memory mem)
6340 %{
6341   match(Set dst (LoadN mem));
6342   predicate(!needs_acquiring_load(n));  // n is the LoadN; rule applies only when needs_acquiring_load() is false
6343 
6344   ins_cost(4 * INSN_COST);
6345   format %{ "ldrw  $dst, $mem\t# compressed ptr" %}
6346 
6347   ins_encode(aarch64_enc_ldrw(dst, mem));
6348 
6349   ins_pipe(iload_reg_mem);
6350 %}
6351 
6352 // Load Klass Pointer: matches an ideal LoadKlass into a pointer register, using the same encoding class as loadP.
6353 instruct loadKlass(iRegPNoSp dst, memory mem)
6354 %{
6355   match(Set dst (LoadKlass mem));
6356   predicate(!needs_acquiring_load(n));  // n is the LoadKlass; rule applies only when needs_acquiring_load() is false
6357 
6358   ins_cost(4 * INSN_COST);
6359   format %{ "ldr  $dst, $mem\t# class" %}
6360 
6361   ins_encode(aarch64_enc_ldr(dst, mem));
6362 
6363   ins_pipe(iload_reg_mem);
6364 %}
6365 
6366 // Load Narrow Klass Pointer: matches an ideal LoadNKlass into a narrow register, using the same encoding class as loadN.
6367 instruct loadNKlass(iRegNNoSp dst, memory mem)
6368 %{
6369   match(Set dst (LoadNKlass mem));
6370   predicate(!needs_acquiring_load(n));  // n is the LoadNKlass; rule applies only when needs_acquiring_load() is false
6371 
6372   ins_cost(4 * INSN_COST);
6373   format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}
6374 
6375   ins_encode(aarch64_enc_ldrw(dst, mem));
6376 
6377   ins_pipe(iload_reg_mem);
6378 %}
6379 
6380 // Load Float: matches an ideal LoadF into a vRegF, encoded via aarch64_enc_ldrs.
6381 instruct loadF(vRegF dst, memory mem)
6382 %{
6383   match(Set dst (LoadF mem));
6384   predicate(!needs_acquiring_load(n));  // n is the LoadF; rule applies only when needs_acquiring_load() is false
6385 
6386   ins_cost(4 * INSN_COST);
6387   format %{ "ldrs  $dst, $mem\t# float" %}
6388 
6389   ins_encode( aarch64_enc_ldrs(dst, mem) );
6390 
6391   ins_pipe(pipe_class_memory);  // fixed-latency memory class rather than iload_reg_mem (whose dst is declared iRegI)
6392 %}
6393 
6394 // Load Double: matches an ideal LoadD into a vRegD, encoded via aarch64_enc_ldrd.
6395 instruct loadD(vRegD dst, memory mem)
6396 %{
6397   match(Set dst (LoadD mem));
6398   predicate(!needs_acquiring_load(n));  // n is the LoadD; rule applies only when needs_acquiring_load() is false
6399 
6400   ins_cost(4 * INSN_COST);
6401   format %{ "ldrd  $dst, $mem\t# double" %}
6402 
6403   ins_encode( aarch64_enc_ldrd(dst, mem) );
6404 
6405   ins_pipe(pipe_class_memory);  // fixed-latency memory class rather than iload_reg_mem (whose dst is declared iRegI)
6406 %}
6407 
6408 
6409 // Load Int Constant: materialises an immI into an int register via aarch64_enc_movw_imm.
6410 instruct loadConI(iRegINoSp dst, immI src)
6411 %{
6412   match(Set dst src);  // the constant operand itself is the whole right-hand side
6413 
6414   ins_cost(INSN_COST);
6415   format %{ "mov $dst, $src\t# int" %}
6416 
6417   ins_encode( aarch64_enc_movw_imm(dst, src) );
6418 
6419   ins_pipe(ialu_imm);  // ALU op with no register sources
6420 %}
6421 
6422 // Load Long Constant: materialises an immL into a long register via aarch64_enc_mov_imm.
6423 instruct loadConL(iRegLNoSp dst, immL src)
6424 %{
6425   match(Set dst src);  // the constant operand itself is the whole right-hand side
6426 
6427   ins_cost(INSN_COST);
6428   format %{ "mov $dst, $src\t# long" %}
6429 
6430   ins_encode( aarch64_enc_mov_imm(dst, src) );
6431 
6432   ins_pipe(ialu_imm);  // ALU op with no register sources
6433 %}
6434 
6435 // Load Pointer Constant: materialises a general immP into a pointer register via aarch64_enc_mov_p.
6436 
6437 instruct loadConP(iRegPNoSp dst, immP con)
6438 %{
6439   match(Set dst con);
6440 
6441   ins_cost(INSN_COST * 4);  // costed at four instructions, unlike the single-instruction special cases below
6442   format %{
6443     "mov  $dst, $con\t# ptr\n\t"
6444   %}
6445 
6446   ins_encode(aarch64_enc_mov_p(dst, con));
6447 
6448   ins_pipe(ialu_imm);
6449 %}
6450 
6451 // Load Null Pointer Constant: special case for the immP0 operand, encoded via aarch64_enc_mov_p0.
6452 
6453 instruct loadConP0(iRegPNoSp dst, immP0 con)
6454 %{
6455   match(Set dst con);
6456 
6457   ins_cost(INSN_COST);  // single-instruction cost
6458   format %{ "mov  $dst, $con\t# NULL ptr" %}
6459 
6460   ins_encode(aarch64_enc_mov_p0(dst, con));
6461 
6462   ins_pipe(ialu_imm);
6463 %}
6464 
6465 // Load Pointer Constant One: special case for the immP_1 operand, encoded via aarch64_enc_mov_p1.
6466 
6467 instruct loadConP1(iRegPNoSp dst, immP_1 con)
6468 %{
6469   match(Set dst con);
6470 
6471   ins_cost(INSN_COST);  // single-instruction cost
6472   format %{ "mov  $dst, $con\t# ptr 1" %}
6473 
6474   ins_encode(aarch64_enc_mov_p1(dst, con));
6475 
6476   ins_pipe(ialu_imm);
6477 %}
6478 
6479 // Load Poll Page Constant: special case for the immPollPage operand, encoded via aarch64_enc_mov_poll_page.
6480 
6481 instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
6482 %{
6483   match(Set dst con);
6484 
6485   ins_cost(INSN_COST);
6486   format %{ "adr  $dst, $con\t# Poll Page Ptr" %}
6487 
6488   ins_encode(aarch64_enc_mov_poll_page(dst, con));
6489 
6490   ins_pipe(ialu_imm);
6491 %}
6492 
6493 // Load Byte Map Base Constant: special case for the immByteMapBase operand, encoded via aarch64_enc_mov_byte_map_base.
6494 
6495 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
6496 %{
6497   match(Set dst con);
6498 
6499   ins_cost(INSN_COST);
6500   format %{ "adr  $dst, $con\t# Byte Map Base" %}
6501 
6502   ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
6503 
6504   ins_pipe(ialu_imm);
6505 %}
6506 
6507 // Load Narrow Pointer Constant: materialises an immN into a narrow register via aarch64_enc_mov_n.
6508 
6509 instruct loadConN(iRegNNoSp dst, immN con)
6510 %{
6511   match(Set dst con);
6512 
6513   ins_cost(INSN_COST * 4);  // costed at four instructions, like loadConP
6514   format %{ "mov  $dst, $con\t# compressed ptr" %}
6515 
6516   ins_encode(aarch64_enc_mov_n(dst, con));
6517 
6518   ins_pipe(ialu_imm);
6519 %}
6520 
6521 // Load Narrow Null Pointer Constant: special case for the immN0 operand, encoded via aarch64_enc_mov_n0.
6522 
6523 instruct loadConN0(iRegNNoSp dst, immN0 con)
6524 %{
6525   match(Set dst con);
6526 
6527   ins_cost(INSN_COST);  // single-instruction cost
6528   format %{ "mov  $dst, $con\t# compressed NULL ptr" %}
6529 
6530   ins_encode(aarch64_enc_mov_n0(dst, con));
6531 
6532   ins_pipe(ialu_imm);
6533 %}
6534 
6535 // Load Narrow Klass Constant: materialises an immNKlass into a narrow register via aarch64_enc_mov_nk.
6536 
6537 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
6538 %{
6539   match(Set dst con);
6540 
6541   ins_cost(INSN_COST);
6542   format %{ "mov  $dst, $con\t# compressed klass ptr" %}
6543 
6544   ins_encode(aarch64_enc_mov_nk(dst, con));
6545 
6546   ins_pipe(ialu_imm);
6547 %}
6548 
6549 // Load Packed Float Constant: an immFPacked constant is emitted directly with fmovs, without a constant-table load.
6550 
6551 instruct loadConF_packed(vRegF dst, immFPacked con) %{
6552   match(Set dst con);
6553   ins_cost(INSN_COST * 4);  // NOTE(review): same cost as the constant-table loadConF, whereas loadConD_packed uses INSN_COST -- confirm intent
6554   format %{ "fmovs  $dst, $con"%}
6555   ins_encode %{
6556     __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
6557   %}
6558 
6559   ins_pipe(pipe_class_default);
6560 %}
6561 
6562 // Load Float Constant: general immF case, loaded with ldrs from the address given by $constantaddress($con).
6563 
6564 instruct loadConF(vRegF dst, immF con) %{
6565   match(Set dst con);
6566 
6567   ins_cost(INSN_COST * 4);
6568 
6569   format %{
6570     "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
6571   %}
6572 
6573   ins_encode %{
6574     __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
6575   %}
6576 
6577   ins_pipe(pipe_class_default);
6578 %}
6579 
6580 // Load Packed Double Constant: an immDPacked constant is emitted directly with fmovd, without a constant-table load.
6581 
6582 instruct loadConD_packed(vRegD dst, immDPacked con) %{
6583   match(Set dst con);
6584   ins_cost(INSN_COST);  // cheaper than the constant-table loadConD (INSN_COST * 5)
6585   format %{ "fmovd  $dst, $con"%}
6586   ins_encode %{
6587     __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
6588   %}
6589 
6590   ins_pipe(pipe_class_default);
6591 %}
6592 
6593 // Load Double Constant: general immD case, loaded with ldrd from the address given by $constantaddress($con).
6594 
6595 instruct loadConD(vRegD dst, immD con) %{
6596   match(Set dst con);
6597 
6598   ins_cost(INSN_COST * 5);
6599   format %{
6600     "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
6601   %}
6602 
6603   ins_encode %{
6604     __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
6605   %}
6606 
6607   ins_pipe(pipe_class_default);
6608 %}
6609 
6610 // Store Instructions
6611 
6612 // Store CMS card-mark Immediate: matches StoreCM of an immI0 and stores a zero byte via aarch64_enc_strb0.
6613 instruct storeimmCM0(immI0 zero, memory mem)
6614 %{
6615   match(Set mem (StoreCM mem zero));  // no needs_releasing_store predicate on this rule
6616 
6617   ins_cost(INSN_COST);
6618   format %{ "strb zr, $mem\t# byte" %}
6619 
6620   ins_encode(aarch64_enc_strb0(mem));
6621 
6622   ins_pipe(istore_mem);  // store class with no source register
6623 %}
6624 
6625 // Store Byte: matches StoreB of an int register (or a ConvL2I of a long register, via iRegIorL2I), encoded via aarch64_enc_strb.
6626 instruct storeB(iRegIorL2I src, memory mem)
6627 %{
6628   match(Set mem (StoreB mem src));
6629   predicate(!needs_releasing_store(n));  // n is the StoreB; rule applies only when needs_releasing_store() (defined elsewhere) is false
6630 
6631   ins_cost(INSN_COST);
6632   format %{ "strb  $src, $mem\t# byte" %}
6633 
6634   ins_encode(aarch64_enc_strb(src, mem));
6635 
6636   ins_pipe(istore_reg_mem);
6637 %}
6638 
6639 
6640 instruct storeimmB0(immI0 zero, memory mem)  // Store Byte of constant zero, encoded via aarch64_enc_strb0
6641 %{
6642   match(Set mem (StoreB mem zero));
6643   predicate(!needs_releasing_store(n));  // n is the StoreB; rule applies only when needs_releasing_store() is false
6644 
6645   ins_cost(INSN_COST);
6646   format %{ "strb zr, $mem\t# byte" %}
6647 
6648   ins_encode(aarch64_enc_strb0(mem));
6649 
6650   ins_pipe(istore_mem);  // store class with no source register
6651 %}
6652 
6653 // Store Char/Short: matches StoreC of an int register (or a ConvL2I of a long register, via iRegIorL2I), encoded via aarch64_enc_strh.
6654 instruct storeC(iRegIorL2I src, memory mem)
6655 %{
6656   match(Set mem (StoreC mem src));
6657   predicate(!needs_releasing_store(n));  // n is the StoreC; rule applies only when needs_releasing_store() is false
6658 
6659   ins_cost(INSN_COST);
6660   format %{ "strh  $src, $mem\t# short" %}
6661 
6662   ins_encode(aarch64_enc_strh(src, mem));
6663 
6664   ins_pipe(istore_reg_mem);
6665 %}
6666 
6667 instruct storeimmC0(immI0 zero, memory mem)  // Store Char/Short of constant zero, encoded via aarch64_enc_strh0
6668 %{
6669   match(Set mem (StoreC mem zero));
6670   predicate(!needs_releasing_store(n));  // n is the StoreC; rule applies only when needs_releasing_store() is false
6671 
6672   ins_cost(INSN_COST);
6673   format %{ "strh  zr, $mem\t# short" %}
6674 
6675   ins_encode(aarch64_enc_strh0(mem));
6676 
6677   ins_pipe(istore_mem);  // store class with no source register
6678 %}
6679 
6680 // Store Integer: matches StoreI of an int register (or a ConvL2I of a long register, via iRegIorL2I), encoded via aarch64_enc_strw.
6681 
6682 instruct storeI(iRegIorL2I src, memory mem)
6683 %{
6684   match(Set mem(StoreI mem src));
6685   predicate(!needs_releasing_store(n));  // n is the StoreI; rule applies only when needs_releasing_store() is false
6686 
6687   ins_cost(INSN_COST);
6688   format %{ "strw  $src, $mem\t# int" %}
6689 
6690   ins_encode(aarch64_enc_strw(src, mem));
6691 
6692   ins_pipe(istore_reg_mem);
6693 %}
6694 
6695 instruct storeimmI0(immI0 zero, memory mem)  // Store Integer of constant zero, encoded via aarch64_enc_strw0
6696 %{
6697   match(Set mem(StoreI mem zero));
6698   predicate(!needs_releasing_store(n));  // n is the StoreI; rule applies only when needs_releasing_store() is false
6699 
6700   ins_cost(INSN_COST);
6701   format %{ "strw  zr, $mem\t# int" %}
6702 
6703   ins_encode(aarch64_enc_strw0(mem));
6704 
6705   ins_pipe(istore_mem);  // store class with no source register
6706 %}
6707 
6708 // Store Long (64 bit signed)
     // Plain form of StoreL (str in the format): only selected when
     // needs_releasing_store(n) is false. The format tag reads "# long" so
     // the disassembly listing is distinguishable from the 32-bit int store.
6709 instruct storeL(iRegL src, memory mem)
6710 %{
6711   match(Set mem (StoreL mem src));
6712   predicate(!needs_releasing_store(n));
6713 
6714   ins_cost(INSN_COST);
6715   format %{ "str  $src, $mem\t# long" %}
6716 
6717   ins_encode(aarch64_enc_str(src, mem));
6718 
6719   ins_pipe(istore_reg_mem);
6720 %}
6721 
6722 // Store Long zero (64 bit)
     // StoreL with a constant-zero source (immL0); the format shows the zero
     // register (zr) being stored with str. Only selected when
     // needs_releasing_store(n) is false. The format tag reads "# long".
6723 instruct storeimmL0(immL0 zero, memory mem)
6724 %{
6725   match(Set mem (StoreL mem zero));
6726   predicate(!needs_releasing_store(n));
6727 
6728   ins_cost(INSN_COST);
6729   format %{ "str  zr, $mem\t# long" %}
6730 
6731   ins_encode(aarch64_enc_str0(mem));
6732 
6733   ins_pipe(istore_mem);
6734 %}
6735 
6736 // Store Pointer
     // Plain form of StoreP (str in the format, same encoding class as the
     // long store): only selected when needs_releasing_store(n) is false.
6737 instruct storeP(iRegP src, memory mem)
6738 %{
6739   match(Set mem (StoreP mem src));
6740   predicate(!needs_releasing_store(n));
6741 
6742   ins_cost(INSN_COST);
6743   format %{ "str  $src, $mem\t# ptr" %}
6744 
6745   ins_encode(aarch64_enc_str(src, mem));
6746 
6747   ins_pipe(istore_reg_mem);
6748 %}
6749 
6750 // Store Pointer
     // StoreP with a constant null source (immP0); the format shows the zero
     // register (zr) being stored with str. Only selected when
     // needs_releasing_store(n) is false.
6751 instruct storeimmP0(immP0 zero, memory mem)
6752 %{
6753   match(Set mem (StoreP mem zero));
6754   predicate(!needs_releasing_store(n));
6755 
6756   ins_cost(INSN_COST);
6757   format %{ "str zr, $mem\t# ptr" %}
6758 
6759   ins_encode(aarch64_enc_str0(mem));
6760 
6761   ins_pipe(istore_mem);
6762 %}
6763 
6764 // Store Compressed Pointer
     // Plain form of StoreN: the narrow value is written with the 32-bit
     // strw encoding. Only selected when needs_releasing_store(n) is false.
6765 instruct storeN(iRegN src, memory mem)
6766 %{
6767   match(Set mem (StoreN mem src));
6768   predicate(!needs_releasing_store(n));
6769 
6770   ins_cost(INSN_COST);
6771   format %{ "strw  $src, $mem\t# compressed ptr" %}
6772 
6773   ins_encode(aarch64_enc_strw(src, mem));
6774 
6775   ins_pipe(istore_reg_mem);
6776 %}
6777 
6778 instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
6779 %{
       // Store a null compressed pointer (immN0) by storing the heapbase
       // register itself. The predicate restricts this rule to the case where
       // both the narrow-oop base and the narrow-klass base are NULL (the
       // format annotates this as "rheapbase==0") and the store does not need
       // release semantics.
6780   match(Set mem (StoreN mem zero));
6781   predicate(Universe::narrow_oop_base() == NULL &&
6782             Universe::narrow_klass_base() == NULL &&
6783             (!needs_releasing_store(n)));
6784 
6785   ins_cost(INSN_COST);
6786   format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}
6787 
6788   ins_encode(aarch64_enc_strw(heapbase, mem));
6789 
6790   ins_pipe(istore_reg_mem);
6791 %}
6792 
6793 // Store Float
     // Plain form of StoreF from a vRegF source (strs in the format): only
     // selected when needs_releasing_store(n) is false.
6794 instruct storeF(vRegF src, memory mem)
6795 %{
6796   match(Set mem (StoreF mem src));
6797   predicate(!needs_releasing_store(n));
6798 
6799   ins_cost(INSN_COST);
6800   format %{ "strs  $src, $mem\t# float" %}
6801 
6802   ins_encode( aarch64_enc_strs(src, mem) );
6803 
6804   ins_pipe(pipe_class_memory);
6805 %}
6806 
6807 // TODO
6808 // implement storeImmF0 and storeFImmPacked
6809 
6810 // Store Double
     // Plain form of StoreD from a vRegD source (strd in the format): only
     // selected when needs_releasing_store(n) is false.
6811 instruct storeD(vRegD src, memory mem)
6812 %{
6813   match(Set mem (StoreD mem src));
6814   predicate(!needs_releasing_store(n));
6815 
6816   ins_cost(INSN_COST);
6817   format %{ "strd  $src, $mem\t# double" %}
6818 
6819   ins_encode( aarch64_enc_strd(src, mem) );
6820 
6821   ins_pipe(pipe_class_memory);
6822 %}
6823 
6824 // Store Compressed Klass Pointer
     // Matches StoreNKlass and writes the narrow klass value with the 32-bit
     // strw encoding. Only selected when needs_releasing_store(n) is false.
     // (The predicate is listed before the match rule here, unlike the
     // neighbouring store rules.)
6825 instruct storeNKlass(iRegN src, memory mem)
6826 %{
6827   predicate(!needs_releasing_store(n));
6828   match(Set mem (StoreNKlass mem src));
6829 
6830   ins_cost(INSN_COST);
6831   format %{ "strw  $src, $mem\t# compressed klass ptr" %}
6832 
6833   ins_encode(aarch64_enc_strw(src, mem));
6834 
6835   ins_pipe(istore_reg_mem);
6836 %}
6837 
6838 // TODO
6839 // implement storeImmD0 and storeDImmPacked
6840 
6841 // prefetch instructions
6842 // Must be safe to execute with invalid address (cannot fault).
     // prefetchalloc matches PrefetchAllocation; per the format it issues
     // prfm with the PSTL1KEEP hint (prefetch for store, level 1, keep).
6843 
6844 instruct prefetchalloc( memory mem ) %{
6845   match(PrefetchAllocation mem);
6846 
6847   ins_cost(INSN_COST);
6848   format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}
6849 
6850   ins_encode( aarch64_enc_prefetchw(mem) );
6851 
6852   ins_pipe(iload_prefetch);
6853 %}
6854 
6855 //  ---------------- volatile loads and stores ----------------
6856 
6857 // Load Byte (8 bit signed)
     // Acquiring form of LoadB: costed at VOLATILE_REF_COST and restricted to
     // the indirect operand class for the address. The format shows ldarsb.
6858 instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
6859 %{
6860   match(Set dst (LoadB mem));
6861 
6862   ins_cost(VOLATILE_REF_COST);
6863   format %{ "ldarsb  $dst, $mem\t# byte" %}
6864 
6865   ins_encode(aarch64_enc_ldarsb(dst, mem));
6866 
6867   ins_pipe(pipe_serial);
6868 %}
6869 
6870 // Load Byte (8 bit signed) into long
     // Acquiring form that folds the ConvI2L into the load: the match rule
     // covers (ConvI2L (LoadB mem)) and reuses the ldarsb encoding with a
     // long destination register.
6871 instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
6872 %{
6873   match(Set dst (ConvI2L (LoadB mem)));
6874 
6875   ins_cost(VOLATILE_REF_COST);
6876   format %{ "ldarsb  $dst, $mem\t# byte" %}
6877 
6878   ins_encode(aarch64_enc_ldarsb(dst, mem));
6879 
6880   ins_pipe(pipe_serial);
6881 %}
6882 
6883 // Load Byte (8 bit unsigned)
     // Acquiring form of LoadUB: costed at VOLATILE_REF_COST, indirect
     // addressing only. The format shows ldarb.
6884 instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
6885 %{
6886   match(Set dst (LoadUB mem));
6887 
6888   ins_cost(VOLATILE_REF_COST);
6889   format %{ "ldarb  $dst, $mem\t# byte" %}
6890 
6891   ins_encode(aarch64_enc_ldarb(dst, mem));
6892 
6893   ins_pipe(pipe_serial);
6894 %}
6895 
6896 // Load Byte (8 bit unsigned) into long
     // Acquiring form that folds the ConvI2L into the load: matches
     // (ConvI2L (LoadUB mem)) and reuses the ldarb encoding with a long
     // destination register.
6897 instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
6898 %{
6899   match(Set dst (ConvI2L (LoadUB mem)));
6900 
6901   ins_cost(VOLATILE_REF_COST);
6902   format %{ "ldarb  $dst, $mem\t# byte" %}
6903 
6904   ins_encode(aarch64_enc_ldarb(dst, mem));
6905 
6906   ins_pipe(pipe_serial);
6907 %}
6908 
6909 // Load Short (16 bit signed)
     // Acquiring form of LoadS: costed at VOLATILE_REF_COST, indirect
     // addressing only. Uses the aarch64_enc_ldarshw encoding class (defined
     // outside this section) for the int-sized destination.
6910 instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
6911 %{
6912   match(Set dst (LoadS mem));
6913 
6914   ins_cost(VOLATILE_REF_COST);
6915   format %{ "ldarshw  $dst, $mem\t# short" %}
6916 
6917   ins_encode(aarch64_enc_ldarshw(dst, mem));
6918 
6919   ins_pipe(pipe_serial);
6920 %}
6921 
6922 instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
6923 %{
       // Load Short/Char (16 bit unsigned), acquiring form of LoadUS: costed
       // at VOLATILE_REF_COST, indirect addressing only. Uses the
       // aarch64_enc_ldarhw encoding class (defined outside this section).
6924   match(Set dst (LoadUS mem));
6925 
6926   ins_cost(VOLATILE_REF_COST);
6927   format %{ "ldarhw  $dst, $mem\t# short" %}
6928 
6929   ins_encode(aarch64_enc_ldarhw(dst, mem));
6930 
6931   ins_pipe(pipe_serial);
6932 %}
6933 
6934 // Load Short/Char (16 bit unsigned) into long
     // Acquiring form that folds the ConvI2L into the load: matches
     // (ConvI2L (LoadUS mem)) and uses the ldarh encoding with a long
     // destination register.
6935 instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
6936 %{
6937   match(Set dst (ConvI2L (LoadUS mem)));
6938 
6939   ins_cost(VOLATILE_REF_COST);
6940   format %{ "ldarh  $dst, $mem\t# short" %}
6941 
6942   ins_encode(aarch64_enc_ldarh(dst, mem));
6943 
6944   ins_pipe(pipe_serial);
6945 %}
6946 
6947 // Load Short (16 bit signed) into long
     // Acquiring form that folds the ConvI2L into the load: matches
     // (ConvI2L (LoadS mem)) and uses the sign-extending aarch64_enc_ldarsh
     // encoding class. The format names ldarsh so the listing agrees with
     // the encoding (the unsigned variant above is the one that prints ldarh).
6948 instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
6949 %{
6950   match(Set dst (ConvI2L (LoadS mem)));
6951 
6952   ins_cost(VOLATILE_REF_COST);
6953   format %{ "ldarsh  $dst, $mem\t# short" %}
6954 
6955   ins_encode(aarch64_enc_ldarsh(dst, mem));
6956 
6957   ins_pipe(pipe_serial);
6958 %}
6959 
6960 // Load Integer (32 bit signed)
     // Acquiring form of LoadI: costed at VOLATILE_REF_COST, indirect
     // addressing only. The format shows ldarw.
6961 instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
6962 %{
6963   match(Set dst (LoadI mem));
6964 
6965   ins_cost(VOLATILE_REF_COST);
6966   format %{ "ldarw  $dst, $mem\t# int" %}
6967 
6968   ins_encode(aarch64_enc_ldarw(dst, mem));
6969 
6970   ins_pipe(pipe_serial);
6971 %}
6972 
6973 // Load Integer (32 bit unsigned) into long
     // Matches the whole (AndL (ConvI2L (LoadI mem)) mask) subtree, where mask
     // is an immL_32bits operand, and replaces it with one ldarw into a long
     // register. The mask operand is consumed by the match only; it does not
     // appear in the encoding.
6974 instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
6975 %{
6976   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
6977 
6978   ins_cost(VOLATILE_REF_COST);
6979   format %{ "ldarw  $dst, $mem\t# int" %}
6980 
6981   ins_encode(aarch64_enc_ldarw(dst, mem));
6982 
6983   ins_pipe(pipe_serial);
6984 %}
6985 
6986 // Load Long (64 bit signed)
     // Acquiring form of LoadL: costed at VOLATILE_REF_COST, indirect
     // addressing only. The format shows ldar and is tagged "# long" so the
     // listing is distinguishable from the 32-bit int load.
6987 instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
6988 %{
6989   match(Set dst (LoadL mem));
6990 
6991   ins_cost(VOLATILE_REF_COST);
6992   format %{ "ldar  $dst, $mem\t# long" %}
6993 
6994   ins_encode(aarch64_enc_ldar(dst, mem));
6995 
6996   ins_pipe(pipe_serial);
6997 %}
6998 
6999 // Load Pointer
     // Acquiring form of LoadP: costed at VOLATILE_REF_COST, indirect
     // addressing only. Shares the ldar encoding class with the long load.
7000 instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
7001 %{
7002   match(Set dst (LoadP mem));
7003 
7004   ins_cost(VOLATILE_REF_COST);
7005   format %{ "ldar  $dst, $mem\t# ptr" %}
7006 
7007   ins_encode(aarch64_enc_ldar(dst, mem));
7008 
7009   ins_pipe(pipe_serial);
7010 %}
7011 
7012 // Load Compressed Pointer
     // Acquiring form of LoadN: costed at VOLATILE_REF_COST, indirect
     // addressing only. The narrow value is read with the 32-bit ldarw
     // encoding.
7013 instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
7014 %{
7015   match(Set dst (LoadN mem));
7016 
7017   ins_cost(VOLATILE_REF_COST);
7018   format %{ "ldarw  $dst, $mem\t# compressed ptr" %}
7019 
7020   ins_encode(aarch64_enc_ldarw(dst, mem));
7021 
7022   ins_pipe(pipe_serial);
7023 %}
7024 
7025 // Load Float
     // Acquiring form of LoadF into a vRegF: costed at VOLATILE_REF_COST,
     // indirect addressing only. The work is delegated to the
     // aarch64_enc_fldars encoding class (defined outside this section); the
     // "ldars" in the format is a listing label.
7026 instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
7027 %{
7028   match(Set dst (LoadF mem));
7029 
7030   ins_cost(VOLATILE_REF_COST);
7031   format %{ "ldars  $dst, $mem\t# float" %}
7032 
7033   ins_encode( aarch64_enc_fldars(dst, mem) );
7034 
7035   ins_pipe(pipe_serial);
7036 %}
7037 
7038 // Load Double
     // Acquiring form of LoadD into a vRegD: costed at VOLATILE_REF_COST,
     // indirect addressing only. The work is delegated to the
     // aarch64_enc_fldard encoding class (defined outside this section); the
     // "ldard" in the format is a listing label.
7039 instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
7040 %{
7041   match(Set dst (LoadD mem));
7042 
7043   ins_cost(VOLATILE_REF_COST);
7044   format %{ "ldard  $dst, $mem\t# double" %}
7045 
7046   ins_encode( aarch64_enc_fldard(dst, mem) );
7047 
7048   ins_pipe(pipe_serial);
7049 %}
7050 
7051 // Store Byte
     // Releasing form of StoreB (stlrb in the format). It has no predicate
     // and is costed at VOLATILE_REF_COST, so it covers the StoreB nodes that
     // the plain storeB rule rejects via !needs_releasing_store(n).
7052 instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
7053 %{
7054   match(Set mem (StoreB mem src));
7055 
7056   ins_cost(VOLATILE_REF_COST);
7057   format %{ "stlrb  $src, $mem\t# byte" %}
7058 
7059   ins_encode(aarch64_enc_stlrb(src, mem));
7060 
7061   ins_pipe(pipe_class_memory);
7062 %}
7063 
7064 // Store Char/Short
     // Releasing form of StoreC (stlrh in the format). No predicate; costed
     // at VOLATILE_REF_COST; indirect addressing only.
7065 instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
7066 %{
7067   match(Set mem (StoreC mem src));
7068 
7069   ins_cost(VOLATILE_REF_COST);
7070   format %{ "stlrh  $src, $mem\t# short" %}
7071 
7072   ins_encode(aarch64_enc_stlrh(src, mem));
7073 
7074   ins_pipe(pipe_class_memory);
7075 %}
7076 
7077 // Store Integer
     // Releasing form of StoreI (stlrw in the format). No predicate; costed
     // at VOLATILE_REF_COST; indirect addressing only.
7078 
7079 instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
7080 %{
7081   match(Set mem(StoreI mem src));
7082 
7083   ins_cost(VOLATILE_REF_COST);
7084   format %{ "stlrw  $src, $mem\t# int" %}
7085 
7086   ins_encode(aarch64_enc_stlrw(src, mem));
7087 
7088   ins_pipe(pipe_class_memory);
7089 %}
7090 
7091 // Store Long (64 bit signed)
     // Releasing form of StoreL (stlr in the format). No predicate; costed at
     // VOLATILE_REF_COST; indirect addressing only. The format is tagged
     // "# long" so the listing is distinguishable from the 32-bit int store.
7092 instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
7093 %{
7094   match(Set mem (StoreL mem src));
7095 
7096   ins_cost(VOLATILE_REF_COST);
7097   format %{ "stlr  $src, $mem\t# long" %}
7098 
7099   ins_encode(aarch64_enc_stlr(src, mem));
7100 
7101   ins_pipe(pipe_class_memory);
7102 %}
7103 
7104 // Store Pointer
     // Releasing form of StoreP; shares the stlr encoding class with the
     // long store. No predicate; costed at VOLATILE_REF_COST.
7105 instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
7106 %{
7107   match(Set mem (StoreP mem src));
7108 
7109   ins_cost(VOLATILE_REF_COST);
7110   format %{ "stlr  $src, $mem\t# ptr" %}
7111 
7112   ins_encode(aarch64_enc_stlr(src, mem));
7113 
7114   ins_pipe(pipe_class_memory);
7115 %}
7116 
7117 // Store Compressed Pointer
     // Releasing form of StoreN; the narrow value is written with the 32-bit
     // stlrw encoding. No predicate; costed at VOLATILE_REF_COST.
7118 instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
7119 %{
7120   match(Set mem (StoreN mem src));
7121 
7122   ins_cost(VOLATILE_REF_COST);
7123   format %{ "stlrw  $src, $mem\t# compressed ptr" %}
7124 
7125   ins_encode(aarch64_enc_stlrw(src, mem));
7126 
7127   ins_pipe(pipe_class_memory);
7128 %}
7129 
7130 // Store Float
     // Releasing form of StoreF from a vRegF. The work is delegated to the
     // aarch64_enc_fstlrs encoding class (defined outside this section); the
     // "stlrs" in the format is a listing label.
7131 instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
7132 %{
7133   match(Set mem (StoreF mem src));
7134 
7135   ins_cost(VOLATILE_REF_COST);
7136   format %{ "stlrs  $src, $mem\t# float" %}
7137 
7138   ins_encode( aarch64_enc_fstlrs(src, mem) );
7139 
7140   ins_pipe(pipe_class_memory);
7141 %}
7142 
7143 // TODO
7144 // implement storeImmF0 and storeFImmPacked
7145 
7146 // Store Double
     // Releasing form of StoreD from a vRegD. The work is delegated to the
     // aarch64_enc_fstlrd encoding class (defined outside this section); the
     // "stlrd" in the format is a listing label.
7147 instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
7148 %{
7149   match(Set mem (StoreD mem src));
7150 
7151   ins_cost(VOLATILE_REF_COST);
7152   format %{ "stlrd  $src, $mem\t# double" %}
7153 
7154   ins_encode( aarch64_enc_fstlrd(src, mem) );
7155 
7156   ins_pipe(pipe_class_memory);
7157 %}
7158 
7159 //  ---------------- end of volatile loads and stores ----------------
7160 
7161 // ============================================================================
7162 // BSWAP Instructions
7163 
7164 instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
       // ReverseBytesI: byte-swap a 32-bit value with a single revw.
7165   match(Set dst (ReverseBytesI src));
7166 
7167   ins_cost(INSN_COST);
7168   format %{ "revw  $dst, $src" %}
7169 
7170   ins_encode %{
7171     __ revw(as_Register($dst$$reg), as_Register($src$$reg));
7172   %}
7173 
7174   ins_pipe(ialu_reg);
7175 %}
7176 
7177 instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
       // ReverseBytesL: byte-swap a 64-bit value with a single rev.
7178   match(Set dst (ReverseBytesL src));
7179 
7180   ins_cost(INSN_COST);
7181   format %{ "rev  $dst, $src" %}
7182 
7183   ins_encode %{
7184     __ rev(as_Register($dst$$reg), as_Register($src$$reg));
7185   %}
7186 
7187   ins_pipe(ialu_reg);
7188 %}
7189 
7190 instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
       // ReverseBytesUS: emits a single rev16w. Unlike the signed variant
       // below, no sign-extension step follows.
7191   match(Set dst (ReverseBytesUS src));
7192 
7193   ins_cost(INSN_COST);
7194   format %{ "rev16w  $dst, $src" %}
7195 
7196   ins_encode %{
7197     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
7198   %}
7199 
7200   ins_pipe(ialu_reg);
7201 %}
7202 
7203 instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
       // ReverseBytesS: rev16w followed by sbfmw #0, #15 on the result, i.e.
       // the swapped low 16 bits are sign-extended in place in $dst.
       // Two instructions are emitted although the cost is a single INSN_COST.
7204   match(Set dst (ReverseBytesS src));
7205 
7206   ins_cost(INSN_COST);
7207   format %{ "rev16w  $dst, $src\n\t"
7208             "sbfmw $dst, $dst, #0, #15" %}
7209 
7210   ins_encode %{
7211     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
7212     __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
7213   %}
7214 
7215   ins_pipe(ialu_reg);
7216 %}
7217 
7218 // ============================================================================
7219 // Zero Count Instructions
7220 
7221 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
       // CountLeadingZerosI: a single 32-bit clzw.
7222   match(Set dst (CountLeadingZerosI src));
7223 
7224   ins_cost(INSN_COST);
7225   format %{ "clzw  $dst, $src" %}
7226   ins_encode %{
7227     __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
7228   %}
7229 
7230   ins_pipe(ialu_reg);
7231 %}
7232 
7233 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
       // CountLeadingZerosL: a single 64-bit clz. The source is a long
       // register but the result is produced in an int register (iRegINoSp).
7234   match(Set dst (CountLeadingZerosL src));
7235 
7236   ins_cost(INSN_COST);
7237   format %{ "clz   $dst, $src" %}
7238   ins_encode %{
7239     __ clz(as_Register($dst$$reg), as_Register($src$$reg));
7240   %}
7241 
7242   ins_pipe(ialu_reg);
7243 %}
7244 
7245 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
       // CountTrailingZerosI: bit-reverse the 32-bit value (rbitw) into $dst,
       // then count leading zeros of that (clzw). Two instructions, hence
       // INSN_COST * 2.
7246   match(Set dst (CountTrailingZerosI src));
7247 
7248   ins_cost(INSN_COST * 2);
7249   format %{ "rbitw  $dst, $src\n\t"
7250             "clzw   $dst, $dst" %}
7251   ins_encode %{
7252     __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
7253     __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
7254   %}
7255 
7256   ins_pipe(ialu_reg);
7257 %}
7258 
7259 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
       // CountTrailingZerosL: bit-reverse the 64-bit value (rbit) into $dst,
       // then count leading zeros of that (clz). Two instructions, hence
       // INSN_COST * 2. The result is produced in an int register.
7260   match(Set dst (CountTrailingZerosL src));
7261 
7262   ins_cost(INSN_COST * 2);
7263   format %{ "rbit   $dst, $src\n\t"
7264             "clz    $dst, $dst" %}
7265   ins_encode %{
7266     __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
7267     __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
7268   %}
7269 
7270   ins_pipe(ialu_reg);
7271 %}
7272 
7273 //---------- Population Count Instructions -------------------------------------
7274 //
7275 
7276 instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
       // PopCountI via the vector unit, enabled by UsePopCountInstruction:
       // zero-extend the 32-bit input, move it into the low 64 bits of $tmp,
       // count set bits per byte (cnt, 8B), sum the eight byte counts (addv)
       // and move the result back to $dst.
       //
       // The zero-extension goes through rscratch1 rather than being done in
       // place on $src. $src is an input-only operand, and because its class
       // is iRegIorL2I it may be the register of a long whose upper half is
       // still needed elsewhere; writing it back with movw would clear that
       // upper half.
7277   predicate(UsePopCountInstruction);
7278   match(Set dst (PopCountI src));
7279   effect(TEMP tmp);
7280   ins_cost(INSN_COST * 13);
7281 
7282   format %{ "movw   rscratch1, $src\n\t"
7283             "mov    $tmp, rscratch1\t# vector (1D)\n\t"
7284             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
7285             "addv   $tmp, $tmp\t# vector (8B)\n\t"
7286             "mov    $dst, $tmp\t# vector (1D)" %}
7287   ins_encode %{
7288     __ movw(rscratch1, $src$$Register); // ensure top 32 bits 0 without touching $src
7289     __ mov($tmp$$FloatRegister, __ T1D, 0, rscratch1);
7290     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7291     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7292     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
7293   %}
7294 
7295   ins_pipe(pipe_class_default);
7296 %}
7297 
7298 instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
       // PopCountI of a value loaded from memory, enabled by
       // UsePopCountInstruction: matches (PopCountI (LoadI mem)) so the
       // operand is loaded straight into the vector temp with ldrs (through
       // the loadStore helper), skipping the general-register round trip.
       // Then cnt per byte, addv to sum, and mov the result to $dst.
7299   predicate(UsePopCountInstruction);
7300   match(Set dst (PopCountI (LoadI mem)));
7301   effect(TEMP tmp);
7302   ins_cost(INSN_COST * 13);
7303 
7304   format %{ "ldrs   $tmp, $mem\n\t"
7305             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
7306             "addv   $tmp, $tmp\t# vector (8B)\n\t"
7307             "mov    $dst, $tmp\t# vector (1D)" %}
7308   ins_encode %{
7309     FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
7310     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
7311                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
7312     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7313     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7314     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
7315   %}
7316 
7317   ins_pipe(pipe_class_default);
7318 %}
7319 
7320 // Note: Long.bitCount(long) returns an int.
     // PopCountL via the vector unit, enabled by UsePopCountInstruction: move
     // the 64-bit input into $tmp, count set bits per byte (cnt, 8B), sum the
     // eight byte counts (addv) and move the result to the int register $dst.
7321 instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
7322   predicate(UsePopCountInstruction);
7323   match(Set dst (PopCountL src));
7324   effect(TEMP tmp);
7325   ins_cost(INSN_COST * 13);
7326 
7327   format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
7328             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
7329             "addv   $tmp, $tmp\t# vector (8B)\n\t"
7330             "mov    $dst, $tmp\t# vector (1D)" %}
7331   ins_encode %{
7332     __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
7333     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7334     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7335     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
7336   %}
7337 
7338   ins_pipe(pipe_class_default);
7339 %}
7340 
7341 instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
       // PopCountL of a value loaded from memory, enabled by
       // UsePopCountInstruction: matches (PopCountL (LoadL mem)) so the
       // operand is loaded straight into the vector temp with ldrd (through
       // the loadStore helper). Then cnt per byte, addv to sum, and mov the
       // result to the int register $dst.
7342   predicate(UsePopCountInstruction);
7343   match(Set dst (PopCountL (LoadL mem)));
7344   effect(TEMP tmp);
7345   ins_cost(INSN_COST * 13);
7346 
7347   format %{ "ldrd   $tmp, $mem\n\t"
7348             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
7349             "addv   $tmp, $tmp\t# vector (8B)\n\t"
7350             "mov    $dst, $tmp\t# vector (1D)" %}
7351   ins_encode %{
7352     FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
7353     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
7354                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
7355     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7356     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7357     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
7358   %}
7359 
7360   ins_pipe(pipe_class_default);
7361 %}
7362 
7363 // ============================================================================
7364 // MemBar Instruction
7365 
7366 instruct load_fence() %{
       // LoadFence: emits a barrier ordering earlier loads against later
       // loads and stores (LoadLoad|LoadStore).
7367   match(LoadFence);
7368   ins_cost(VOLATILE_REF_COST);
7369 
7370   format %{ "load_fence" %}
7371 
7372   ins_encode %{
7373     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
7374   %}
7375   ins_pipe(pipe_serial);
7376 %}
7377 
7378 instruct unnecessary_membar_acquire() %{
       // Elided MemBarAcquire: when unnecessary_acquire(n) holds, this
       // zero-cost rule wins over membar_acquire and the encoding only
       // records a block comment; no membar call is made.
7379   predicate(unnecessary_acquire(n));
7380   match(MemBarAcquire);
7381   ins_cost(0);
7382 
7383   format %{ "membar_acquire (elided)" %}
7384 
7385   ins_encode %{
7386     __ block_comment("membar_acquire (elided)");
7387   %}
7388 
7389   ins_pipe(pipe_class_empty);
7390 %}
7391 
7392 instruct membar_acquire() %{
       // MemBarAcquire fallback (no predicate): emits a LoadLoad|LoadStore
       // barrier at VOLATILE_REF_COST.
7393   match(MemBarAcquire);
7394   ins_cost(VOLATILE_REF_COST);
7395 
7396   format %{ "membar_acquire" %}
7397 
7398   ins_encode %{
7399     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
7400   %}
7401 
7402   ins_pipe(pipe_serial);
7403 %}
7404 
7405 
7406 instruct membar_acquire_lock() %{
       // MemBarAcquireLock: emits the same LoadLoad|LoadStore barrier as
       // membar_acquire; there is no elided variant for this node here.
7407   match(MemBarAcquireLock);
7408   ins_cost(VOLATILE_REF_COST);
7409 
7410   format %{ "membar_acquire_lock" %}
7411 
7412   ins_encode %{
7413     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
7414   %}
7415 
7416   ins_pipe(pipe_serial);
7417 %}
7418 
7419 instruct store_fence() %{
       // StoreFence: emits a barrier ordering earlier loads and stores
       // against later stores (LoadStore|StoreStore).
7420   match(StoreFence);
7421   ins_cost(VOLATILE_REF_COST);
7422 
7423   format %{ "store_fence" %}
7424 
7425   ins_encode %{
7426     __ membar(Assembler::LoadStore|Assembler::StoreStore);
7427   %}
7428   ins_pipe(pipe_serial);
7429 %}
7430 
7431 instruct unnecessary_membar_release() %{
       // Elided MemBarRelease: when unnecessary_release(n) holds, this
       // zero-cost rule wins over membar_release and the encoding only
       // records a block comment; no membar call is made.
       // NOTE(review): this rule is scheduled on pipe_serial although it
       // emits no barrier, whereas unnecessary_membar_acquire uses
       // pipe_class_empty - confirm which is intended.
7432   predicate(unnecessary_release(n));
7433   match(MemBarRelease);
7434   ins_cost(0);
7435 
7436   format %{ "membar_release (elided)" %}
7437 
7438   ins_encode %{
7439     __ block_comment("membar_release (elided)");
7440   %}
7441   ins_pipe(pipe_serial);
7442 %}
7443 
7444 instruct membar_release() %{
       // MemBarRelease fallback (no predicate): emits a LoadStore|StoreStore
       // barrier at VOLATILE_REF_COST.
7445   match(MemBarRelease);
7446   ins_cost(VOLATILE_REF_COST);
7447 
7448   format %{ "membar_release" %}
7449 
7450   ins_encode %{
7451     __ membar(Assembler::LoadStore|Assembler::StoreStore);
7452   %}
7453   ins_pipe(pipe_serial);
7454 %}
7455 
7456 instruct membar_storestore() %{
       // MemBarStoreStore: emits a StoreStore-only barrier.
7457   match(MemBarStoreStore);
7458   ins_cost(VOLATILE_REF_COST);
7459 
7460   format %{ "MEMBAR-store-store" %}
7461 
7462   ins_encode %{
7463     __ membar(Assembler::StoreStore);
7464   %}
7465   ins_pipe(pipe_serial);
7466 %}
7467 
7468 instruct membar_release_lock() %{
       // MemBarReleaseLock: emits the same LoadStore|StoreStore barrier as
       // membar_release; there is no elided variant for this node here.
7469   match(MemBarReleaseLock);
7470   ins_cost(VOLATILE_REF_COST);
7471 
7472   format %{ "membar_release_lock" %}
7473 
7474   ins_encode %{
7475     __ membar(Assembler::LoadStore|Assembler::StoreStore);
7476   %}
7477 
7478   ins_pipe(pipe_serial);
7479 %}
7480 
7481 instruct unnecessary_membar_volatile() %{
       // Elided MemBarVolatile: when unnecessary_volatile(n) holds, this
       // zero-cost rule wins over membar_volatile and the encoding only
       // records a block comment; no membar call is made.
       // NOTE(review): this rule is scheduled on pipe_serial although it
       // emits no barrier, whereas unnecessary_membar_acquire uses
       // pipe_class_empty - confirm which is intended.
7482   predicate(unnecessary_volatile(n));
7483   match(MemBarVolatile);
7484   ins_cost(0);
7485 
7486   format %{ "membar_volatile (elided)" %}
7487 
7488   ins_encode %{
7489     __ block_comment("membar_volatile (elided)");
7490   %}
7491 
7492   ins_pipe(pipe_serial);
7493 %}
7494 
7495 instruct membar_volatile() %{
       // MemBarVolatile fallback (no predicate): emits a StoreLoad barrier.
       // Its cost is VOLATILE_REF_COST*100, far above the other barrier
       // rules, so the elided variant is always preferred when its predicate
       // holds.
7496   match(MemBarVolatile);
7497   ins_cost(VOLATILE_REF_COST*100);
7498 
7499   format %{ "membar_volatile" %}
7500 
7501   ins_encode %{
7502     __ membar(Assembler::StoreLoad);
7503   %}
7504 
7505   ins_pipe(pipe_serial);
7506 %}
7507 
7508 // ============================================================================
7509 // Cast/Convert Instructions
7510 
7511 instruct castX2P(iRegPNoSp dst, iRegL src) %{
       // CastX2P (long -> pointer): a plain register move, skipped entirely
       // when the allocator assigned the same register to $dst and $src.
7512   match(Set dst (CastX2P src));
7513 
7514   ins_cost(INSN_COST);
7515   format %{ "mov $dst, $src\t# long -> ptr" %}
7516 
7517   ins_encode %{
7518     if ($dst$$reg != $src$$reg) {
7519       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7520     }
7521   %}
7522 
7523   ins_pipe(ialu_reg);
7524 %}
7525 
7526 instruct castP2X(iRegLNoSp dst, iRegP src) %{
       // CastP2X (pointer -> long): a plain register move, skipped entirely
       // when the allocator assigned the same register to $dst and $src.
7527   match(Set dst (CastP2X src));
7528 
7529   ins_cost(INSN_COST);
7530   format %{ "mov $dst, $src\t# ptr -> long" %}
7531 
7532   ins_encode %{
7533     if ($dst$$reg != $src$$reg) {
7534       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7535     }
7536   %}
7537 
7538   ins_pipe(ialu_reg);
7539 %}
7540 
7541 // Convert oop into int for vectors alignment masking
     // Matches the (ConvL2I (CastP2X src)) pair as one rule and emits a
     // single 32-bit movw of the pointer register into $dst.
7542 instruct convP2I(iRegINoSp dst, iRegP src) %{
7543   match(Set dst (ConvL2I (CastP2X src)));
7544 
7545   ins_cost(INSN_COST);
7546   format %{ "movw $dst, $src\t# ptr -> int" %}
7547   ins_encode %{
7548     __ movw($dst$$Register, $src$$Register);
7549   %}
7550 
7551   ins_pipe(ialu_reg);
7552 %}
7553 
7554 // Convert compressed oop into int for vectors alignment masking
7555 // in case of 32bit oops (heap < 4Gb).
     // Matches (ConvL2I (CastP2X (DecodeN src))) as one rule. It is only
     // enabled when the narrow-oop shift is 0, in which case the decode is
     // skipped and the narrow register is copied with a 32-bit movw. The
     // format names movw and $dst so the listing agrees with the encoding.
7556 instruct convN2I(iRegINoSp dst, iRegN src)
7557 %{
7558   predicate(Universe::narrow_oop_shift() == 0);
7559   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
7560 
7561   ins_cost(INSN_COST);
7562   format %{ "movw $dst, $src\t# compressed ptr -> int" %}
7563   ins_encode %{
7564     __ movw($dst$$Register, $src$$Register);
7565   %}
7566 
7567   ins_pipe(ialu_reg);
7568 %}
7569 
7570 
7571 // Convert oop pointer into compressed form
     // General EncodeP rule, used when the node's pointer type is not known
     // to be NotNull. Delegates to MacroAssembler::encode_heap_oop and
     // declares the flags register as killed.
7572 instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
7573   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
7574   match(Set dst (EncodeP src));
7575   effect(KILL cr);
7576   ins_cost(INSN_COST * 3);
7577   format %{ "encode_heap_oop $dst, $src" %}
7578   ins_encode %{
7579     Register s = $src$$Register;
7580     Register d = $dst$$Register;
7581     __ encode_heap_oop(d, s);
7582   %}
7583   ins_pipe(ialu_reg);
7584 %}
7585 
7586 instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
       // EncodeP rule for pointers whose type is known NotNull; delegates to
       // MacroAssembler::encode_heap_oop_not_null.
       // NOTE(review): cr is declared as an operand but, unlike
       // encodeHeapOop, no effect() clause names it - confirm whether
       // KILL cr is needed or the operand can be dropped.
7587   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
7588   match(Set dst (EncodeP src));
7589   ins_cost(INSN_COST * 3);
7590   format %{ "encode_heap_oop_not_null $dst, $src" %}
7591   ins_encode %{
7592     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
7593   %}
7594   ins_pipe(ialu_reg);
7595 %}
7596 
7597 instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
       // General DecodeN rule, used when the node's pointer type is neither
       // NotNull nor Constant. Delegates to MacroAssembler::decode_heap_oop.
       // NOTE(review): cr is declared as an operand but no effect() clause
       // names it - confirm whether KILL cr is needed or the operand can be
       // dropped.
7598   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
7599             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
7600   match(Set dst (DecodeN src));
7601   ins_cost(INSN_COST * 3);
7602   format %{ "decode_heap_oop $dst, $src" %}
7603   ins_encode %{
7604     Register s = $src$$Register;
7605     Register d = $dst$$Register;
7606     __ decode_heap_oop(d, s);
7607   %}
7608   ins_pipe(ialu_reg);
7609 %}
7610 
7611 instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
       // DecodeN rule for pointers whose type is NotNull or Constant (the
       // exact complement of decodeHeapOop's predicate). Delegates to
       // MacroAssembler::decode_heap_oop_not_null.
       // NOTE(review): cr is declared as an operand but no effect() clause
       // names it - confirm whether it is needed.
7612   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
7613             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
7614   match(Set dst (DecodeN src));
7615   ins_cost(INSN_COST * 3);
7616   format %{ "decode_heap_oop_not_null $dst, $src" %}
7617   ins_encode %{
7618     Register s = $src$$Register;
7619     Register d = $dst$$Register;
7620     __ decode_heap_oop_not_null(d, s);
7621   %}
7622   ins_pipe(ialu_reg);
7623 %}
7624 
7625 // n.b. AArch64 implementations of encode_klass_not_null and
7626 // decode_klass_not_null do not modify the flags register so, unlike
7627 // Intel, we don't kill CR as a side effect here
7628 
7629 instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
       // EncodePKlass: compress a klass pointer by delegating to
       // MacroAssembler::encode_klass_not_null(dst, src). No flags operand is
       // declared for this rule.
7630   match(Set dst (EncodePKlass src));
7631 
7632   ins_cost(INSN_COST * 3);
7633   format %{ "encode_klass_not_null $dst,$src" %}
7634 
7635   ins_encode %{
7636     Register src_reg = as_Register($src$$reg);
7637     Register dst_reg = as_Register($dst$$reg);
7638     __ encode_klass_not_null(dst_reg, src_reg);
7639   %}
7640 
7641    ins_pipe(ialu_reg);
7642 %}
7643 
7644 instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
       // DecodeNKlass: decompress a klass pointer. When $dst and $src were
       // assigned the same register the single-register (in-place) overload
       // of decode_klass_not_null is called; otherwise the two-register form.
7645   match(Set dst (DecodeNKlass src));
7646 
7647   ins_cost(INSN_COST * 3);
7648   format %{ "decode_klass_not_null $dst,$src" %}
7649 
7650   ins_encode %{
7651     Register src_reg = as_Register($src$$reg);
7652     Register dst_reg = as_Register($dst$$reg);
7653     if (dst_reg != src_reg) {
7654       __ decode_klass_not_null(dst_reg, src_reg);
7655     } else {
7656       __ decode_klass_not_null(dst_reg);
7657     }
7658   %}
7659 
7660    ins_pipe(ialu_reg);
7661 %}
7662 
7663 instruct checkCastPP(iRegPNoSp dst)
7664 %{
       // CheckCastPP is a type-only node: $dst is both input and output, the
       // declared size is 0 and the encoding is empty.
7665   match(Set dst (CheckCastPP dst));
7666 
7667   size(0);
7668   format %{ "# checkcastPP of $dst" %}
7669   ins_encode(/* empty encoding */);
7670   ins_pipe(pipe_class_empty);
7671 %}
7672 
7673 instruct castPP(iRegPNoSp dst)
7674 %{
       // CastPP is a type-only node: $dst is both input and output, the
       // declared size is 0 and the encoding is empty.
7675   match(Set dst (CastPP dst));
7676 
7677   size(0);
7678   format %{ "# castPP of $dst" %}
7679   ins_encode(/* empty encoding */);
7680   ins_pipe(pipe_class_empty);
7681 %}
7682 
7683 instruct castII(iRegI dst)
7684 %{
       // CastII is a type-only node: $dst is both input and output, the
       // declared size is 0 and the encoding is empty. Unlike the two pointer
       // casts above, this rule also states ins_cost(0) explicitly.
7685   match(Set dst (CastII dst));
7686 
7687   size(0);
7688   format %{ "# castII of $dst" %}
7689   ins_encode(/* empty encoding */);
7690   ins_cost(0);
7691   ins_pipe(pipe_class_empty);
7692 %}
7693 
7694 // ============================================================================
7695 // Atomic operation instructions
7696 //
7697 // Intel and SPARC both implement Ideal Node LoadPLocked and
7698 // Store{PIL}Conditional instructions using a normal load for the
7699 // LoadPLocked and a CAS for the Store{PIL}Conditional.
7700 //
7701 // The ideal code appears only to use LoadPLocked/StorePConditional as
7702 // a pair to lock object allocations from Eden space when not using
7703 // TLABs.
7704 //
7705 // There does not appear to be a Load{IL}Locked Ideal Node and the
7706 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
7707 // and to use StoreIConditional only for 32-bit and StoreLConditional
7708 // only for 64-bit.
7709 //
7710 // We implement the LoadPLocked and StorePConditional instructions
7711 // using, respectively, the AArch64 hw load-exclusive and
7712 // store-conditional instructions, whereas we must implement each of
7713 // Store{IL}Conditional using a CAS which employs a pair of
7714 // instructions comprising a load-exclusive followed by a
7715 // store-conditional.
7716 
7717 
7718 // Locked-load (linked load) of the current heap-top
7719 // used when updating the eden heap top
7720 // implemented using ldaxr on AArch64
     // Matches LoadPLocked with indirect addressing only and is costed at
     // VOLATILE_REF_COST. It pairs with the storePConditional rule below.
7721 
7722 instruct loadPLocked(iRegPNoSp dst, indirect mem)
7723 %{
7724   match(Set dst (LoadPLocked mem));
7725 
7726   ins_cost(VOLATILE_REF_COST);
7727 
7728   format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}
7729 
7730   ins_encode(aarch64_enc_ldaxr(dst, mem));
7731 
7732   ins_pipe(pipe_serial);
7733 %}
7734 
7735 // Conditional-store of the updated heap-top.
7736 // Used during allocation of the shared heap.
7737 // Sets flag (EQ) on success.
7738 // implemented using stlxr on AArch64.
     // The rule's result is the flags register: match(Set cr ...). $oldval
     // takes part in the match but is not passed to the encoding, which only
     // receives $newval and the address. The compare shown in the format is
     // presumably emitted by aarch64_enc_stlxr (defined outside this section)
     // - confirm there.
7739 
7740 instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
7741 %{
7742   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7743 
7744   ins_cost(VOLATILE_REF_COST);
7745 
7746  // TODO
7747  // do we need to do a store-conditional release or can we just use a
7748  // plain store-conditional?
7749 
     // The two format strings are concatenated, so the first ends in "\n\t"
     // to print each instruction on its own line.
7750   format %{
7751     "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release\n\t"
7752     "cmpw rscratch1, zr\t# EQ on successful write"
7753   %}
7754 
7755   ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));
7756 
7757   ins_pipe(pipe_serial);
7758 %}
7759 
7760 // this has to be implemented as a CAS
     // StoreLConditional: 64-bit compare-and-exchange on an indirect address
     // whose result is the flags register (match(Set cr ...)); per the format,
     // EQ signals a successful write. The work is done by the
     // aarch64_enc_cmpxchg encoding class (defined outside this section).
7761 instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
7762 %{
7763   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7764 
7765   ins_cost(VOLATILE_REF_COST);
7766 
     // The two format strings are concatenated, so the first ends in "\n\t"
     // to print each instruction on its own line.
7767   format %{
7768     "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval\n\t"
7769     "cmpw rscratch1, zr\t# EQ on successful write"
7770   %}
7771 
7772   ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval));
7773 
7774   ins_pipe(pipe_slow);
7775 %}
7776 
7777 // this has to be implemented as a CAS
     // StoreIConditional: 32-bit compare-and-exchange on an indirect address
     // whose result is the flags register (match(Set cr ...)); per the format,
     // EQ signals a successful write. The work is done by the
     // aarch64_enc_cmpxchgw encoding class (defined outside this section).
7778 instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
7779 %{
7780   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7781 
7782   ins_cost(VOLATILE_REF_COST);
7783 
     // The two format strings are concatenated, so the first ends in "\n\t"
     // to print each instruction on its own line.
7784   format %{
7785     "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval\n\t"
7786     "cmpw rscratch1, zr\t# EQ on successful write"
7787   %}
7788 
7789   ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval));
7790 
7791   ins_pipe(pipe_slow);
7792 %}
7793 
7794 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
7795 // can't match them
7796 
7797 instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
       // Int compare-and-swap producing a 0/1 result in $res.
       // Two encoding classes are chained: the 32-bit compare-and-exchange,
       // then a cset on EQ into $res.  The flags are overwritten, hence the
       // KILL effect on $cr.
7798 
7799   match(Set res (CompareAndSwapI mem (Binary oldval newval)));
7800 
7801   effect(KILL cr);
7802 
       // First string ends in "\n\t" so the two instructions print on
       // separate lines.
7803   format %{
7804     "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t"
7805     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
7806   %}
7807 
7808   ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
7809              aarch64_enc_cset_eq(res));
7810 
7811   ins_pipe(pipe_slow);
7812 %}
7813 
7814 instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
       // Long compare-and-swap producing a 0/1 int result in $res.
       // Chains the 64-bit compare-and-exchange encoding with a cset on EQ;
       // the flags are overwritten, hence the KILL effect on $cr.
7815 
7816   match(Set res (CompareAndSwapL mem (Binary oldval newval)));
7817 
7818   effect(KILL cr);
7819 
       // First string ends in "\n\t" so the two instructions print on
       // separate lines.
7820   format %{
7821     "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t"
7822     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
7823   %}
7824 
7825   ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
7826              aarch64_enc_cset_eq(res));
7827 
7828   ins_pipe(pipe_slow);
7829 %}
7830 
7831 instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
       // Pointer compare-and-swap producing a 0/1 int result in $res.
       // Chains the 64-bit compare-and-exchange encoding with a cset on EQ;
       // the flags are overwritten, hence the KILL effect on $cr.
7832 
7833   match(Set res (CompareAndSwapP mem (Binary oldval newval)));
7834 
7835   effect(KILL cr);
7836 
       // First string ends in "\n\t" so the two instructions print on
       // separate lines.
7837   format %{
7838     "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t"
7839     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
7840   %}
7841 
7842   ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
7843              aarch64_enc_cset_eq(res));
7844 
7845   ins_pipe(pipe_slow);
7846 %}
7847 
7848 instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
       // Narrow-oop compare-and-swap producing a 0/1 int result in $res.
       // Chains the 32-bit (w) compare-and-exchange encoding with a cset on
       // EQ; the flags are overwritten, hence the KILL effect on $cr.
7849 
7850   match(Set res (CompareAndSwapN mem (Binary oldval newval)));
7851 
7852   effect(KILL cr);
7853 
       // First string ends in "\n\t" so the two instructions print on
       // separate lines.
7854   format %{
7855     "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t"
7856     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
7857   %}
7858 
7859   ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
7860              aarch64_enc_cset_eq(res));
7861 
7862   ins_pipe(pipe_slow);
7863 %}
7864 
7865 
7866 instruct get_and_setI(indirect mem, iRegINoSp newv, iRegINoSp prev) %{
       // Int GetAndSet: calls the 32-bit atomic_xchgw macro with $prev as the
       // result register, $newv as the value and the base register of $mem
       // as the address.
       // $prev is the register this rule defines, so it uses the NoSp class
       // like every other Set destination among the neighbouring rules.
7867   match(Set prev (GetAndSetI mem newv));
7868   format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
7869   ins_encode %{
7870     __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
7871   %}
7872   ins_pipe(pipe_serial);
7873 %}
7874 
7875 instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegLNoSp prev) %{
       // Long GetAndSet: calls the 64-bit atomic_xchg macro with $prev as the
       // result register, $newv as the value and the base register of $mem
       // as the address.
       // $prev is the register this rule defines, so it uses the NoSp class
       // like every other Set destination among the neighbouring rules.
7876   match(Set prev (GetAndSetL mem newv));
7877   format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
7878   ins_encode %{
7879     __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
7880   %}
7881   ins_pipe(pipe_serial);
7882 %}
7883 
7884 instruct get_and_setN(indirect mem, iRegNNoSp newv, iRegINoSp prev) %{
       // Narrow-oop GetAndSet: calls the 32-bit atomic_xchgw macro with $prev
       // as the result register, $newv as the value and the base register of
       // $mem as the address.
       // $prev is the register this rule defines, so it uses a NoSp class
       // like every other Set destination among the neighbouring rules.
       // NOTE(review): $prev keeps its 32-bit integer class rather than a
       // narrow-oop class, as in the original -- confirm this is intended.
7885   match(Set prev (GetAndSetN mem newv));
7886   format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
7887   ins_encode %{
7888     __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
7889   %}
7890   ins_pipe(pipe_serial);
7891 %}
7892 
7893 instruct get_and_setP(indirect mem, iRegPNoSp newv, iRegPNoSp prev) %{
       // Pointer GetAndSet: calls the 64-bit atomic_xchg macro with $prev as
       // the result register, $newv as the value and the base register of
       // $mem as the address.
       // $prev is the register this rule defines, so it uses the NoSp class
       // like every other Set destination among the neighbouring rules.
7894   match(Set prev (GetAndSetP mem newv));
7895   format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
7896   ins_encode %{
7897     __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
7898   %}
7899   ins_pipe(pipe_serial);
7900 %}
7901 
7902 
7903 instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
       // Long GetAndAdd with a register increment whose result is wanted.
       // Calls atomic_add with $newval as the result register, $incr as the
       // addend and the base register of $mem as the address.
7904   match(Set newval (GetAndAddL mem incr));
       // Costed one INSN_COST above the _no_res variant.
7905   ins_cost(INSN_COST * 10);
7906   format %{ "get_and_addL $newval, [$mem], $incr" %}
7907   ins_encode %{
7908     __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
7909   %}
7910   ins_pipe(pipe_serial);
7911 %}
7912 
7913 instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
       // Long GetAndAdd with a register increment, for nodes whose result is
       // not used (see predicate).  noreg is passed as the result register
       // and the Set target is the placeholder operand $dummy.
7914   predicate(n->as_LoadStore()->result_not_used());
7915   match(Set dummy (GetAndAddL mem incr));
       // Cheaper than get_and_addL (INSN_COST * 10).
7916   ins_cost(INSN_COST * 9);
7917   format %{ "get_and_addL [$mem], $incr" %}
7918   ins_encode %{
7919     __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
7920   %}
7921   ins_pipe(pipe_serial);
7922 %}
7923 
7924 instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
       // Long GetAndAdd with an immediate increment (immLAddSub) whose result
       // is wanted.  The constant is passed straight to atomic_add.
7925   match(Set newval (GetAndAddL mem incr));
7926   ins_cost(INSN_COST * 10);
7927   format %{ "get_and_addL $newval, [$mem], $incr" %}
7928   ins_encode %{
7929     __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
7930   %}
7931   ins_pipe(pipe_serial);
7932 %}
7933 
7934 instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
       // Long GetAndAdd with an immediate increment, for nodes whose result
       // is not used (see predicate).  noreg is passed as the result register
       // and the Set target is the placeholder operand $dummy.
7935   predicate(n->as_LoadStore()->result_not_used());
7936   match(Set dummy (GetAndAddL mem incr));
       // Cheaper than get_and_addLi (INSN_COST * 10).
7937   ins_cost(INSN_COST * 9);
7938   format %{ "get_and_addL [$mem], $incr" %}
7939   ins_encode %{
7940     __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
7941   %}
7942   ins_pipe(pipe_serial);
7943 %}
7944 
7945 instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
       // Int GetAndAdd with a register increment whose result is wanted.
       // Calls the 32-bit atomic_addw with $newval as the result register,
       // $incr as the addend and the base register of $mem as the address.
7946   match(Set newval (GetAndAddI mem incr));
       // Costed one INSN_COST above the _no_res variant.
7947   ins_cost(INSN_COST * 10);
7948   format %{ "get_and_addI $newval, [$mem], $incr" %}
7949   ins_encode %{
7950     __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
7951   %}
7952   ins_pipe(pipe_serial);
7953 %}
7954 
7955 instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
       // Int GetAndAdd with a register increment, for nodes whose result is
       // not used (see predicate).  noreg is passed as the result register
       // and the Set target is the placeholder operand $dummy.
7956   predicate(n->as_LoadStore()->result_not_used());
7957   match(Set dummy (GetAndAddI mem incr));
       // Cheaper than get_and_addI (INSN_COST * 10).
7958   ins_cost(INSN_COST * 9);
7959   format %{ "get_and_addI [$mem], $incr" %}
7960   ins_encode %{
7961     __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
7962   %}
7963   ins_pipe(pipe_serial);
7964 %}
7965 
7966 instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
       // Int GetAndAdd with an immediate increment (immIAddSub) whose result
       // is wanted.  The constant is passed straight to atomic_addw.
7967   match(Set newval (GetAndAddI mem incr));
7968   ins_cost(INSN_COST * 10);
7969   format %{ "get_and_addI $newval, [$mem], $incr" %}
7970   ins_encode %{
7971     __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
7972   %}
7973   ins_pipe(pipe_serial);
7974 %}
7975 
7976 instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
       // Int GetAndAdd with an immediate increment, for nodes whose result is
       // not used (see predicate).  noreg is passed as the result register
       // and the Set target is the placeholder operand $dummy.
7977   predicate(n->as_LoadStore()->result_not_used());
7978   match(Set dummy (GetAndAddI mem incr));
       // Cheaper than get_and_addIi (INSN_COST * 10).
7979   ins_cost(INSN_COST * 9);
7980   format %{ "get_and_addI [$mem], $incr" %}
7981   ins_encode %{
7982     __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
7983   %}
7984   ins_pipe(pipe_serial);
7985 %}
7986 
7987 // Manifest a CmpL result in an integer register.
7988 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
7989 instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
7990 %{
       // Three-way compare of two long registers into an int register.
       // The flags are overwritten by the compare, hence KILL flags.
7991   match(Set dst (CmpL3 src1 src2));
7992   effect(KILL flags);
7993 
7994   ins_cost(INSN_COST * 6);
       // Each string but the last ends in "\n\t" so the three instructions
       // are printed on separate lines rather than run together.
7995   format %{
7996       "cmp $src1, $src2\n\t"
7997       "csetw $dst, ne\n\t"
7998       "cnegw $dst, lt"
7999   %}
8000   // format %{ "CmpL3 $dst, $src1, $src2" %}
8001   ins_encode %{
8002     __ cmp($src1$$Register, $src2$$Register);
         // dst = (src1 != src2) ? 1 : 0
8003     __ csetw($dst$$Register, Assembler::NE);
         // negate dst when src1 < src2, giving -1 / 0 / 1
8004     __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
8005   %}
8006 
8007   ins_pipe(pipe_class_default);
8008 %}
8009 
8010 instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
8011 %{
       // Three-way compare of a long register against an add/sub immediate,
       // producing -1 / 0 / 1 in an int register.
       // The flags are overwritten by the compare, hence KILL flags.
8012   match(Set dst (CmpL3 src1 src2));
8013   effect(KILL flags);
8014 
8015   ins_cost(INSN_COST * 6);
       // Each string but the last ends in "\n\t" so the three instructions
       // are printed on separate lines rather than run together.
8016   format %{
8017       "cmp $src1, $src2\n\t"
8018       "csetw $dst, ne\n\t"
8019       "cnegw $dst, lt"
8020   %}
8021   ins_encode %{
8022     int32_t con = (int32_t)$src2$$constant;
         // The compare is a flag-setting add/sub into zr.  A negative
         // constant is compared by adding its magnitude instead.
8023     if (con < 0) {
8024       __ adds(zr, $src1$$Register, -con);
8025     } else {
8026       __ subs(zr, $src1$$Register, con);
8027     }
         // dst = (src1 != con) ? 1 : 0, then negated when src1 < con
8028     __ csetw($dst$$Register, Assembler::NE);
8029     __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
8030   %}
8031 
8032   ins_pipe(pipe_class_default);
8033 %}
8034 
8035 // ============================================================================
8036 // Conditional Move Instructions
8037 
8038 // n.b. we have identical rules for both a signed compare op (cmpOp)
8039 // and an unsigned compare op (cmpOpU). it would be nice if we could
8040 // define an op class which merged both inputs and use it to type the
8041 // argument to a single rule. unfortunately this fails because the
8042 // opclass does not live up to the COND_INTER interface of its
8043 // component operands. When the generic code tries to negate the
8044 // operand it ends up running the generic MachOper::negate method
8045 // which throws a ShouldNotHappen. So, we have to provide two flavours
8046 // of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
8047 
8048 instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
       // Int conditional move keyed on a signed comparison (cmpOp/rFlagsReg).
       // $src2 is passed as the first cselw source and $src1 as the second,
       // so $dst receives $src2 when $cmp holds and $src1 otherwise.
8049   match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));
8050 
8051   ins_cost(INSN_COST * 2);
8052   format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}
8053 
8054   ins_encode %{
8055     __ cselw(as_Register($dst$$reg),
8056              as_Register($src2$$reg),
8057              as_Register($src1$$reg),
8058              (Assembler::Condition)$cmp$$cmpcode);
8059   %}
8060 
8061   ins_pipe(icond_reg_reg);
8062 %}
8063 
8064 instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
       // Int conditional move keyed on an unsigned comparison
       // (cmpOpU/rFlagsRegU); otherwise identical to cmovI_reg_reg.
       // $dst receives $src2 when $cmp holds and $src1 otherwise.
8065   match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));
8066 
8067   ins_cost(INSN_COST * 2);
8068   format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}
8069 
8070   ins_encode %{
8071     __ cselw(as_Register($dst$$reg),
8072              as_Register($src2$$reg),
8073              as_Register($src1$$reg),
8074              (Assembler::Condition)$cmp$$cmpcode);
8075   %}
8076 
8077   ins_pipe(icond_reg_reg);
8078 %}
8079 
8080 // special cases where one arg is zero
8081 
8082 // n.b. this is selected in preference to the rule above because it
8083 // avoids loading constant 0 into a source register
8084 
8085 // TODO
8086 // we ought only to be able to cull one of these variants as the ideal
8087 // transforms ought always to order the zero consistently (to left/right?)
8088 
8089 instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
       // Int conditional move (signed compare) whose first CMoveI value is
       // the constant 0.  zr stands in for the constant, so $dst receives
       // $src when $cmp holds and 0 otherwise.
8090   match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));
8091 
8092   ins_cost(INSN_COST * 2);
8093   format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}
8094 
8095   ins_encode %{
8096     __ cselw(as_Register($dst$$reg),
8097              as_Register($src$$reg),
8098              zr,
8099              (Assembler::Condition)$cmp$$cmpcode);
8100   %}
8101 
8102   ins_pipe(icond_reg);
8103 %}
8104 
8105 instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
       // Unsigned-compare twin of cmovI_zero_reg: $dst receives $src when
       // $cmp holds and 0 (via zr) otherwise.
8106   match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));
8107 
8108   ins_cost(INSN_COST * 2);
8109   format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}
8110 
8111   ins_encode %{
8112     __ cselw(as_Register($dst$$reg),
8113              as_Register($src$$reg),
8114              zr,
8115              (Assembler::Condition)$cmp$$cmpcode);
8116   %}
8117 
8118   ins_pipe(icond_reg);
8119 %}
8120 
8121 instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
       // Int conditional move (signed compare) whose second CMoveI value is
       // the constant 0.  zr stands in for the constant, so $dst receives 0
       // when $cmp holds and $src otherwise.
8122   match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));
8123 
8124   ins_cost(INSN_COST * 2);
8125   format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}
8126 
8127   ins_encode %{
8128     __ cselw(as_Register($dst$$reg),
8129              zr,
8130              as_Register($src$$reg),
8131              (Assembler::Condition)$cmp$$cmpcode);
8132   %}
8133 
8134   ins_pipe(icond_reg);
8135 %}
8136 
8137 instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
       // Unsigned-compare twin of cmovI_reg_zero: $dst receives 0 (via zr)
       // when $cmp holds and $src otherwise.
8138   match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));
8139 
8140   ins_cost(INSN_COST * 2);
8141   format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}
8142 
8143   ins_encode %{
8144     __ cselw(as_Register($dst$$reg),
8145              zr,
8146              as_Register($src$$reg),
8147              (Assembler::Condition)$cmp$$cmpcode);
8148   %}
8149 
8150   ins_pipe(icond_reg);
8151 %}
8152 
8153 // special case for creating a boolean 0 or 1
8154 
8155 // n.b. this is selected in preference to the rule above because it
8156 // avoids loading constants 0 and 1 into a source register
8157 
8158 instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
       // Int conditional move (signed compare) between the constants 1 and 0.
       // Note the match orders the values as (one zero) although the operand
       // list declares zero first.  Neither constant needs a register:
       // csincw on zr/zr yields 0 when $cmp holds and zr + 1 = 1 otherwise.
8159   match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));
8160 
8161   ins_cost(INSN_COST * 2);
8162   format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}
8163 
8164   ins_encode %{
8165     // equivalently
8166     // cset(as_Register($dst$$reg),
8167     //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
8168     __ csincw(as_Register($dst$$reg),
8169              zr,
8170              zr,
8171              (Assembler::Condition)$cmp$$cmpcode);
8172   %}
8173 
8174   ins_pipe(icond_none);
8175 %}
8176 
8177 instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
       // Unsigned-compare twin of cmovI_reg_zero_one: csincw on zr/zr yields
       // 0 when $cmp holds and 1 otherwise, with no constant loaded into a
       // register.
8178   match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));
8179 
8180   ins_cost(INSN_COST * 2);
8181   format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}
8182 
8183   ins_encode %{
8184     // equivalently
8185     // cset(as_Register($dst$$reg),
8186     //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
8187     __ csincw(as_Register($dst$$reg),
8188              zr,
8189              zr,
8190              (Assembler::Condition)$cmp$$cmpcode);
8191   %}
8192 
8193   ins_pipe(icond_none);
8194 %}
8195 
8196 instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
       // Long conditional move keyed on a signed comparison.  Uses the 64-bit
       // csel: $dst receives $src2 when $cmp holds and $src1 otherwise.
8197   match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));
8198 
8199   ins_cost(INSN_COST * 2);
8200   format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}
8201 
8202   ins_encode %{
8203     __ csel(as_Register($dst$$reg),
8204             as_Register($src2$$reg),
8205             as_Register($src1$$reg),
8206             (Assembler::Condition)$cmp$$cmpcode);
8207   %}
8208 
8209   ins_pipe(icond_reg_reg);
8210 %}
8211 
8212 instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
       // Unsigned-compare twin of cmovL_reg_reg: $dst receives $src2 when
       // $cmp holds and $src1 otherwise.
8213   match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));
8214 
8215   ins_cost(INSN_COST * 2);
8216   format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}
8217 
8218   ins_encode %{
8219     __ csel(as_Register($dst$$reg),
8220             as_Register($src2$$reg),
8221             as_Register($src1$$reg),
8222             (Assembler::Condition)$cmp$$cmpcode);
8223   %}
8224 
8225   ins_pipe(icond_reg_reg);
8226 %}
8227 
8228 // special cases where one arg is zero
8229 
8230 instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
       // Long conditional move (signed compare) whose second CMoveL value is
       // the constant 0.  zr stands in for the constant: $dst receives 0 when
       // $cmp holds and $src otherwise.
8231   match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));
8232 
8233   ins_cost(INSN_COST * 2);
8234   format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}
8235 
8236   ins_encode %{
8237     __ csel(as_Register($dst$$reg),
8238             zr,
8239             as_Register($src$$reg),
8240             (Assembler::Condition)$cmp$$cmpcode);
8241   %}
8242 
8243   ins_pipe(icond_reg);
8244 %}
8245 
8246 instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
       // Unsigned-compare twin of cmovL_reg_zero: $dst receives 0 (via zr)
       // when $cmp holds and $src otherwise.
8247   match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));
8248 
8249   ins_cost(INSN_COST * 2);
8250   format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}
8251 
8252   ins_encode %{
8253     __ csel(as_Register($dst$$reg),
8254             zr,
8255             as_Register($src$$reg),
8256             (Assembler::Condition)$cmp$$cmpcode);
8257   %}
8258 
8259   ins_pipe(icond_reg);
8260 %}
8261 
8262 instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
       // Long conditional move (signed compare) whose first CMoveL value is
       // the constant 0.  zr stands in for the constant: $dst receives $src
       // when $cmp holds and 0 otherwise.
8263   match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));
8264 
8265   ins_cost(INSN_COST * 2);
8266   format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}
8267 
8268   ins_encode %{
8269     __ csel(as_Register($dst$$reg),
8270             as_Register($src$$reg),
8271             zr,
8272             (Assembler::Condition)$cmp$$cmpcode);
8273   %}
8274 
8275   ins_pipe(icond_reg);
8276 %}
8277 
8278 instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
       // Unsigned-compare twin of cmovL_zero_reg: $dst receives $src when
       // $cmp holds and 0 (via zr) otherwise.
8279   match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));
8280 
8281   ins_cost(INSN_COST * 2);
8282   format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}
8283 
8284   ins_encode %{
8285     __ csel(as_Register($dst$$reg),
8286             as_Register($src$$reg),
8287             zr,
8288             (Assembler::Condition)$cmp$$cmpcode);
8289   %}
8290 
8291   ins_pipe(icond_reg);
8292 %}
8293 
8294 instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
       // Pointer conditional move keyed on a signed comparison.  Uses the
       // 64-bit csel: $dst receives $src2 when $cmp holds and $src1
       // otherwise.
8295   match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));
8296 
8297   ins_cost(INSN_COST * 2);
8298   format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}
8299 
8300   ins_encode %{
8301     __ csel(as_Register($dst$$reg),
8302             as_Register($src2$$reg),
8303             as_Register($src1$$reg),
8304             (Assembler::Condition)$cmp$$cmpcode);
8305   %}
8306 
8307   ins_pipe(icond_reg_reg);
8308 %}
8309 
8310 instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
       // Unsigned-compare twin of cmovP_reg_reg: $dst receives $src2 when
       // $cmp holds and $src1 otherwise.
8311   match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));
8312 
8313   ins_cost(INSN_COST * 2);
8314   format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}
8315 
8316   ins_encode %{
8317     __ csel(as_Register($dst$$reg),
8318             as_Register($src2$$reg),
8319             as_Register($src1$$reg),
8320             (Assembler::Condition)$cmp$$cmpcode);
8321   %}
8322 
8323   ins_pipe(icond_reg_reg);
8324 %}
8325 
8326 // special cases where one arg is zero
8327 
8328 instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
       // Pointer conditional move (signed compare) whose second CMoveP value
       // is the null constant (immP0).  zr stands in for it: $dst receives 0
       // when $cmp holds and $src otherwise.
8329   match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));
8330 
8331   ins_cost(INSN_COST * 2);
8332   format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}
8333 
8334   ins_encode %{
8335     __ csel(as_Register($dst$$reg),
8336             zr,
8337             as_Register($src$$reg),
8338             (Assembler::Condition)$cmp$$cmpcode);
8339   %}
8340 
8341   ins_pipe(icond_reg);
8342 %}
8343 
8344 instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
       // Unsigned-compare twin of cmovP_reg_zero: $dst receives 0 (via zr)
       // when $cmp holds and $src otherwise.
8345   match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));
8346 
8347   ins_cost(INSN_COST * 2);
8348   format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}
8349 
8350   ins_encode %{
8351     __ csel(as_Register($dst$$reg),
8352             zr,
8353             as_Register($src$$reg),
8354             (Assembler::Condition)$cmp$$cmpcode);
8355   %}
8356 
8357   ins_pipe(icond_reg);
8358 %}
8359 
8360 instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
       // Pointer conditional move (signed compare) whose first CMoveP value
       // is the null constant (immP0).  zr stands in for it: $dst receives
       // $src when $cmp holds and 0 otherwise.
8361   match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));
8362 
8363   ins_cost(INSN_COST * 2);
8364   format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}
8365 
8366   ins_encode %{
8367     __ csel(as_Register($dst$$reg),
8368             as_Register($src$$reg),
8369             zr,
8370             (Assembler::Condition)$cmp$$cmpcode);
8371   %}
8372 
8373   ins_pipe(icond_reg);
8374 %}
8375 
8376 instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
       // Unsigned-compare twin of cmovP_zero_reg: $dst receives $src when
       // $cmp holds and 0 (via zr) otherwise.
8377   match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));
8378 
8379   ins_cost(INSN_COST * 2);
8380   format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}
8381 
8382   ins_encode %{
8383     __ csel(as_Register($dst$$reg),
8384             as_Register($src$$reg),
8385             zr,
8386             (Assembler::Condition)$cmp$$cmpcode);
8387   %}
8388 
8389   ins_pipe(icond_reg);
8390 %}
8391 
8392 instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
       // Compressed-pointer conditional move keyed on a signed comparison.
       // Uses the 32-bit cselw: $dst receives $src2 when $cmp holds and
       // $src1 otherwise.
8393   match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));
8394 
8395   ins_cost(INSN_COST * 2);
8396   format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}
8397 
8398   ins_encode %{
8399     __ cselw(as_Register($dst$$reg),
8400              as_Register($src2$$reg),
8401              as_Register($src1$$reg),
8402              (Assembler::Condition)$cmp$$cmpcode);
8403   %}
8404 
8405   ins_pipe(icond_reg_reg);
8406 %}
8407 
8408 instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
       // Compressed-pointer conditional move keyed on an unsigned comparison
       // (cmpOpU/rFlagsRegU).  Uses the 32-bit cselw: $dst receives $src2
       // when $cmp holds and $src1 otherwise.
8409   match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));
8410 
8411   ins_cost(INSN_COST * 2);
       // Tagged "unsigned" to match the operand types and the other
       // cmpOpU rules.
8412   format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}
8413 
8414   ins_encode %{
8415     __ cselw(as_Register($dst$$reg),
8416              as_Register($src2$$reg),
8417              as_Register($src1$$reg),
8418              (Assembler::Condition)$cmp$$cmpcode);
8419   %}
8420 
8421   ins_pipe(icond_reg_reg);
8422 %}
8423 
8424 // special cases where one arg is zero
8425 
8426 instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
       // Compressed-pointer conditional move (signed compare) whose second
       // CMoveN value is the narrow null constant (immN0).  zr stands in for
       // it: $dst receives 0 when $cmp holds and $src otherwise.
8427   match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));
8428 
8429   ins_cost(INSN_COST * 2);
8430   format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}
8431 
8432   ins_encode %{
8433     __ cselw(as_Register($dst$$reg),
8434              zr,
8435              as_Register($src$$reg),
8436              (Assembler::Condition)$cmp$$cmpcode);
8437   %}
8438 
8439   ins_pipe(icond_reg);
8440 %}
8441 
8442 instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
       // Unsigned-compare twin of cmovN_reg_zero: $dst receives 0 (via zr)
       // when $cmp holds and $src otherwise.
8443   match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));
8444 
8445   ins_cost(INSN_COST * 2);
8446   format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}
8447 
8448   ins_encode %{
8449     __ cselw(as_Register($dst$$reg),
8450              zr,
8451              as_Register($src$$reg),
8452              (Assembler::Condition)$cmp$$cmpcode);
8453   %}
8454 
8455   ins_pipe(icond_reg);
8456 %}
8457 
8458 instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
       // Compressed-pointer conditional move (signed compare) whose first
       // CMoveN value is the narrow null constant (immN0).  zr stands in for
       // it: $dst receives $src when $cmp holds and 0 otherwise.
8459   match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));
8460 
8461   ins_cost(INSN_COST * 2);
8462   format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}
8463 
8464   ins_encode %{
8465     __ cselw(as_Register($dst$$reg),
8466              as_Register($src$$reg),
8467              zr,
8468              (Assembler::Condition)$cmp$$cmpcode);
8469   %}
8470 
8471   ins_pipe(icond_reg);
8472 %}
8473 
8474 instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
       // Unsigned-compare twin of cmovN_zero_reg: $dst receives $src when
       // $cmp holds and 0 (via zr) otherwise.
8475   match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));
8476 
8477   ins_cost(INSN_COST * 2);
8478   format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}
8479 
8480   ins_encode %{
8481     __ cselw(as_Register($dst$$reg),
8482              as_Register($src$$reg),
8483              zr,
8484              (Assembler::Condition)$cmp$$cmpcode);
8485   %}
8486 
8487   ins_pipe(icond_reg);
8488 %}
8489 
8490 instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
8491 %{
       // Float conditional move keyed on a signed comparison.
       // $src2 is passed as the first fcsels source and $src1 as the second,
       // so $dst receives $src2 when $cmp holds and $src1 otherwise.
8492   match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));
8493 
8494   ins_cost(INSN_COST * 3);
8495 
       // Operand order in the text mirrors the order actually encoded below.
8496   format %{ "fcsels $dst, $src2, $src1, $cmp\t# signed cmove float" %}
8497   ins_encode %{
8498     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
8499     __ fcsels(as_FloatRegister($dst$$reg),
8500               as_FloatRegister($src2$$reg),
8501               as_FloatRegister($src1$$reg),
8502               cond);
8503   %}
8504 
8505   ins_pipe(pipe_class_default);
8506 %}
8507 
8508 instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
8509 %{
       // Float conditional move keyed on an unsigned comparison.
       // $src2 is passed as the first fcsels source and $src1 as the second,
       // so $dst receives $src2 when $cmp holds and $src1 otherwise.
8510   match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));
8511 
8512   ins_cost(INSN_COST * 3);
8513 
       // Operand order in the text mirrors the order actually encoded below.
8514   format %{ "fcsels $dst, $src2, $src1, $cmp\t# unsigned cmove float" %}
8515   ins_encode %{
8516     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
8517     __ fcsels(as_FloatRegister($dst$$reg),
8518               as_FloatRegister($src2$$reg),
8519               as_FloatRegister($src1$$reg),
8520               cond);
8521   %}
8522 
8523   ins_pipe(pipe_class_default);
8524 %}
8525 
8526 instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
8527 %{
       // Double conditional move keyed on a signed comparison.
       // $src2 is passed as the first fcseld source and $src1 as the second,
       // so $dst receives $src2 when $cmp holds and $src1 otherwise.
8528   match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));
8529 
8530   ins_cost(INSN_COST * 3);
8531 
       // Operand order in the text mirrors the order actually encoded below;
       // the tag says "double" because this rule matches CMoveD.
8532   format %{ "fcseld $dst, $src2, $src1, $cmp\t# signed cmove double" %}
8533   ins_encode %{
8534     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
8535     __ fcseld(as_FloatRegister($dst$$reg),
8536               as_FloatRegister($src2$$reg),
8537               as_FloatRegister($src1$$reg),
8538               cond);
8539   %}
8540 
8541   ins_pipe(pipe_class_default);
8542 %}
8543 
8544 instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
8545 %{
       // Double conditional move keyed on an unsigned comparison.
       // $src2 is passed as the first fcseld source and $src1 as the second,
       // so $dst receives $src2 when $cmp holds and $src1 otherwise.
8546   match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));
8547 
8548   ins_cost(INSN_COST * 3);
8549 
       // Operand order in the text mirrors the order actually encoded below;
       // the tag says "double" because this rule matches CMoveD.
8550   format %{ "fcseld $dst, $src2, $src1, $cmp\t# unsigned cmove double" %}
8551   ins_encode %{
8552     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
8553     __ fcseld(as_FloatRegister($dst$$reg),
8554               as_FloatRegister($src2$$reg),
8555               as_FloatRegister($src1$$reg),
8556               cond);
8557   %}
8558 
8559   ins_pipe(pipe_class_default);
8560 %}
8561 
8562 // ============================================================================
8563 // Arithmetic Instructions
8564 //
8565 
8566 // Integer Addition
8567 
8568 // TODO
8569 // these currently employ operations which do not set CR and hence are
8570 // not flagged as killing CR but we would like to isolate the cases
8571 // where we want to set flags from those where we don't. need to work
8572 // out how to do that.
8573 
8574 instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
       // Int register + register addition, emitted as a 32-bit addw.
       // No flags operand is declared; see the TODO above about CR.
8575   match(Set dst (AddI src1 src2));
8576 
8577   ins_cost(INSN_COST);
8578   format %{ "addw  $dst, $src1, $src2" %}
8579 
8580   ins_encode %{
8581     __ addw(as_Register($dst$$reg),
8582             as_Register($src1$$reg),
8583             as_Register($src2$$reg));
8584   %}
8585 
8586   ins_pipe(ialu_reg_reg);
8587 %}
8588 
8589 instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
       // Int register + immediate addition.  Shares the
       // aarch64_enc_addsubw_imm encoding class with subI_reg_imm; the
       // opcode value tells that encoding which operation is wanted.
8590   match(Set dst (AddI src1 src2));
8591 
8592   ins_cost(INSN_COST);
8593   format %{ "addw $dst, $src1, $src2" %}
8594 
8595   // use opcode to indicate that this is an add not a sub
8596   opcode(0x0);
8597 
8598   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
8599 
8600   ins_pipe(ialu_reg_imm);
8601 %}
8602 
8603 instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
       // Int addition of an immediate to a long that is narrowed to int: the
       // ConvL2I is folded into the match and the long register is fed
       // directly to the 32-bit add encoding.
       // NOTE(review): the rule name says "i2l" but the pattern matched is
       // ConvL2I.
8604   match(Set dst (AddI (ConvL2I src1) src2));
8605 
8606   ins_cost(INSN_COST);
8607   format %{ "addw $dst, $src1, $src2" %}
8608 
8609   // use opcode to indicate that this is an add not a sub
8610   opcode(0x0);
8611 
8612   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
8613 
8614   ins_pipe(ialu_reg_imm);
8615 %}
8616 
8617 // Pointer Addition
8618 instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
       // Pointer + long register offset, emitted as a 64-bit add.
8619   match(Set dst (AddP src1 src2));
8620 
8621   ins_cost(INSN_COST);
8622   format %{ "add $dst, $src1, $src2\t# ptr" %}
8623 
8624   ins_encode %{
8625     __ add(as_Register($dst$$reg),
8626            as_Register($src1$$reg),
8627            as_Register($src2$$reg));
8628   %}
8629 
8630   ins_pipe(ialu_reg_reg);
8631 %}
8632 
8633 instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
       // Pointer + sign-extended int offset.  The ConvI2L is folded into the
       // add by using the sxtw extended-register form, so no separate
       // conversion instruction is emitted.
8634   match(Set dst (AddP src1 (ConvI2L src2)));
8635 
8636   ins_cost(1.9 * INSN_COST);
8637   format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}
8638 
8639   ins_encode %{
8640     __ add(as_Register($dst$$reg),
8641            as_Register($src1$$reg),
8642            as_Register($src2$$reg), ext::sxtw);
8643   %}
8644 
8645   ins_pipe(ialu_reg_reg);
8646 %}
8647 
8648 instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
       // Pointer + (long offset << scale).  The shift is folded into the
       // address computation: the encoding emits lea on a base + index
       // Address with an lsl($scale) extend, although the format text
       // prints it as an add.
8649   match(Set dst (AddP src1 (LShiftL src2 scale)));
8650 
8651   ins_cost(1.9 * INSN_COST);
8652   format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}
8653 
8654   ins_encode %{
8655     __ lea(as_Register($dst$$reg),
8656            Address(as_Register($src1$$reg), as_Register($src2$$reg),
8657                    Address::lsl($scale$$constant)));
8658   %}
8659 
8660   ins_pipe(ialu_reg_reg_shift);
8661 %}
8662 
8663 instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
       // Pointer + ((long) int offset << scale).  Both the ConvI2L and the
       // shift are folded into the address computation: the encoding emits
       // lea on a base + index Address with an sxtw($scale) extend.
8664   match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));
8665 
8666   ins_cost(1.9 * INSN_COST);
8667   format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}
8668 
8669   ins_encode %{
8670     __ lea(as_Register($dst$$reg),
8671            Address(as_Register($src1$$reg), as_Register($src2$$reg),
8672                    Address::sxtw($scale$$constant)));
8673   %}
8674 
8675   ins_pipe(ialu_reg_reg_shift);
8676 %}
8677 
8678 instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
       // Left shift of a sign-extended int, i.e. ((long) src) << scale,
       // emitted as a single sbfiz.
       // NOTE(review): $cr is declared as an operand but no match or effect
       // clause in this rule refers to it -- confirm whether it is needed.
8679   match(Set dst (LShiftL (ConvI2L src) scale));
8680 
8681   ins_cost(INSN_COST);
8682   format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}
8683 
8684   ins_encode %{
         // lsb is the shift amount mod 64.  The field width is (-scale) mod 64
         // capped at 32 (the format text above does not show the cap).
8685     __ sbfiz(as_Register($dst$$reg),
8686           as_Register($src$$reg),
8687           $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
8688   %}
8689 
8690   ins_pipe(ialu_reg_shift);
8691 %}
8692 
8693 // Pointer Immediate Addition
8694 // n.b. this needs to be more expensive than using an indirect memory
8695 // operand
8696 instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
       // Pointer + immediate offset.  Shares the 64-bit
       // aarch64_enc_addsub_imm encoding class with the long add/sub
       // immediate rules; the opcode value selects the operation.
8697   match(Set dst (AddP src1 src2));
8698 
8699   ins_cost(INSN_COST);
8700   format %{ "add $dst, $src1, $src2\t# ptr" %}
8701 
8702   // use opcode to indicate that this is an add not a sub
8703   opcode(0x0);
8704 
8705   ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
8706 
8707   ins_pipe(ialu_reg_imm);
8708 %}
8709 
8710 // Long Addition
8711 instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
       // Long register + register addition, emitted as a 64-bit add.
8712 
8713   match(Set dst (AddL src1 src2));
8714 
8715   ins_cost(INSN_COST);
8716   format %{ "add  $dst, $src1, $src2" %}
8717 
8718   ins_encode %{
8719     __ add(as_Register($dst$$reg),
8720            as_Register($src1$$reg),
8721            as_Register($src2$$reg));
8722   %}
8723 
8724   ins_pipe(ialu_reg_reg);
8725 %}
8726 
8727 // Long Immediate Addition. No constant pool entries required.
8728 instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
       // Long register + immediate addition.  Shares the 64-bit
       // aarch64_enc_addsub_imm encoding class with subL_reg_imm; the
       // opcode value selects the operation.
8729   match(Set dst (AddL src1 src2));
8730 
8731   ins_cost(INSN_COST);
8732   format %{ "add $dst, $src1, $src2" %}
8733 
8734   // use opcode to indicate that this is an add not a sub
8735   opcode(0x0);
8736 
8737   ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
8738 
8739   ins_pipe(ialu_reg_imm);
8740 %}
8741 
8742 // Integer Subtraction
8743 instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
       // Int register - register subtraction, emitted as a 32-bit subw.
8744   match(Set dst (SubI src1 src2));
8745 
8746   ins_cost(INSN_COST);
8747   format %{ "subw  $dst, $src1, $src2" %}
8748 
8749   ins_encode %{
8750     __ subw(as_Register($dst$$reg),
8751             as_Register($src1$$reg),
8752             as_Register($src2$$reg));
8753   %}
8754 
8755   ins_pipe(ialu_reg_reg);
8756 %}
8757 
8758 // Immediate Subtraction
// Matches an ideal SubI of an int register and an immIAddSub constant.
// The encoding is delegated to the aarch64_enc_addsubw_imm encoding class;
// the opcode value distinguishes the sub form from the add form.
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2)
%{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // opcode 0x1 marks this as a sub rather than an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsubw_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
8772 
8773 // Long Subtraction
// Matches an ideal SubL of two long registers and emits one
// register-register sub.
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2)
%{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    __ sub(dst_reg, lhs_reg, rhs_reg);
  %}

  ins_pipe(ialu_reg_reg);
%}
8789 
// Long Immediate Subtraction. No constant pool entries required.
// Matches an ideal SubL of a long register and an immLAddSub constant.
// The encoding is delegated to the aarch64_enc_addsub_imm encoding class;
// the opcode value distinguishes the sub form from the add form.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  // a space now separates the mnemonic from $dst (it used to print "sub$dst")
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
8804 
8805 // Integer Negation (special case for sub)
8806 
// Matches an ideal SubI whose left operand is the immI0 constant and
// emits a single negw of the right operand.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src$$reg);
    __ negw(dst_reg, src_reg);
  %}

  ins_pipe(ialu_reg);
%}
8820 
8821 // Long Negation
8822 
// Matches an ideal SubL whose left operand is the immL0 constant and
// emits a single neg of the right operand.
// The source is declared iRegL: it is the right-hand input of a SubL, so
// it must be a long register operand, as in subL_reg_reg (it used to be
// the int operand class iRegIorL2I).
instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
8836 
8837 // Integer Multiply
8838 
// Matches an ideal MulI of two int registers and emits one mulw.
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2)
%{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    __ mulw(dst_reg, lhs_reg, rhs_reg);
  %}

  ins_pipe(imul_reg_reg);
%}
8853 
// Matches a MulL whose two inputs are both ConvI2L of int registers and
// emits one smull into a long destination, so no separate conversion
// instructions are needed.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2)
%{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    __ smull(dst_reg, lhs_reg, rhs_reg);
  %}

  ins_pipe(imul_reg_reg);
%}
8868 
8869 // Long Multiply
8870 
// Matches an ideal MulL of two long registers and emits one mul.
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2)
%{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    __ mul(dst_reg, lhs_reg, rhs_reg);
  %}

  ins_pipe(lmul_reg_reg);
%}
8885 
// Matches an ideal MulHiL of two long registers and emits one smulh.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  format %{ "smulh   $dst, $src1, $src2, \t# mulhi" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    __ smulh(dst_reg, lhs_reg, rhs_reg);
  %}

  ins_pipe(lmul_reg_reg);
%}
8901 
8902 // Combined Integer Multiply & Add/Sub
8903 
// Fuses an int multiply feeding an add, AddI src3 (MulI src1 src2), into
// one maddw.
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // print the 32-bit mnemonic that is actually emitted below
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
8919 
// Fuses an int multiply feeding a subtract, SubI src3 (MulI src1 src2),
// into one msubw.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // print the 32-bit mnemonic that is actually emitted below
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
8935 
8936 // Combined Long Multiply & Add/Sub
8937 
// Fuses a long multiply feeding an add, AddL src3 (MulL src1 src2), into
// one madd.
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3)
%{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    Register dst_reg    = as_Register($dst$$reg);
    Register mul_lhs    = as_Register($src1$$reg);
    Register mul_rhs    = as_Register($src2$$reg);
    Register addend_reg = as_Register($src3$$reg);
    __ madd(dst_reg, mul_lhs, mul_rhs, addend_reg);
  %}

  ins_pipe(lmac_reg_reg);
%}
8953 
// Fuses a long multiply feeding a subtract, SubL src3 (MulL src1 src2),
// into one msub.
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3)
%{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    Register dst_reg     = as_Register($dst$$reg);
    Register mul_lhs     = as_Register($src1$$reg);
    Register mul_rhs     = as_Register($src2$$reg);
    Register minuend_reg = as_Register($src3$$reg);
    __ msub(dst_reg, mul_lhs, mul_rhs, minuend_reg);
  %}

  ins_pipe(lmac_reg_reg);
%}
8969 
8970 // Integer Divide
8971 
// Matches an ideal DivI of two int registers. The encoding is delegated
// to the aarch64_enc_divw encoding class.
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2)
%{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode( aarch64_enc_divw(dst, src1, src2) );

  ins_pipe(idiv_reg_reg);
%}
8981 
// Matches (src1 >> div1) >>> div2 where both shift operands are immI_31
// and emits one logical shift right by 31.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2)
%{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    __ lsrw(dst_reg, src_reg, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
8991 
// Matches src + ((src >> div1) >>> div2) where both shift operands are
// immI_31 and emits one addw whose second operand is src shifted right
// logically by 31.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2)
%{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src$$reg);
    // the same register is both the plain and the shifted operand
    __ addw(dst_reg, src_reg, src_reg, Assembler::LSR, 31);
  %}

  ins_pipe(ialu_reg);
%}
9005 
9006 // Long Divide
9007 
// Matches an ideal DivL of two long registers. The encoding is delegated
// to the aarch64_enc_div encoding class.
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2)
%{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode( aarch64_enc_div(dst, src1, src2) );

  ins_pipe(ldiv_reg_reg);
%}
9017 
// Matches (src1 >> div1) >>> div2 where both shift operands are immL_63
// and emits one logical shift right by 63.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2)
%{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    __ lsr(dst_reg, src_reg, 63);
  %}

  ins_pipe(ialu_reg_shift);
%}
9027 
// Matches src + ((src >> div1) >>> div2) where both shift operands are
// immL_63 and emits one add whose second operand is src shifted right
// logically by 63.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  // show the LSR shift kind, as the int form div2Round does; without it
  // the listing reads like an add of an immediate
  format %{ "add $dst, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
9041 
9042 // Integer Remainder
9043 
// Matches an ideal ModI of two int registers. The encoding is delegated
// to the aarch64_enc_modw encoding class; the format text describes it as
// a divide into rscratch1 followed by a multiply-subtract.
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  // second line used to read "msubw($dst, ..." with an unbalanced paren
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
9054 
9055 // Long Remainder
9056 
// Matches an ideal ModL of two long registers. The encoding is delegated
// to the aarch64_enc_mod encoding class; the format text describes it as
// a divide into rscratch1 followed by a multiply-subtract.
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  // second line used to read "msub($dst, ..." with an unbalanced paren,
  // and the separator lacked the "\t" that modI uses to indent line two
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
9067 
9068 // Integer Shifts
9069 
9070 // Shift Left Register
// Matches an ideal LShiftI with the shift count in a register and emits
// one lslvw.
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2)
%{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg   = as_Register($dst$$reg);
    Register value_reg = as_Register($src1$$reg);
    Register count_reg = as_Register($src2$$reg);
    __ lslvw(dst_reg, value_reg, count_reg);
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9085 
9086 // Shift Left Immediate
// Matches an ideal LShiftI with a constant shift count and emits one
// lslw; only the low five bits of the constant are used.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2)
%{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    Register dst_reg   = as_Register($dst$$reg);
    Register value_reg = as_Register($src1$$reg);
    __ lslw(dst_reg, value_reg, $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9101 
9102 // Shift Right Logical Register
// Matches an ideal URShiftI with the shift count in a register and emits
// one lsrvw.
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2)
%{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg   = as_Register($dst$$reg);
    Register value_reg = as_Register($src1$$reg);
    Register count_reg = as_Register($src2$$reg);
    __ lsrvw(dst_reg, value_reg, count_reg);
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9117 
9118 // Shift Right Logical Immediate
// Matches an ideal URShiftI with a constant shift count and emits one
// lsrw; only the low five bits of the constant are used.
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2)
%{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    Register dst_reg   = as_Register($dst$$reg);
    Register value_reg = as_Register($src1$$reg);
    __ lsrw(dst_reg, value_reg, $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9133 
9134 // Shift Right Arithmetic Register
// Matches an ideal RShiftI with the shift count in a register and emits
// one asrvw.
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2)
%{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg   = as_Register($dst$$reg);
    Register value_reg = as_Register($src1$$reg);
    Register count_reg = as_Register($src2$$reg);
    __ asrvw(dst_reg, value_reg, count_reg);
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9149 
9150 // Shift Right Arithmetic Immediate
// Matches an ideal RShiftI with a constant shift count and emits one
// asrw; only the low five bits of the constant are used.
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2)
%{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    Register dst_reg   = as_Register($dst$$reg);
    Register value_reg = as_Register($src1$$reg);
    __ asrw(dst_reg, value_reg, $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9165 
9166 // Combined Int Mask and Right Shift (using UBFM)
9167 // TODO
9168 
9169 // Long Shifts
9170 
9171 // Shift Left Register
// Matches an ideal LShiftL of a long register with the shift count in an
// int register and emits one lslv.
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2)
%{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg   = as_Register($dst$$reg);
    Register value_reg = as_Register($src1$$reg);
    Register count_reg = as_Register($src2$$reg);
    __ lslv(dst_reg, value_reg, count_reg);
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9186 
9187 // Shift Left Immediate
// Matches an ideal LShiftL with a constant shift count and emits one lsl;
// only the low six bits of the constant are used.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2)
%{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    Register dst_reg   = as_Register($dst$$reg);
    Register value_reg = as_Register($src1$$reg);
    __ lsl(dst_reg, value_reg, $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9202 
9203 // Shift Right Logical Register
// Matches an ideal URShiftL of a long register with the shift count in an
// int register and emits one lsrv.
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2)
%{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg   = as_Register($dst$$reg);
    Register value_reg = as_Register($src1$$reg);
    Register count_reg = as_Register($src2$$reg);
    __ lsrv(dst_reg, value_reg, count_reg);
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9218 
9219 // Shift Right Logical Immediate
// Matches an ideal URShiftL with a constant shift count and emits one
// lsr; only the low six bits of the constant are used.
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2)
%{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    Register dst_reg   = as_Register($dst$$reg);
    Register value_reg = as_Register($src1$$reg);
    __ lsr(dst_reg, value_reg, $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9234 
9235 // A special-case pattern for card table stores.
// Matches a URShiftL applied to CastP2X of a pointer register, so the
// shift reads the pointer register directly and no separate cast
// instruction is emitted. Only the low six bits of the constant are used.
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2)
%{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register ptr_reg = as_Register($src1$$reg);
    __ lsr(dst_reg, ptr_reg, $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9250 
9251 // Shift Right Arithmetic Register
// Matches an ideal RShiftL of a long register with the shift count in an
// int register and emits one asrv.
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2)
%{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg   = as_Register($dst$$reg);
    Register value_reg = as_Register($src1$$reg);
    Register count_reg = as_Register($src2$$reg);
    __ asrv(dst_reg, value_reg, count_reg);
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9266 
9267 // Shift Right Arithmetic Immediate
// Matches an ideal RShiftL with a constant shift count and emits one asr;
// only the low six bits of the constant are used.
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2)
%{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    Register dst_reg   = as_Register($dst$$reg);
    Register value_reg = as_Register($src1$$reg);
    __ asr(dst_reg, value_reg, $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9282 
9283 // BEGIN This section of the file is automatically generated. Do not edit --------------
9284 
// Matches XorL of a long register with the immL_M1 constant and emits one
// eon against zr with no shift.
instruct regL_not_reg(iRegLNoSp dst, iRegL src1, immL_M1 m1, rFlagsReg cr)
%{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    __ eon(dst_reg, src_reg, zr, Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
// Matches XorI of an int register with the immI_M1 constant and emits one
// eonw against zr with no shift.
instruct regI_not_reg(iRegINoSp dst, iRegIorL2I src1, immI_M1 m1, rFlagsReg cr)
%{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    __ eonw(dst_reg, src_reg, zr, Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
9317 
// Matches AndI of src1 with (src2 XorI immI_M1) and emits one bicw with
// an unshifted second operand.
instruct AndI_reg_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                          immI_M1 m1, rFlagsReg cr)
%{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg      = as_Register($dst$$reg);
    Register lhs_reg      = as_Register($src1$$reg);
    Register inverted_reg = as_Register($src2$$reg);
    __ bicw(dst_reg, lhs_reg, inverted_reg, Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9334 
// Matches AndL of src1 with (src2 XorL immL_M1) and emits one bic with an
// unshifted second operand.
instruct AndL_reg_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                          immL_M1 m1, rFlagsReg cr)
%{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg      = as_Register($dst$$reg);
    Register lhs_reg      = as_Register($src1$$reg);
    Register inverted_reg = as_Register($src2$$reg);
    __ bic(dst_reg, lhs_reg, inverted_reg, Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9351 
// Matches OrI of src1 with (src2 XorI immI_M1) and emits one ornw with an
// unshifted second operand.
instruct OrI_reg_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                         immI_M1 m1, rFlagsReg cr)
%{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg      = as_Register($dst$$reg);
    Register lhs_reg      = as_Register($src1$$reg);
    Register inverted_reg = as_Register($src2$$reg);
    __ ornw(dst_reg, lhs_reg, inverted_reg, Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9368 
// Matches OrL of src1 with (src2 XorL immL_M1) and emits one orn with an
// unshifted second operand.
instruct OrL_reg_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                         immL_M1 m1, rFlagsReg cr)
%{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg      = as_Register($dst$$reg);
    Register lhs_reg      = as_Register($src1$$reg);
    Register inverted_reg = as_Register($src2$$reg);
    __ orn(dst_reg, lhs_reg, inverted_reg, Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9385 
// Matches XorI of the immI_M1 constant with (src2 XorI src1) and emits
// one eonw with an unshifted second operand.
instruct XorI_reg_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                          immI_M1 m1, rFlagsReg cr)
%{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    __ eonw(dst_reg, lhs_reg, rhs_reg, Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9402 
// Matches XorL of the immL_M1 constant with (src2 XorL src1) and emits
// one eon with an unshifted second operand.
instruct XorL_reg_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                          immL_M1 m1, rFlagsReg cr)
%{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register lhs_reg = as_Register($src1$$reg);
    Register rhs_reg = as_Register($src2$$reg);
    __ eon(dst_reg, lhs_reg, rhs_reg, Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9419 
// Matches AndI of src1 with ((src2 >>> src3) XorI immI_M1) and emits one
// bicw whose second operand is src2 shifted by LSR.
// NOTE(review): this instruct sits in the section marked as automatically
// generated; the same change presumably needs mirroring in the generator.
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    // 32-bit form: mask the shift amount to five bits (0..31), matching
    // the plain int shift instructs; 0x3f is only valid for 64-bit forms
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9437 
// Matches AndL of src1 with ((src2 >>> src3) XorL immL_M1) and emits one
// bic whose second operand is src2 shifted by LSR.
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                  immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg     = as_Register($dst$$reg);
    Register lhs_reg     = as_Register($src1$$reg);
    Register shifted_reg = as_Register($src2$$reg);
    __ bic(dst_reg, lhs_reg, shifted_reg,
           Assembler::LSR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9455 
// Matches AndI of src1 with ((src2 >> src3) XorI immI_M1) and emits one
// bicw whose second operand is src2 shifted by ASR.
// NOTE(review): this instruct sits in the section marked as automatically
// generated; the same change presumably needs mirroring in the generator.
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    // 32-bit form: mask the shift amount to five bits (0..31), matching
    // the plain int shift instructs; 0x3f is only valid for 64-bit forms
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9473 
// Matches AndL of src1 with ((src2 >> src3) XorL immL_M1) and emits one
// bic whose second operand is src2 shifted by ASR.
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                 immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg     = as_Register($dst$$reg);
    Register lhs_reg     = as_Register($src1$$reg);
    Register shifted_reg = as_Register($src2$$reg);
    __ bic(dst_reg, lhs_reg, shifted_reg,
           Assembler::ASR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9491 
// Matches AndI of src1 with ((src2 << src3) XorI immI_M1) and emits one
// bicw whose second operand is src2 shifted by LSL.
// NOTE(review): this instruct sits in the section marked as automatically
// generated; the same change presumably needs mirroring in the generator.
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    // 32-bit form: mask the shift amount to five bits (0..31), matching
    // the plain int shift instructs; 0x3f is only valid for 64-bit forms
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9509 
// Matches AndL of src1 with ((src2 << src3) XorL immL_M1) and emits one
// bic whose second operand is src2 shifted by LSL.
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                 immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg     = as_Register($dst$$reg);
    Register lhs_reg     = as_Register($src1$$reg);
    Register shifted_reg = as_Register($src2$$reg);
    __ bic(dst_reg, lhs_reg, shifted_reg,
           Assembler::LSL, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9527 
// Matches XorI of immI_M1 with ((src2 >>> src3) XorI src1) and emits one
// eonw whose second operand is src2 shifted by LSR.
// NOTE(review): this instruct sits in the section marked as automatically
// generated; the same change presumably needs mirroring in the generator.
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    // 32-bit form: mask the shift amount to five bits (0..31), matching
    // the plain int shift instructs; 0x3f is only valid for 64-bit forms
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9545 
// Matches XorL of immL_M1 with ((src2 >>> src3) XorL src1) and emits one
// eon whose second operand is src2 shifted by LSR.
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                  immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg     = as_Register($dst$$reg);
    Register lhs_reg     = as_Register($src1$$reg);
    Register shifted_reg = as_Register($src2$$reg);
    __ eon(dst_reg, lhs_reg, shifted_reg,
           Assembler::LSR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9563 
// Matches XorI of immI_M1 with ((src2 >> src3) XorI src1) and emits one
// eonw whose second operand is src2 shifted by ASR.
// NOTE(review): this instruct sits in the section marked as automatically
// generated; the same change presumably needs mirroring in the generator.
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    // 32-bit form: mask the shift amount to five bits (0..31), matching
    // the plain int shift instructs; 0x3f is only valid for 64-bit forms
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9581 
// Matches XorL of immL_M1 with ((src2 >> src3) XorL src1) and emits one
// eon whose second operand is src2 shifted by ASR.
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                 immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg     = as_Register($dst$$reg);
    Register lhs_reg     = as_Register($src1$$reg);
    Register shifted_reg = as_Register($src2$$reg);
    __ eon(dst_reg, lhs_reg, shifted_reg,
           Assembler::ASR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9599 
// Matches XorI of immI_M1 with ((src2 << src3) XorI src1) and emits one
// eonw whose second operand is src2 shifted by LSL.
// NOTE(review): this instruct sits in the section marked as automatically
// generated; the same change presumably needs mirroring in the generator.
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    // 32-bit form: mask the shift amount to five bits (0..31), matching
    // the plain int shift instructs; 0x3f is only valid for 64-bit forms
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9617 
// Matches XorL of immL_M1 with ((src2 << src3) XorL src1) and emits one
// eon whose second operand is src2 shifted by LSL.
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                 immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg     = as_Register($dst$$reg);
    Register lhs_reg     = as_Register($src1$$reg);
    Register shifted_reg = as_Register($src2$$reg);
    __ eon(dst_reg, lhs_reg, shifted_reg,
           Assembler::LSL, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9635 
// Matches OrI of src1 with ((src2 >>> src3) XorI immI_M1) and emits one
// ornw whose second operand is src2 shifted by LSR.
// NOTE(review): this instruct sits in the section marked as automatically
// generated; the same change presumably needs mirroring in the generator.
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    // 32-bit form: mask the shift amount to five bits (0..31), matching
    // the plain int shift instructs; 0x3f is only valid for 64-bit forms
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9653 
// Matches OrL of src1 with ((src2 >>> src3) XorL immL_M1) and emits one
// orn whose second operand is src2 shifted by LSR.
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                 immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg     = as_Register($dst$$reg);
    Register lhs_reg     = as_Register($src1$$reg);
    Register shifted_reg = as_Register($src2$$reg);
    __ orn(dst_reg, lhs_reg, shifted_reg,
           Assembler::LSR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9671 
// Matches OrI of src1 with ((src2 >> src3) XorI immI_M1) and emits one
// ornw whose second operand is src2 shifted by ASR.
// NOTE(review): this instruct sits in the section marked as automatically
// generated; the same change presumably needs mirroring in the generator.
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    // 32-bit form: mask the shift amount to five bits (0..31), matching
    // the plain int shift instructs; 0x3f is only valid for 64-bit forms
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9689 
// Matches OrL of src1 with ((src2 >> src3) XorL immL_M1) and emits one
// orn whose second operand is src2 shifted by ASR.
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg     = as_Register($dst$$reg);
    Register lhs_reg     = as_Register($src1$$reg);
    Register shifted_reg = as_Register($src2$$reg);
    __ orn(dst_reg, lhs_reg, shifted_reg,
           Assembler::ASR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9707 
// Matches OrI of src1 with ((src2 << src3) XorI immI_M1) and emits one
// ornw whose second operand is src2 shifted by LSL.
// NOTE(review): this instruct sits in the section marked as automatically
// generated; the same change presumably needs mirroring in the generator.
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    // 32-bit form: mask the shift amount to five bits (0..31), matching
    // the plain int shift instructs; 0x3f is only valid for 64-bit forms
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9725 
// Matches OrL of src1 with ((src2 << src3) XorL immL_M1) and emits one
// orn whose second operand is src2 shifted by LSL.
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg     = as_Register($dst$$reg);
    Register lhs_reg     = as_Register($src1$$reg);
    Register shifted_reg = as_Register($src2$$reg);
    __ orn(dst_reg, lhs_reg, shifted_reg,
           Assembler::LSL, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9743 
// Matches AndI of src1 with (src2 >>> src3) and emits one andw whose
// second operand is src2 shifted by LSR.
// NOTE(review): this instruct sits in the section marked as automatically
// generated; the same change presumably needs mirroring in the generator.
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    // 32-bit form: mask the shift amount to five bits (0..31), matching
    // the plain int shift instructs; 0x3f is only valid for 64-bit forms
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9762 
// Matches AndL of src1 with (src2 >>> src3) and emits one andr whose
// second operand is src2 shifted by LSR.
instruct AndL_reg_URShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                              immI src3, rFlagsReg cr)
%{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg     = as_Register($dst$$reg);
    Register lhs_reg     = as_Register($src1$$reg);
    Register shifted_reg = as_Register($src2$$reg);
    __ andr(dst_reg, lhs_reg, shifted_reg,
            Assembler::LSR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9781 
// 32-bit: dst = src1 & (src2 >> src3), with the constant arithmetic shift
// folded into the shifted-register form of andw.
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    // Int shifts use only the low five bits of the count, and the 32-bit
    // shifted-register encoding accepts amounts 0..31 only.
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9800 
// 64-bit: dst = src1 & (src2 >> src3), emitted as one and with an
// ASR-shifted register operand.
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src1$$reg);
    const Register rm = as_Register($src2$$reg);
    const int amount = $src3$$constant & 0x3f;
    __ andr(rd, rn, rm, Assembler::ASR, amount);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9819 
// 32-bit: dst = src1 & (src2 << src3), with the constant shift folded into
// the shifted-register form of andw.
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    // Int shifts use only the low five bits of the count, and the 32-bit
    // shifted-register encoding accepts amounts 0..31 only.
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9838 
// 64-bit: dst = src1 & (src2 << src3), emitted as one and with an
// LSL-shifted register operand.
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src1$$reg);
    const Register rm = as_Register($src2$$reg);
    const int amount = $src3$$constant & 0x3f;
    __ andr(rd, rn, rm, Assembler::LSL, amount);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9857 
// 32-bit: dst = src1 ^ (src2 >>> src3), with the constant shift folded into
// the shifted-register form of eorw.
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    // Int shifts use only the low five bits of the count, and the 32-bit
    // shifted-register encoding accepts amounts 0..31 only.
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9876 
// 64-bit: dst = src1 ^ (src2 >>> src3), emitted as one eor with an
// LSR-shifted register operand.
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src1$$reg);
    const Register rm = as_Register($src2$$reg);
    const int amount = $src3$$constant & 0x3f;
    __ eor(rd, rn, rm, Assembler::LSR, amount);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9895 
// 32-bit: dst = src1 ^ (src2 >> src3), with the constant arithmetic shift
// folded into the shifted-register form of eorw.
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    // Int shifts use only the low five bits of the count, and the 32-bit
    // shifted-register encoding accepts amounts 0..31 only.
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9914 
// 64-bit: dst = src1 ^ (src2 >> src3), emitted as one eor with an
// ASR-shifted register operand.
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src1$$reg);
    const Register rm = as_Register($src2$$reg);
    const int amount = $src3$$constant & 0x3f;
    __ eor(rd, rn, rm, Assembler::ASR, amount);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9933 
// 32-bit: dst = src1 ^ (src2 << src3), with the constant shift folded into
// the shifted-register form of eorw.
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    // Int shifts use only the low five bits of the count, and the 32-bit
    // shifted-register encoding accepts amounts 0..31 only.
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9952 
// 64-bit: dst = src1 ^ (src2 << src3), emitted as one eor with an
// LSL-shifted register operand.
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src1$$reg);
    const Register rm = as_Register($src2$$reg);
    const int amount = $src3$$constant & 0x3f;
    __ eor(rd, rn, rm, Assembler::LSL, amount);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9971 
// 32-bit: dst = src1 | (src2 >>> src3), with the constant shift folded into
// the shifted-register form of orrw.
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    // Int shifts use only the low five bits of the count, and the 32-bit
    // shifted-register encoding accepts amounts 0..31 only.
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9990 
// 64-bit: dst = src1 | (src2 >>> src3), emitted as one orr with an
// LSR-shifted register operand.
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src1$$reg);
    const Register rm = as_Register($src2$$reg);
    const int amount = $src3$$constant & 0x3f;
    __ orr(rd, rn, rm, Assembler::LSR, amount);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10009 
// 32-bit: dst = src1 | (src2 >> src3), with the constant arithmetic shift
// folded into the shifted-register form of orrw.
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    // Int shifts use only the low five bits of the count, and the 32-bit
    // shifted-register encoding accepts amounts 0..31 only.
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10028 
// 64-bit: dst = src1 | (src2 >> src3), emitted as one orr with an
// ASR-shifted register operand.
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src1$$reg);
    const Register rm = as_Register($src2$$reg);
    const int amount = $src3$$constant & 0x3f;
    __ orr(rd, rn, rm, Assembler::ASR, amount);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10047 
// 32-bit: dst = src1 | (src2 << src3), with the constant shift folded into
// the shifted-register form of orrw.
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    // Int shifts use only the low five bits of the count, and the 32-bit
    // shifted-register encoding accepts amounts 0..31 only.
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10066 
// 64-bit: dst = src1 | (src2 << src3), emitted as one orr with an
// LSL-shifted register operand.
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src1$$reg);
    const Register rm = as_Register($src2$$reg);
    const int amount = $src3$$constant & 0x3f;
    __ orr(rd, rn, rm, Assembler::LSL, amount);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10085 
// 32-bit: dst = src1 + (src2 >>> src3), with the constant shift folded into
// the shifted-register form of addw.
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    // Int shifts use only the low five bits of the count, and the 32-bit
    // shifted-register encoding accepts amounts 0..31 only.
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10104 
// 64-bit: dst = src1 + (src2 >>> src3), emitted as one add with an
// LSR-shifted register operand.
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src1$$reg);
    const Register rm = as_Register($src2$$reg);
    const int amount = $src3$$constant & 0x3f;
    __ add(rd, rn, rm, Assembler::LSR, amount);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10123 
// 32-bit: dst = src1 + (src2 >> src3), with the constant arithmetic shift
// folded into the shifted-register form of addw.
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    // Int shifts use only the low five bits of the count, and the 32-bit
    // shifted-register encoding accepts amounts 0..31 only.
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10142 
// 64-bit: dst = src1 + (src2 >> src3), emitted as one add with an
// ASR-shifted register operand.
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src1$$reg);
    const Register rm = as_Register($src2$$reg);
    const int amount = $src3$$constant & 0x3f;
    __ add(rd, rn, rm, Assembler::ASR, amount);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10161 
// 32-bit: dst = src1 + (src2 << src3), with the constant shift folded into
// the shifted-register form of addw.
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    // Int shifts use only the low five bits of the count, and the 32-bit
    // shifted-register encoding accepts amounts 0..31 only.
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10180 
// 64-bit: dst = src1 + (src2 << src3), emitted as one add with an
// LSL-shifted register operand.
instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src1$$reg);
    const Register rm = as_Register($src2$$reg);
    const int amount = $src3$$constant & 0x3f;
    __ add(rd, rn, rm, Assembler::LSL, amount);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10199 
// 32-bit: dst = src1 - (src2 >>> src3), with the constant shift folded into
// the shifted-register form of subw.
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    // Int shifts use only the low five bits of the count, and the 32-bit
    // shifted-register encoding accepts amounts 0..31 only.
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10218 
// 64-bit: dst = src1 - (src2 >>> src3), emitted as one sub with an
// LSR-shifted register operand.
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src1$$reg);
    const Register rm = as_Register($src2$$reg);
    const int amount = $src3$$constant & 0x3f;
    __ sub(rd, rn, rm, Assembler::LSR, amount);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10237 
// 32-bit: dst = src1 - (src2 >> src3), with the constant arithmetic shift
// folded into the shifted-register form of subw.
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    // Int shifts use only the low five bits of the count, and the 32-bit
    // shifted-register encoding accepts amounts 0..31 only.
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10256 
// 64-bit: dst = src1 - (src2 >> src3), emitted as one sub with an
// ASR-shifted register operand.
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src1$$reg);
    const Register rm = as_Register($src2$$reg);
    const int amount = $src3$$constant & 0x3f;
    __ sub(rd, rn, rm, Assembler::ASR, amount);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10275 
// 32-bit: dst = src1 - (src2 << src3), with the constant shift folded into
// the shifted-register form of subw.
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    // Int shifts use only the low five bits of the count, and the 32-bit
    // shifted-register encoding accepts amounts 0..31 only.
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10294 
// 64-bit: dst = src1 - (src2 << src3), emitted as one sub with an
// LSL-shifted register operand.
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src1$$reg);
    const Register rm = as_Register($src2$$reg);
    const int amount = $src3$$constant & 0x3f;
    __ sub(rd, rn, rm, Assembler::LSL, amount);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10313 
10314 
10315 
// Shift Left followed by (arithmetic) Shift Right, 64-bit.
// This idiom is used by the compiler for the i2b bytecode etc.
// Both constant shifts are folded into a single sbfm.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    const int lshift = $lshift_count$$constant;
    const int rshift = $rshift_count$$constant;
    // Second immediate: 63 - lshift; first immediate: the net right
    // shift, reduced modulo 64.
    const int imms = 63 - lshift;
    const int immr = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg), as_Register($src$$reg), immr, imms);
  %}

  ins_pipe(ialu_reg_shift);
%}
10338 
// Shift Left followed by (arithmetic) Shift Right, 32-bit.
// This idiom is used by the compiler for the i2b bytecode etc.
// Both constant shifts are folded into a single sbfmw.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    const int lshift = $lshift_count$$constant;
    const int rshift = $rshift_count$$constant;
    // Second immediate: 31 - lshift; first immediate: the net right
    // shift, reduced modulo 32.
    const int imms = 31 - lshift;
    const int immr = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg), as_Register($src$$reg), immr, imms);
  %}

  ins_pipe(ialu_reg_shift);
%}
10361 
// Shift Left followed by (logical) Shift Right, 64-bit.
// Unsigned counterpart of the sbfm idiom: both constant shifts are folded
// into a single ubfm.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    const int lshift = $lshift_count$$constant;
    const int rshift = $rshift_count$$constant;
    // Second immediate: 63 - lshift; first immediate: the net right
    // shift, reduced modulo 64.
    const int imms = 63 - lshift;
    const int immr = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg), as_Register($src$$reg), immr, imms);
  %}

  ins_pipe(ialu_reg_shift);
%}
10384 
// Shift Left followed by (logical) Shift Right, 32-bit.
// Unsigned counterpart of the sbfmw idiom: both constant shifts are folded
// into a single ubfmw.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    const int lshift = $lshift_count$$constant;
    const int rshift = $rshift_count$$constant;
    // Second immediate: 31 - lshift; first immediate: the net right
    // shift, reduced modulo 32.
    const int imms = 31 - lshift;
    const int immr = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg), as_Register($src$$reg), immr, imms);
  %}

  ins_pipe(ialu_reg_shift);
%}
10407 // Bitfield extract with shift & mask
10408 
// 32-bit unsigned bitfield extract: dst = (src >>> rshift) & mask, where
// mask is a contiguous run of low-order one bits (immI_bitmask).
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));
  // Make sure we are not going to exceed what ubfxw can do: the field
  // (lsb = rshift, width = log2(mask + 1)) must lie within 32 bits.
  predicate((exact_log2((intptr_t)n->in(2)->get_int() + 1)
             + (n->in(1)->in(2)->get_int() & 31)) <= (31 + 1));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    // Int shifts use only the low five bits of the count.
    int rshift = $rshift$$constant & 31;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// 64-bit unsigned bitfield extract: dst = (src >>> rshift) & mask, where
// mask is a contiguous run of low-order one bits (immL_bitmask).
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));
  // Make sure we are not going to exceed what ubfx can do: the field
  // (lsb = rshift, width = log2(mask + 1)) must lie within 64 bits.
  predicate((exact_log2(n->in(2)->get_long() + 1)
             + (n->in(1)->in(2)->get_int() & 63)) <= (63 + 1));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    // Long shifts use only the low six bits of the count.
    int rshift = $rshift$$constant & 63;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
10439 
// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));
  // The extracted field must stay inside the low 32 bits: src is an int
  // value, and the 64-bit ubfx would otherwise pull in bits from the upper
  // half of the register.
  predicate((exact_log2((intptr_t)n->in(1)->in(2)->get_int() + 1)
             + (n->in(1)->in(1)->in(2)->get_int() & 31)) <= (31 + 1));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    // Int shifts use only the low five bits of the count.
    int rshift = $rshift$$constant & 31;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
10457 
10458 // Rotations
10459 
// 64-bit (src1 << lshift) | (src2 >>> rshift) where the two constant shift
// counts sum to a multiple of 64: emitted as a single extr.
instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src1$$reg);
    const Register rm = as_Register($src2$$reg);
    __ extr(rd, rn, rm, $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
10474 
// 32-bit (src1 << lshift) | (src2 >>> rshift) where the two constant shift
// counts sum to a multiple of 32: emitted as a single extrw.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  // Print the mnemonic that is actually emitted (the 32-bit extrw).
  format %{ "extrw $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
10489 
// 64-bit (src1 << lshift) + (src2 >>> rshift) where the two constant shift
// counts sum to a multiple of 64: emitted as a single extr.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src1$$reg);
    const Register rm = as_Register($src2$$reg);
    __ extr(rd, rn, rm, $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
10504 
// 32-bit (src1 << lshift) + (src2 >>> rshift) where the two constant shift
// counts sum to a multiple of 32: emitted as a single extrw.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  // Print the mnemonic that is actually emitted (the 32-bit extrw).
  format %{ "extrw $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
10519 
10520 
// rol expander (64-bit).  Has no match rule of its own; it is only
// instantiated from the expand blocks of the rolL_rReg_Var_* patterns.

instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src$$reg);
    // Rotate left by n is performed as rotate right by -n; the negated
    // count is built in rscratch1.
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(rd, rn, rscratch1);
  %}
  ins_pipe(ialu_reg_reg_vshift);
%}
10536 
// rol expander (32-bit).  Has no match rule of its own; it is meant to be
// instantiated from expand blocks.

instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src$$reg);
    // Rotate left by n is performed as rotate right by -n; the negated
    // count is built in rscratch1.
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(rd, rn, rscratch1);
  %}
  ins_pipe(ialu_reg_reg_vshift);
%}
10552 
// 64-bit variable rotate left spelled (src << shift) | (src >>> (64 - shift)).
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift,
                            immI_64 c_64, rFlagsReg cr) %{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}
10561 
// 64-bit variable rotate left spelled (src << shift) | (src >>> (0 - shift)).
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift,
                          immI0 c0, rFlagsReg cr) %{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}
10570 
// 32-bit variable rotate left spelled (src << shift) | (src >>> (32 - shift)).
// The operands are int registers and the expansion uses the 32-bit rol
// expander, so the OrI pattern can actually match and rotates 32 bits.
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
10579 
// 32-bit variable rotate left spelled (src << shift) | (src >>> (0 - shift)).
// The operands are int registers and the expansion uses the 32-bit rol
// expander, so the OrI pattern can actually match and rotates 32 bits.
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
10588 
// ror expander (64-bit).  Has no match rule of its own; it is only
// instantiated from the expand blocks of the rorL_rReg_Var_* patterns.

instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src$$reg);
    const Register count = as_Register($shift$$reg);
    __ rorv(rd, rn, count);
  %}
  ins_pipe(ialu_reg_reg_vshift);
%}
10603 
// ror expander (32-bit).  Has no match rule of its own; it is meant to be
// instantiated from expand blocks.

instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src$$reg);
    const Register count = as_Register($shift$$reg);
    __ rorvw(rd, rn, count);
  %}
  ins_pipe(ialu_reg_reg_vshift);
%}
10618 
// 64-bit variable rotate right spelled (src >>> shift) | (src << (64 - shift)).
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift,
                            immI_64 c_64, rFlagsReg cr) %{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}
10627 
// 64-bit variable rotate right spelled (src >>> shift) | (src << (0 - shift)).
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift,
                          immI0 c0, rFlagsReg cr) %{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}
10636 
// 32-bit variable rotate right spelled (src >>> shift) | (src << (32 - shift)).
// The operands are int registers and the expansion uses the 32-bit ror
// expander, so the OrI pattern can actually match and rotates 32 bits.
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
10645 
// 32-bit variable rotate right spelled (src >>> shift) | (src << (0 - shift)).
// The operands are int registers and the expansion uses the 32-bit ror
// expander, so the OrI pattern can actually match and rotates 32 bits.
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
10654 
10655 // Add/subtract (extended)
10656 
// dst = src1 + (long) src2: the int-to-long conversion is folded into the
// add as an sxtw extended-register operand.
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src1$$reg);
    const Register rm = as_Register($src2$$reg);
    __ add(rd, rn, rm, ext::sxtw);
  %}
  ins_pipe(ialu_reg_reg);
%};
10669 
// dst = src1 - (long) src2: the int-to-long conversion is folded into the
// sub as an sxtw extended-register operand.
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, sxtw $src2" %}

  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src1$$reg);
    const Register rm = as_Register($src2$$reg);
    __ sub(rd, rn, rm, ext::sxtw);
  %}
  ins_pipe(ialu_reg_reg);
%};
10682 
10683 
// 32-bit add of ((src2 << 16) >> 16): the shift pair is replaced by an
// sxth extended-register operand.
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src1$$reg);
    const Register rm = as_Register($src2$$reg);
    __ add(rd, rn, rm, ext::sxth);
  %}
  ins_pipe(ialu_reg_reg);
%}
10696 
// 32-bit add of ((src2 << 24) >> 24): the shift pair is replaced by an
// sxtb extended-register operand.
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

  ins_encode %{
    const Register rd = as_Register($dst$$reg);
    const Register rn = as_Register($src1$$reg);
    const Register rm = as_Register($src2$$reg);
    __ add(rd, rn, rm, ext::sxtb);
  %}
  ins_pipe(ialu_reg_reg);
%}
10709 
// Int add where the second input is (src2 << lshift) >>> rshift (logical
// right shift), both counts constrained by immI_24.  The shift pair is
// folded into the uxtb extend of one add.
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

  ins_encode %{
    __ add($dst$$Register, $src1$$Register, $src2$$Register, ext::uxtb);
  %}

  ins_pipe(ialu_reg_reg);
%}
10722 
// Long add where the second input is (src2 << lshift) >> rshift (arithmetic
// right shift), both counts constrained by immI_48.  The shift pair is
// folded into the sxth extend of one add.
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

  ins_encode %{
    __ add($dst$$Register, $src1$$Register, $src2$$Register, ext::sxth);
  %}

  ins_pipe(ialu_reg_reg);
%}
10735 
// Long add where the second input is (src2 << lshift) >> rshift (arithmetic
// right shift), both counts constrained by immI_32.  The shift pair is
// folded into the sxtw extend of one add.
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

  ins_encode %{
    __ add($dst$$Register, $src1$$Register, $src2$$Register, ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}
10748 
// Long add where the second input is (src2 << lshift) >> rshift (arithmetic
// right shift), both counts constrained by immI_56.  The shift pair is
// folded into the sxtb extend of one add.
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

  ins_encode %{
    __ add($dst$$Register, $src1$$Register, $src2$$Register, ext::sxtb);
  %}

  ins_pipe(ialu_reg_reg);
%}
10761 
// Long add where the second input is (src2 << lshift) >>> rshift (logical
// right shift), both counts constrained by immI_56.  The shift pair is
// folded into the uxtb extend of one add.
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

  ins_encode %{
    __ add($dst$$Register, $src1$$Register, $src2$$Register, ext::uxtb);
  %}

  ins_pipe(ialu_reg_reg);
%}
10774 
10775 
// Int add of a masked value: matches src1 + (src2 & mask) with the mask
// constrained by immI_255 and emits one addw using the uxtb extend.
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

  ins_encode %{
    __ addw($dst$$Register, $src1$$Register, $src2$$Register, ext::uxtb);
  %}

  ins_pipe(ialu_reg_reg);
%}
10788 
// Int add of a masked value: matches src1 + (src2 & mask) with the mask
// constrained by immI_65535 and emits one addw using the uxth extend.
instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

  ins_encode %{
    __ addw($dst$$Register, $src1$$Register, $src2$$Register, ext::uxth);
  %}

  ins_pipe(ialu_reg_reg);
%}
10801 
// Long add of a masked value: matches src1 + (src2 & mask) with the mask
// constrained by immL_255 and emits one add using the uxtb extend.
instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

  ins_encode %{
    __ add($dst$$Register, $src1$$Register, $src2$$Register, ext::uxtb);
  %}

  ins_pipe(ialu_reg_reg);
%}
10814 
// Long add of a masked value: matches src1 + (src2 & mask) with the mask
// constrained by immL_65535 and emits one add using the uxth extend.
instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

  ins_encode %{
    __ add($dst$$Register, $src1$$Register, $src2$$Register, ext::uxth);
  %}

  ins_pipe(ialu_reg_reg);
%}
10827 
// Long add of a masked value: matches src1 + (src2 & mask) with the mask
// constrained by immL_4294967295 and emits one add using the uxtw extend.
instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

  ins_encode %{
    __ add($dst$$Register, $src1$$Register, $src2$$Register, ext::uxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}
10840 
// Int subtract of a masked value: matches src1 - (src2 & mask) with the
// mask constrained by immI_255 and emits one subw using the uxtb extend.
instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

  ins_encode %{
    __ subw($dst$$Register, $src1$$Register, $src2$$Register, ext::uxtb);
  %}

  ins_pipe(ialu_reg_reg);
%}
10853 
// Int subtract of a masked value: matches src1 - (src2 & mask) with the
// mask constrained by immI_65535 and emits one subw using the uxth extend.
instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

  ins_encode %{
    __ subw($dst$$Register, $src1$$Register, $src2$$Register, ext::uxth);
  %}

  ins_pipe(ialu_reg_reg);
%}
10866 
// Long subtract of a masked value: matches src1 - (src2 & mask) with the
// mask constrained by immL_255 and emits one sub using the uxtb extend.
instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

  ins_encode %{
    __ sub($dst$$Register, $src1$$Register, $src2$$Register, ext::uxtb);
  %}

  ins_pipe(ialu_reg_reg);
%}
10879 
// Long subtract of a masked value: matches src1 - (src2 & mask) with the
// mask constrained by immL_65535 and emits one sub using the uxth extend.
instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

  ins_encode %{
    __ sub($dst$$Register, $src1$$Register, $src2$$Register, ext::uxth);
  %}

  ins_pipe(ialu_reg_reg);
%}
10892 
// Long subtract of a masked value: matches src1 - (src2 & mask) with the
// mask constrained by immL_4294967295 and emits one sub using the uxtw
// extend.
instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

  ins_encode %{
    __ sub($dst$$Register, $src1$$Register, $src2$$Register, ext::uxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}
10905 
10906 // END This section of the file is automatically generated. Do not edit --------------
10907 
10908 // ============================================================================
10909 // Floating Point Arithmetic Instructions
10910 
// Float add, register-register: AddF is emitted as fadds.
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2)
%{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(pipe_class_default);
%}
10925 
// Double add, register-register: AddD is emitted as faddd.
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2)
%{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(pipe_class_default);
%}
10940 
// Float subtract, register-register: SubF is emitted as fsubs.
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2)
%{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(pipe_class_default);
%}
10955 
// Double subtract, register-register: SubD is emitted as fsubd.
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2)
%{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(pipe_class_default);
%}
10970 
// Float multiply, register-register: MulF is emitted as fmuls.
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2)
%{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(pipe_class_default);
%}
10985 
// Double multiply, register-register: MulD is emitted as fmuld.
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2)
%{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(pipe_class_default);
%}
11000 
// We cannot use these fused mul w add/sub ops because they don't
// produce the same result as the equivalent separated ops
// (essentially they don't round the intermediate result). That's a
// shame. Leaving them here in case we can identify cases where it is
// legitimate to use them.
11006 
11007 
11008 // instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11009 //   match(Set dst (AddF (MulF src1 src2) src3));
11010 
11011 //   format %{ "fmadds   $dst, $src1, $src2, $src3" %}
11012 
11013 //   ins_encode %{
11014 //     __ fmadds(as_FloatRegister($dst$$reg),
11015 //              as_FloatRegister($src1$$reg),
11016 //              as_FloatRegister($src2$$reg),
11017 //              as_FloatRegister($src3$$reg));
11018 //   %}
11019 
11020 //   ins_pipe(pipe_class_default);
11021 // %}
11022 
11023 // instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11024 //   match(Set dst (AddD (MulD src1 src2) src3));
11025 
11026 //   format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
11027 
11028 //   ins_encode %{
11029 //     __ fmaddd(as_FloatRegister($dst$$reg),
11030 //              as_FloatRegister($src1$$reg),
11031 //              as_FloatRegister($src2$$reg),
11032 //              as_FloatRegister($src3$$reg));
11033 //   %}
11034 
11035 //   ins_pipe(pipe_class_default);
11036 // %}
11037 
11038 // instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11039 //   match(Set dst (AddF (MulF (NegF src1) src2) src3));
11040 //   match(Set dst (AddF (NegF (MulF src1 src2)) src3));
11041 
11042 //   format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
11043 
11044 //   ins_encode %{
11045 //     __ fmsubs(as_FloatRegister($dst$$reg),
11046 //               as_FloatRegister($src1$$reg),
11047 //               as_FloatRegister($src2$$reg),
11048 //              as_FloatRegister($src3$$reg));
11049 //   %}
11050 
11051 //   ins_pipe(pipe_class_default);
11052 // %}
11053 
11054 // instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11055 //   match(Set dst (AddD (MulD (NegD src1) src2) src3));
11056 //   match(Set dst (AddD (NegD (MulD src1 src2)) src3));
11057 
11058 //   format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
11059 
11060 //   ins_encode %{
11061 //     __ fmsubd(as_FloatRegister($dst$$reg),
11062 //               as_FloatRegister($src1$$reg),
11063 //               as_FloatRegister($src2$$reg),
11064 //               as_FloatRegister($src3$$reg));
11065 //   %}
11066 
11067 //   ins_pipe(pipe_class_default);
11068 // %}
11069 
11070 // instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11071 //   match(Set dst (SubF (MulF (NegF src1) src2) src3));
11072 //   match(Set dst (SubF (NegF (MulF src1 src2)) src3));
11073 
11074 //   format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
11075 
11076 //   ins_encode %{
11077 //     __ fnmadds(as_FloatRegister($dst$$reg),
11078 //                as_FloatRegister($src1$$reg),
11079 //                as_FloatRegister($src2$$reg),
11080 //                as_FloatRegister($src3$$reg));
11081 //   %}
11082 
11083 //   ins_pipe(pipe_class_default);
11084 // %}
11085 
11086 // instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11087 //   match(Set dst (SubD (MulD (NegD src1) src2) src3));
11088 //   match(Set dst (SubD (NegD (MulD src1 src2)) src3));
11089 
11090 //   format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
11091 
11092 //   ins_encode %{
11093 //     __ fnmaddd(as_FloatRegister($dst$$reg),
11094 //                as_FloatRegister($src1$$reg),
11095 //                as_FloatRegister($src2$$reg),
11096 //                as_FloatRegister($src3$$reg));
11097 //   %}
11098 
11099 //   ins_pipe(pipe_class_default);
11100 // %}
11101 
11102 // instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
11103 //   match(Set dst (SubF (MulF src1 src2) src3));
11104 
11105 //   format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
11106 
11107 //   ins_encode %{
11108 //     __ fnmsubs(as_FloatRegister($dst$$reg),
11109 //                as_FloatRegister($src1$$reg),
11110 //                as_FloatRegister($src2$$reg),
11111 //                as_FloatRegister($src3$$reg));
11112 //   %}
11113 
11114 //   ins_pipe(pipe_class_default);
11115 // %}
11116 
11117 // instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
11118 //   match(Set dst (SubD (MulD src1 src2) src3));
11119 
11120 //   format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
11121 
11122 //   ins_encode %{
11123 //   // n.b. insn name should be fnmsubd
11124 //     __ fnmsub(as_FloatRegister($dst$$reg),
11125 //                as_FloatRegister($src1$$reg),
11126 //                as_FloatRegister($src2$$reg),
11127 //                as_FloatRegister($src3$$reg));
11128 //   %}
11129 
11130 //   ins_pipe(pipe_class_default);
11131 // %}
11132 
11133 
// Float divide, register-register: DivF is emitted as fdivs.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2)
%{
  match(Set dst (DivF src1 src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(pipe_class_default);
%}
11148 
// Double divide, register-register: DivD is emitted as fdivd.
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2)
%{
  match(Set dst (DivD src1 src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(pipe_class_default);
%}
11163 
// Float negate: NegF is emitted as fnegs.  The format string names the
// same assembler routine the encoding calls (fnegs), in line with the
// double variant, which prints fnegd.
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11177 
// Double negate: NegD is emitted as fnegd.
instruct negD_reg_reg(vRegD dst, vRegD src)
%{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd($dst$$FloatRegister, $src$$FloatRegister);
  %}

  ins_pipe(pipe_class_default);
%}
11191 
// Float absolute value: AbsF is emitted as fabss.
instruct absF_reg(vRegF dst, vRegF src)
%{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}

  ins_encode %{
    __ fabss($dst$$FloatRegister, $src$$FloatRegister);
  %}

  ins_pipe(pipe_class_default);
%}
11204 
// Double absolute value: AbsD is emitted as fabsd.
instruct absD_reg(vRegD dst, vRegD src)
%{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}

  ins_encode %{
    __ fabsd($dst$$FloatRegister, $src$$FloatRegister);
  %}

  ins_pipe(pipe_class_default);
%}
11217 
// Double square root: SqrtD is emitted as fsqrtd.
instruct sqrtD_reg(vRegD dst, vRegD src)
%{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}

  ins_encode %{
    __ fsqrtd($dst$$FloatRegister, $src$$FloatRegister);
  %}

  ins_pipe(pipe_class_default);
%}
11230 
// Float square root.  The rule matches the whole
// ConvD2F(SqrtD(ConvF2D src)) tree and replaces it with a single fsqrts
// on the float operand.
instruct sqrtF_reg(vRegF dst, vRegF src)
%{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}

  ins_encode %{
    __ fsqrts($dst$$FloatRegister, $src$$FloatRegister);
  %}

  ins_pipe(pipe_class_default);
%}
11243 
11244 // ============================================================================
11245 // Logical Instructions
11246 
11247 // Integer Logical Instructions
11248 
11249 // And Instructions
11250 
11251 
// Int bitwise and, register-register: AndI is emitted as andw.
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AndI src1 src2));

  ins_cost(INSN_COST);
  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_encode %{
    __ andw($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}
11266 
// Int bitwise and with a logical immediate (immILog): AndI is emitted as
// andw with the constant passed as an unsigned long.  The format string
// prints "andw" to match the encoding; the instruct declares no effect on
// cr and does not emit the flag-setting form.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11281 
11282 // Or Instructions
11283 
// Int bitwise or, register-register: OrI is emitted as orrw.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2)
%{
  match(Set dst (OrI src1 src2));

  ins_cost(INSN_COST);
  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_encode %{
    __ orrw($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}
11298 
// Int bitwise or with a logical immediate (immILog): OrI is emitted as
// orrw with the constant passed as an unsigned long.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2)
%{
  match(Set dst (OrI src1 src2));

  ins_cost(INSN_COST);
  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_encode %{
    unsigned long imm = (unsigned long)($src2$$constant);
    __ orrw($dst$$Register, $src1$$Register, imm);
  %}

  ins_pipe(ialu_reg_imm);
%}
11313 
11314 // Xor Instructions
11315 
// Int bitwise xor, register-register: XorI is emitted as eorw.
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2)
%{
  match(Set dst (XorI src1 src2));

  ins_cost(INSN_COST);
  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_encode %{
    __ eorw($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}
11330 
// Int bitwise xor with a logical immediate (immILog): XorI is emitted as
// eorw with the constant passed as an unsigned long.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2)
%{
  match(Set dst (XorI src1 src2));

  ins_cost(INSN_COST);
  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_encode %{
    unsigned long imm = (unsigned long)($src2$$constant);
    __ eorw($dst$$Register, $src1$$Register, imm);
  %}

  ins_pipe(ialu_reg_imm);
%}
11345 
11346 // Long Logical Instructions
11347 // TODO
11348 
// Long bitwise and, register-register: AndL is emitted as andr.  The
// format annotation reads "# long" because the rule matches AndL on long
// register operands.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11363 
// Long bitwise and with a logical immediate (immLLog): AndL is emitted as
// andr with the constant passed as an unsigned long.  The format
// annotation reads "# long" because the rule matches AndL on long operands.
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11378 
11379 // Or Instructions
11380 
// Long bitwise or, register-register: OrL is emitted as orr.  The format
// annotation reads "# long" because the rule matches OrL on long register
// operands.
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11395 
// Long bitwise or with a logical immediate (immLLog): OrL is emitted as
// orr with the constant passed as an unsigned long.  The format annotation
// reads "# long" because the rule matches OrL on long operands.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11410 
11411 // Xor Instructions
11412 
// Long bitwise xor, register-register: XorL is emitted as eor.  The format
// annotation reads "# long" because the rule matches XorL on long register
// operands.
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11427 
// Long bitwise xor with a logical immediate (immLLog): XorL is emitted as
// eor with the constant passed as an unsigned long.  The format annotation
// reads "# long" because the rule matches XorL on long operands.
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11442 
// Int to long conversion: ConvI2L is emitted as sbfm with bit range 0..31
// (printed as sxtw in the format string).
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src$$reg);
    __ sbfm(dst_reg, src_reg, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
11454 
// Unsigned int to long: matches ConvI2L(src) & mask, with the mask
// constrained by immL_32bits, and emits one ubfm with bit range 0..31.
// This pattern occurs in bigmath arithmetic.
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}

  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src$$reg);
    __ ubfm(dst_reg, src_reg, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
11468 
// Long to int conversion: ConvL2I is emitted as a movw from the long
// register into the int register.
instruct convL2I_reg(iRegINoSp dst, iRegL src)
%{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
11481 
// Conv2B on an int: compares src with zr (cmpw) and then sets dst from
// the NE condition (cset).  The compare writes the flags, hence KILL cr.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ cmpw(src_reg, zr);
    __ cset(dst_reg, Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
11499 
// Conv2B on a pointer: compares src with zr (cmp) and then sets dst from
// the NE condition (cset).  The compare writes the flags, hence KILL cr.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ cmp(src_reg, zr);
    __ cset(dst_reg, Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
11517 
// Double to float conversion: ConvD2F is emitted through the assembler's
// fcvtd routine.
instruct convD2F_reg(vRegF dst, vRegD src)
%{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd($dst$$FloatRegister, $src$$FloatRegister);
  %}

  ins_pipe(pipe_class_default);
%}
11530 
// Float to double conversion: ConvF2D is emitted through the assembler's
// fcvts routine.
instruct convF2D_reg(vRegD dst, vRegF src)
%{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts($dst$$FloatRegister, $src$$FloatRegister);
  %}

  ins_pipe(pipe_class_default);
%}
11543 
// Float to int conversion: ConvF2I is emitted as fcvtzsw from a float
// register into a general register.
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src)
%{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw($dst$$Register, $src$$FloatRegister);
  %}

  ins_pipe(pipe_class_default);
%}
11556 
// Float to long conversion: ConvF2L is emitted as fcvtzs from a float
// register into a general register.
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src)
%{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs($dst$$Register, $src$$FloatRegister);
  %}

  ins_pipe(pipe_class_default);
%}
11569 
// Int to float conversion: ConvI2F is emitted as scvtfws from a general
// register into a float register.
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws($dst$$FloatRegister, $src$$Register);
  %}

  ins_pipe(pipe_class_default);
%}
11582 
// Long to float conversion: ConvL2F is emitted as scvtfs from a general
// register into a float register.
instruct convL2F_reg_reg(vRegF dst, iRegL src)
%{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs($dst$$FloatRegister, $src$$Register);
  %}

  ins_pipe(pipe_class_default);
%}
11595 
// Double to int conversion: ConvD2I is emitted as fcvtzdw from a float
// register into a general register.
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src)
%{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw($dst$$Register, $src$$FloatRegister);
  %}

  ins_pipe(pipe_class_default);
%}
11608 
// Double to long conversion: ConvD2L is emitted as fcvtzd from a float
// register into a general register.
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src)
%{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd($dst$$Register, $src$$FloatRegister);
  %}

  ins_pipe(pipe_class_default);
%}
11621 
// Int to double conversion: ConvI2D is emitted as scvtfwd from a general
// register into a float register.
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd($dst$$FloatRegister, $src$$Register);
  %}

  ins_pipe(pipe_class_default);
%}
11634 
// Long to double conversion: ConvL2D is emitted as scvtfd from a general
// register into a float register.
instruct convL2D_reg_reg(vRegD dst, iRegL src)
%{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd($dst$$FloatRegister, $src$$Register);
  %}

  ins_pipe(pipe_class_default);
%}
11647 
11648 // stack <-> reg and reg <-> reg shuffles with no conversion
11649 
// MoveF2I, stack slot to general register: loads the float stack slot at
// sp + disp into the int register with ldrw, with no value conversion.
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);
  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw(as_Register($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);
%}
11667 
// MoveI2F, stack slot to float register: loads the int stack slot at
// sp + disp into the float register with ldrs, with no value conversion.
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);
  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs($dst$$FloatRegister, Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);
%}
11685 
// MoveD2L, stack slot to general register: loads the double stack slot at
// sp + disp into the long register with ldr, with no value conversion.
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);
  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr(as_Register($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);
%}
11703 
// MoveL2D, stack slot to float register: loads the long stack slot at
// sp + disp into the double register with ldrd, with no value conversion.
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);
  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd($dst$$FloatRegister, Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);
%}
11721 
// MoveF2I, float register to stack slot: stores the float register to the
// int stack slot at sp + disp with strs, with no value conversion.
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(INSN_COST);
  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs($src$$FloatRegister, Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);
%}
11739 
// MoveI2F, general register to stack slot: stores the int register to the
// float stack slot at sp + disp with strw, with no value conversion.
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(INSN_COST);
  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw(as_Register($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);
%}
11757 
// MoveD2L with a stack-slot destination: strd stores the double register
// into the slot at sp + $dst$$disp.
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  // Operand order in the listing is "register, memory", matching the
  // emitted store and the other *_reg_stack forms.
  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
11775 
// MoveL2D with a stack-slot destination: str stores the long register
// into the slot at sp + $dst$$disp.
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register src_reg = $src$$Register;
    __ str(src_reg, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);
%}
11793 
// MoveF2I, register to register: fmovs from the float register into the
// general-purpose destination.
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register      dst_reg = $dst$$Register;
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ fmovs(dst_reg, src_reg);
  %}

  ins_pipe(pipe_class_memory);
%}
11811 
// MoveI2F, register to register: fmovs from the general-purpose register
// into the float destination.
instruct MoveI2F_reg_reg(vRegF dst, iRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}
  ins_cost(INSN_COST);

  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    Register      src_reg = $src$$Register;
    __ fmovs(dst_reg, src_reg);
  %}

  ins_pipe(pipe_class_memory);
%}
11829 
// MoveD2L, register to register: fmovd from the double register into the
// general-purpose destination.
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register      dst_reg = $dst$$Register;
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ fmovd(dst_reg, src_reg);
  %}

  ins_pipe(pipe_class_memory);
%}
11847 
// MoveL2D, register to register: fmovd from the general-purpose register
// into the double destination.
instruct MoveL2D_reg_reg(vRegD dst, iRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
  ins_cost(INSN_COST);

  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    Register      src_reg = $src$$Register;
    __ fmovd(dst_reg, src_reg);
  %}

  ins_pipe(pipe_class_memory);
%}
11865 
11866 // ============================================================================
11867 // clearing of an array
11868 
// ClearArray: count and base are pinned to fixed register classes
// (iRegL_R11 / iRegP_R10) and both are consumed and destroyed (USE_KILL)
// by the shared aarch64_enc_clear_array_reg_reg encoding.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base);

  format %{ "ClearArray $cnt, $base" %}
  ins_cost(4 * INSN_COST);

  ins_encode(aarch64_enc_clear_array_reg_reg(cnt, base));
  ins_pipe(pipe_class_memory);
%}
11881 
11882 // ============================================================================
11883 // Overflow Math Instructions
11884 
// OverflowAddI, register/register: cmnw on the two operands sets cr for
// the overflow test; no result register is written.
instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  ins_cost(INSN_COST);
  format %{ "cmnw  $op1, $op2\t# overflow check int" %}

  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmnw(lhs, rhs);
  %}

  ins_pipe(icmp_reg_reg);
%}
11897 
// OverflowAddI, register/immediate: cmnw against an immIAddSub constant
// sets cr for the overflow test.
instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  ins_cost(INSN_COST);
  format %{ "cmnw  $op1, $op2\t# overflow check int" %}

  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmnw(lhs, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
11910 
// OverflowAddL, register/register: cmn on the two long operands sets cr
// for the overflow test.
instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  ins_cost(INSN_COST);
  format %{ "cmn   $op1, $op2\t# overflow check long" %}

  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmn(lhs, rhs);
  %}

  ins_pipe(icmp_reg_reg);
%}
11923 
// OverflowAddL, register/immediate: cmn against an immLAddSub constant
// sets cr for the overflow test.
instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  ins_cost(INSN_COST);
  format %{ "cmn   $op1, $op2\t# overflow check long" %}

  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmn(lhs, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
11936 
// OverflowSubI, register/register: cmpw on the two operands sets cr for
// the overflow test.
instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# overflow check int" %}

  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpw(lhs, rhs);
  %}

  ins_pipe(icmp_reg_reg);
%}
11949 
// OverflowSubI, register/immediate: cmpw against an immIAddSub constant
// sets cr for the overflow test.
instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# overflow check int" %}

  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpw(lhs, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
11962 
// OverflowSubL, register/register: cmp on the two long operands sets cr
// for the overflow test.
instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  ins_cost(INSN_COST);
  format %{ "cmp   $op1, $op2\t# overflow check long" %}

  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmp(lhs, rhs);
  %}

  ins_pipe(icmp_reg_reg);
%}
11975 
// OverflowSubL, register/immediate: cmp against an immLAddSub constant
// sets cr for the overflow test.
instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  ins_cost(INSN_COST);
  format %{ "cmp   $op1, $op2\t# overflow check long" %}

  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmp(lhs, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
11988 
// Int negate overflow check, matched as OverflowSubI with a zero left
// operand: the zero register is compared against $op1.
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  ins_cost(INSN_COST);
  format %{ "cmpw  zr, $op1\t# overflow check int" %}

  ins_encode %{
    Register operand = $op1$$Register;
    __ cmpw(zr, operand);
  %}

  ins_pipe(icmp_reg_imm);
%}
12001 
// Long negate overflow check, matched as OverflowSubL with a zero left
// operand: the zero register is compared against $op1.
// NOTE(review): the zero operand is declared immI0 although the node is
// the long variant -- confirm this is the intended operand class.
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  ins_cost(INSN_COST);
  format %{ "cmp   zr, $op1\t# overflow check long" %}

  ins_encode %{
    Register operand = $op1$$Register;
    __ cmp(zr, operand);
  %}

  ins_pipe(icmp_reg_imm);
%}
12014 
// OverflowMulI producing a flags result.  The full product is formed in
// rscratch1 and compared with the sign extension of its own low word; a
// mismatch (NE) means the product does not fit in an int.  Because the
// consumer tests VS/VC, the NE/EQ outcome is then converted into the V
// flag by comparing either 0x80000000 or 0 against 1.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  ins_cost(5 * INSN_COST);
  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}

  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;

    __ smull(rscratch1, lhs, rhs);
    // NE => overflow
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);
    // Develop 0 (EQ), or 0x80000000 (NE)
    __ movw(rscratch1, 0x80000000);
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE);
    // 0x80000000 - 1 => VS
    __ cmpw(rscratch1, 1);
  %}

  ins_pipe(pipe_slow);
%}
12035 
// Fused OverflowMulI + If, used only for overflow / no_overflow tests
// (see predicate).  Since the branch can test NE/EQ directly, the flag
// conversion sequence of overflowMulI_reg is not needed here.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}

  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition requested = (Assembler::Condition)$cmp$$cmpcode;
    // A VS request branches on "product differs from its sign extension"
    // (NE); anything else branches on EQ.
    Assembler::Condition taken =
      (requested == Assembler::VS) ? Assembler::NE : Assembler::EQ;

    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    // NE => overflow
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);
    __ br(taken, *target);
  %}

  ins_pipe(pipe_serial);
%}
12057 
// OverflowMulL producing a flags result.
//
// The 128-bit signed product is formed as low half (mul -> rscratch1) and
// high half (smulh -> rscratch2).  The product fits in a long exactly
// when the high half is the sign extension of the low half, i.e. when
// rscratch2 == (rscratch1 ASR 63).  The shift must be 63: with a shift
// of 31 an in-range product such as 1 * 0x80000000 (low = 0x80000000,
// high = 0) would be misreported as overflow.
//
// Because the consumer tests VS/VC, the NE/EQ outcome is then converted
// into the V flag by comparing either 0x80000000 or 0 against 1.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}
12080 
// Fused OverflowMulL + If, used only for overflow / no_overflow tests
// (see predicate).
//
// The 128-bit signed product is formed as low half (mul -> rscratch1) and
// high half (smulh -> rscratch2).  The product fits in a long exactly
// when the high half equals the low half arithmetically shifted right by
// 63 (its sign extension).  A shift of 31 would flag in-range products
// such as 1 * 0x80000000 as overflow.  The branch tests NE/EQ directly,
// so no conversion to the V flag is needed.
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    // VS (overflow) maps to NE, otherwise branch on EQ
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
12104 
12105 // ============================================================================
12106 // Compare Instructions
12107 
// CmpI of two registers; the result is the flags register cr.
instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpw  $op1, $op2" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_cmpw(op1, op2));
  ins_pipe(icmp_reg_reg);
%}
12121 
// CmpI of a register against the constant zero, encoded through the
// add/sub-immediate compare form.
instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));
  effect(DEF cr, USE op1);

  format %{ "cmpw $op1, 0" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
  ins_pipe(icmp_reg_imm);
%}
12135 
// CmpI of a register against an immIAddSub constant, encoded through the
// add/sub-immediate compare form.
instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1);

  format %{ "cmpw  $op1, $op2" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
  ins_pipe(icmp_reg_imm);
%}
12149 
// CmpI of a register against an arbitrary int constant.  Costed at twice
// the add/sub-immediate forms so those are preferred when they apply.
instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1);

  format %{ "cmpw  $op1, $op2" %}
  ins_cost(INSN_COST * 2);

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));
  ins_pipe(icmp_reg_imm);
%}
12163 
12164 // Unsigned compare Instructions; really, same as signed compare
12165 // except it should only be used to feed an If or a CMovI which takes a
12166 // cmpOpU.
12167 
// CmpU of two registers.  Same encoding as the signed form; the result
// goes to an rFlagsRegU so that only unsigned consumers can use it.
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpw  $op1, $op2\t# unsigned" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_cmpw(op1, op2));
  ins_pipe(icmp_reg_reg);
%}
12181 
// CmpU of a register against the constant zero, encoded through the
// add/sub-immediate compare form.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));
  effect(DEF cr, USE op1);

  format %{ "cmpw $op1, #0\t# unsigned" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
  ins_pipe(icmp_reg_imm);
%}
12195 
// CmpU of a register against an immIAddSub constant, encoded through the
// add/sub-immediate compare form.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));
  effect(DEF cr, USE op1);

  format %{ "cmpw  $op1, $op2\t# unsigned" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
  ins_pipe(icmp_reg_imm);
%}
12209 
// CmpU of a register against an arbitrary int constant.  Costed at twice
// the add/sub-immediate forms so those are preferred when they apply.
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));
  effect(DEF cr, USE op1);

  format %{ "cmpw  $op1, $op2\t# unsigned" %}
  ins_cost(INSN_COST * 2);

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));
  ins_pipe(icmp_reg_imm);
%}
12223 
// CmpL of two long registers; the result is the flags register cr.
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmp  $op1, $op2" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_cmp(op1, op2));
  ins_pipe(icmp_reg_reg);
%}
12237 
// CmpL of a long register against zero, encoded through the
// add/sub-immediate compare form.
// NOTE(review): the listing text says "tst" while the encoding used is
// the cmp-immediate one, and the zero operand is immI0 rather than a long
// zero -- confirm both are intended.
instruct compL_reg_immI0(rFlagsReg cr, iRegL op1, immI0 zero)
%{
  match(Set cr (CmpL op1 zero));
  effect(DEF cr, USE op1);

  format %{ "tst  $op1" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));
  ins_pipe(icmp_reg_imm);
%}
12251 
// CmpL of a long register against an immLAddSub constant, encoded through
// the add/sub-immediate compare form.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));
  effect(DEF cr, USE op1);

  format %{ "cmp  $op1, $op2" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));
  ins_pipe(icmp_reg_imm);
%}
12265 
// CmpL of a long register against an arbitrary long constant.  Costed at
// twice the add/sub-immediate forms so those are preferred when they apply.
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));
  effect(DEF cr, USE op1);

  format %{ "cmp  $op1, $op2" %}
  ins_cost(INSN_COST * 2);

  ins_encode(aarch64_enc_cmp_imm(op1, op2));
  ins_pipe(icmp_reg_imm);
%}
12279 
// CmpP of two pointer registers; the result goes to the unsigned flags
// register class.
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmp  $op1, $op2\t // ptr" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_cmpp(op1, op2));
  ins_pipe(icmp_reg_reg);
%}
12293 
// CmpN of two compressed-pointer registers; the result goes to the
// unsigned flags register class.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmp  $op1, $op2\t // compressed ptr" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_cmpn(op1, op2));
  ins_pipe(icmp_reg_reg);
%}
12307 
// CmpP of a pointer register against the null constant; only $op1 is
// passed to the encoding.
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));
  effect(DEF cr, USE op1, USE zero);

  format %{ "cmp  $op1, 0\t // ptr" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_testp(op1));
  ins_pipe(icmp_reg_imm);
%}
12321 
// CmpN of a compressed-pointer register against the narrow null constant;
// only $op1 is passed to the encoding.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));
  effect(DEF cr, USE op1, USE zero);

  format %{ "cmp  $op1, 0\t // compressed ptr" %}
  ins_cost(INSN_COST);

  ins_encode(aarch64_enc_testn(op1));
  ins_pipe(icmp_reg_imm);
%}
12335 
12336 // FP comparisons
12337 //
12338 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
12339 // using normal cmpOp. See declaration of rFlagsReg for details.
12340 
// CmpF of two float registers; fcmps sets the ordinary flags register.
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  format %{ "fcmps $src1, $src2" %}
  ins_cost(3 * INSN_COST);

  ins_encode %{
    FloatRegister lhs = as_FloatRegister($src1$$reg);
    FloatRegister rhs = as_FloatRegister($src2$$reg);
    __ fcmps(lhs, rhs);
  %}

  ins_pipe(pipe_class_compare);
%}
12354 
// CmpF of a float register against the constant 0.0; uses the
// compare-with-zero form of fcmps, so no second register is needed.
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    // Plain 0.0 is already a double literal; the former 'D' suffix is a
    // compiler extension and not portable C++.
    __ fcmps(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
12368 // FROM HERE
12369 
// CmpD of two double registers; fcmpd sets the ordinary flags register.
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  format %{ "fcmpd $src1, $src2" %}
  ins_cost(3 * INSN_COST);

  ins_encode %{
    FloatRegister lhs = as_FloatRegister($src1$$reg);
    FloatRegister rhs = as_FloatRegister($src2$$reg);
    __ fcmpd(lhs, rhs);
  %}

  ins_pipe(pipe_class_compare);
%}
12383 
// CmpD of a double register against the constant 0.0; uses the
// compare-with-zero form of fcmpd, so no second register is needed.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    // Plain 0.0 is already a double literal; the former 'D' suffix is a
    // compiler extension and not portable C++.
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
12397 
// Three-way float compare (CmpF3).  After fcmps, $dst is built without
// branches: csinvw yields 0 on EQ and -1 otherwise, then csnegw keeps the
// value on LT (less or unordered) and negates it otherwise, giving
// -1 / 0 / 1.  The flags are clobbered, hence KILL cr.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw $dst, zr, zr, eq\n\t"
            "csnegw $dst, $dst, $dst, lt"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
12425 
// Three-way double compare (CmpD3).  After fcmpd, $dst is built without
// branches: csinvw yields 0 on EQ and -1 otherwise, then csnegw keeps the
// value on LT (less or unordered) and negates it otherwise, giving
// -1 / 0 / 1.  The flags are clobbered, hence KILL cr.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw $dst, zr, zr, eq\n\t"
            "csnegw $dst, $dst, $dst, lt"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
12452 
// Three-way float compare against 0.0 (CmpF3 with an immF0 operand).
// After fcmps against zero, $dst is built without branches: csinvw yields
// 0 on EQ and -1 otherwise, then csnegw keeps the value on LT (less or
// unordered) and negates it otherwise, giving -1 / 0 / 1.  The flags are
// clobbered, hence KILL cr.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw $dst, zr, zr, eq\n\t"
            "csnegw $dst, $dst, $dst, lt"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    // Plain 0.0 is already a double literal; the 'D' suffix is non-standard.
    __ fcmps(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
12479 
// Three-way double compare against 0.0 (CmpD3 with an immD0 operand).
// After fcmpd against zero, $dst is built without branches: csinvw yields
// 0 on EQ and -1 otherwise, then csnegw keeps the value on LT (less or
// unordered) and negates it otherwise, giving -1 / 0 / 1.  The flags are
// clobbered, hence KILL cr.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw $dst, zr, zr, eq\n\t"
            "csnegw $dst, $dst, $dst, lt"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    // Plain 0.0 is already a double literal; the 'D' suffix is non-standard.
    __ fcmpd(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
12505 
// CmpLTMask of two registers: compare, materialise the LT condition with
// csetw, then subtract that from zero to form the mask.  The compare
// clobbers the flags, hence KILL cr.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}
  ins_cost(3 * INSN_COST);

  ins_encode %{
    Register mask = as_Register($dst$$reg);
    Register lhs  = as_Register($p$$reg);
    Register rhs  = as_Register($q$$reg);

    __ cmpw(lhs, rhs);
    __ csetw(mask, Assembler::LT);
    __ subw(mask, zr, mask);
  %}

  ins_pipe(ialu_reg_reg);
%}
12526 
// CmpLTMask against zero: a single arithmetic shift right by 31 of $src
// produces the mask directly.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register mask  = as_Register($dst$$reg);
    Register value = as_Register($src$$reg);
    __ asrw(mask, value, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
12542 
12543 // ============================================================================
12544 // Max and Min
12545 
// MinI: compare the two sources, then cselw picks $src1 when it is less
// than $src2 (LT) and $src2 otherwise.  Exactly two instructions are
// emitted, matching size(8); the compare clobbers the flags (KILL cr).
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  // Listing text: operands comma-separated, comment introduced with '#'.
  format %{
    "cmpw $src1, $src2\t# signed int\n\t"
    "cselw $dst, $src1, $src2, lt"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
12570 // FROM HERE
12571 
// MaxI: compare the two sources, then cselw picks $src1 when it is
// greater than $src2 (GT) and $src2 otherwise.  Exactly two instructions
// are emitted, matching size(8); the compare clobbers the flags (KILL cr).
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  // Listing text: operands comma-separated, comment introduced with '#'.
  format %{
    "cmpw $src1, $src2\t# signed int\n\t"
    "cselw $dst, $src1, $src2, gt"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
12596 
12597 // ============================================================================
12598 // Branch Instructions
12599 
// Direct Branch: unconditional jump to $lbl for Goto.
instruct branch(label lbl)
%{
  match(Goto);
  effect(USE lbl);

  format %{ "b  $lbl" %}
  ins_cost(BRANCH_COST);

  ins_encode(aarch64_enc_b(lbl));
  ins_pipe(pipe_branch);
%}
12614 
// Conditional Near Branch on a signed flags result.
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);
  effect(USE lbl);

  format %{ "b$cmp  $lbl" %}
  ins_cost(BRANCH_COST);

  // ins_short_branch(1) would mark this as the short variant of a long
  // branch: it would then be skipped in first-pass matching and used only
  // by the `Shorten_branches' pass.  Currently disabled:
  // ins_short_branch(1);

  ins_encode(aarch64_enc_br_con(cmp, lbl));
  ins_pipe(pipe_branch_cond);
%}
12635 
// Conditional Near Branch Unsigned: same shape as the signed form but
// takes a cmpOpU and an rFlagsRegU.
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);
  effect(USE lbl);

  format %{ "b$cmp  $lbl\t# unsigned" %}
  ins_cost(BRANCH_COST);

  // ins_short_branch(1) would mark this as the short variant of a long
  // branch: it would then be skipped in first-pass matching and used only
  // by the `Shorten_branches' pass.  Currently disabled:
  // ins_short_branch(1);

  ins_encode(aarch64_enc_br_conU(cmp, lbl));
  ins_pipe(pipe_branch_cond);
%}
12656 
12657 // Make use of CBZ and CBNZ.  These instructions, as well as being
12658 // shorter than (cmp; branch), have the additional benefit of not
12659 // killing the flags.
12660 
// Fused int compare-with-zero and branch, restricted by the predicate to
// eq / ne tests: EQ emits cbzw, anything else emits cbnzw.
instruct cmpI_imm0_branch(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr)
%{
  match(If cmp (CmpI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  format %{ "cbw$cmp   $op1, $labl" %}
  ins_cost(BRANCH_COST);

  ins_encode %{
    Label* target = $labl$$label;
    Register value = $op1$$Register;
    Assembler::Condition test = (Assembler::Condition)$cmp$$cmpcode;
    if (test == Assembler::EQ) {
      __ cbzw(value, *target);
    } else {
      __ cbnzw(value, *target);
    }
  %}

  ins_pipe(pipe_cmp_branch);
%}
12679 
// Fused long compare-with-zero and branch, restricted by the predicate to
// eq / ne tests: EQ emits cbz, anything else emits cbnz.
instruct cmpL_imm0_branch(cmpOp cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr)
%{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  format %{ "cb$cmp   $op1, $labl" %}
  ins_cost(BRANCH_COST);

  ins_encode %{
    Label* target = $labl$$label;
    Register value = $op1$$Register;
    Assembler::Condition test = (Assembler::Condition)$cmp$$cmpcode;
    if (test == Assembler::EQ) {
      __ cbz(value, *target);
    } else {
      __ cbnz(value, *target);
    }
  %}

  ins_pipe(pipe_cmp_branch);
%}
12698 
// Fused pointer null-check and branch, restricted by the predicate to
// eq / ne tests: EQ emits cbz, anything else emits cbnz.
instruct cmpP_imm0_branch(cmpOp cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr)
%{
  match(If cmp (CmpP op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  format %{ "cb$cmp   $op1, $labl" %}
  ins_cost(BRANCH_COST);

  ins_encode %{
    Label* target = $labl$$label;
    Register value = $op1$$Register;
    Assembler::Condition test = (Assembler::Condition)$cmp$$cmpcode;
    if (test == Assembler::EQ) {
      __ cbz(value, *target);
    } else {
      __ cbnz(value, *target);
    }
  %}

  ins_pipe(pipe_cmp_branch);
%}
12717 
12718 // Conditional Far Branch
12719 // Conditional Far Branch Unsigned
12720 // TODO: fixme
12721 
// Counted-loop back edge, near form, signed condition.  Shares the
// conditional-branch encoding with branchCon.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);
  effect(USE lbl);

  format %{ "b$cmp $lbl \t// counted loop end" %}
  ins_cost(BRANCH_COST);

  // short variant (currently disabled):
  // ins_short_branch(1);

  ins_encode(aarch64_enc_br_con(cmp, lbl));
  ins_pipe(pipe_branch);
%}
12738 
// Counted-loop back edge, near form, unsigned condition.  Shares the
// unsigned conditional-branch encoding with branchConU.
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);
  effect(USE lbl);

  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}
  ins_cost(BRANCH_COST);

  // short variant (currently disabled):
  // ins_short_branch(1);

  ins_encode(aarch64_enc_br_conU(cmp, lbl));
  ins_pipe(pipe_branch);
%}
12755 
12756 // counted loop end branch far
12757 // counted loop end branch far unsigned
12758 // TODO: fixme
12759 
12760 // ============================================================================
12761 // inlined locking and unlocking
12762 
// Inlined monitor enter (FastLock).  The outcome is delivered in cr; two
// scratch pointer registers are reserved as TEMPs for the shared
// aarch64_enc_fast_lock encoding.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}
  // TODO: identify correct cost
  ins_cost(5 * INSN_COST);

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));
  ins_pipe(pipe_serial);
%}
12777 
// Inlined monitor exit (FastUnlock).  The outcome is delivered in cr; two
// scratch pointer registers are reserved as TEMPs for the shared
// aarch64_enc_fast_unlock encoding.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}
  ins_cost(5 * INSN_COST);

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));
  ins_pipe(pipe_serial);
%}
12790 
12791 
12792 // ============================================================================
12793 // Safepoint Instructions
12794 
12795 // TODO
12796 // provide a near and far version of this code
12797 
// Safepoint poll: reads the polling page through $poll, tagged with a
// poll_type relocation.
instruct safePoint(iRegP poll)
%{
  match(SafePoint poll);

  format %{ "ldrw zr, [$poll]\t# Safepoint: poll for GC" %}

  ins_encode %{
    Register poll_reg = as_Register($poll$$reg);
    __ read_polling_page(poll_reg, relocInfo::poll_type);
  %}

  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
12810 
12811 
12812 // ============================================================================
12813 // Procedure Call/Return Instructions
12814 
// Call Java Static Instruction: the static-call encoding followed by the
// common call epilog.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);
  effect(USE meth);

  format %{ "call,static $meth \t// ==> " %}
  ins_cost(CALL_COST);

  ins_encode(aarch64_enc_java_static_call(meth), aarch64_enc_call_epilog);
  ins_pipe(pipe_class_call);
%}
12832 
12833 // TO HERE
12834 
12835 // Call Java Dynamic Instruction
12836 instruct CallDynamicJavaDirect(method meth)
12837 %{
        // Direct call for a CallDynamicJava node; $meth is the call target.
12838   match(CallDynamicJava);
12839 
12840   effect(USE meth);
12841 
12842   ins_cost(CALL_COST);
12843 
12844   format %{ "CALL,dynamic $meth \t// ==> " %}
12845 
        // Two encodings run in sequence: the dynamic call itself, then the
        // shared call epilog (same epilog as CallStaticJavaDirect).
12846   ins_encode( aarch64_enc_java_dynamic_call(meth),
12847                aarch64_enc_call_epilog );
12848 
12849   ins_pipe(pipe_class_call);
12850 %}
12851 
12852 // Call Runtime Instruction
12853 
12854 instruct CallRuntimeDirect(method meth)
12855 %{
        // Call from compiled Java code into the runtime (CallRuntime node).
12856   match(CallRuntime);
12857 
12858   effect(USE meth);
12859 
12860   ins_cost(CALL_COST);
12861 
12862   format %{ "CALL, runtime $meth" %}
12863 
        // Uses the shared java-to-runtime encoding; unlike the Java call
        // instructs above, no call-epilog encoding is appended.
12864   ins_encode( aarch64_enc_java_to_runtime(meth) );
12865 
12866   ins_pipe(pipe_class_call);
12867 %}
12868 
12869 // Call Runtime Leaf Instruction
12870 
12871 instruct CallLeafDirect(method meth)
12872 %{
        // Call to a runtime leaf routine (CallLeaf node).
12873   match(CallLeaf);
12874 
12875   effect(USE meth);
12876 
12877   ins_cost(CALL_COST);
12878 
12879   format %{ "CALL, runtime leaf $meth" %}
12880 
        // Same encoding as CallRuntimeDirect; only the matched node and the
        // listing text differ.
12881   ins_encode( aarch64_enc_java_to_runtime(meth) );
12882 
12883   ins_pipe(pipe_class_call);
12884 %}
12885 
12886 // Call Runtime Leaf Instruction (no floating-point variant)
12887 
12888 instruct CallLeafNoFPDirect(method meth)
12889 %{
        // Call to a runtime leaf routine for the CallLeafNoFP node.
12890   match(CallLeafNoFP);
12891 
12892   effect(USE meth);
12893 
12894   ins_cost(CALL_COST);
12895 
12896   format %{ "CALL, runtime leaf nofp $meth" %}
12897 
        // Same encoding as CallRuntimeDirect and CallLeafDirect; only the
        // matched node and the listing text differ.
12898   ins_encode( aarch64_enc_java_to_runtime(meth) );
12899 
12900   ins_pipe(pipe_class_call);
12901 %}
12902 
12903 // Tail Call; Jump from runtime stub to Java code.
12904 // Also known as an 'interprocedural jump'.
12905 // Target of jump will eventually return to caller.
12906 // TailJump below removes the return address.
12907 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
12908 %{
        // Indirect tail call through $jump_target. $method_oop is pinned to
        // the inline_cache_RegP register class; it is an input of the match
        // rule but is not passed to the encoding.
12909   match(TailCall jump_target method_oop);
12910 
12911   ins_cost(CALL_COST);
12912 
12913   format %{ "br $jump_target\t# $method_oop holds method oop" %}
12914 
12915   ins_encode(aarch64_enc_tail_call(jump_target));
12916 
12917   ins_pipe(pipe_class_call);
12918 %}
12919 
12920 instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
12921 %{
        // Indirect tail jump through $jump_target. $ex_oop (the exception oop)
        // is pinned to r0 by its operand class; it is an input of the match
        // rule but is not passed to the encoding.
12922   match(TailJump jump_target ex_oop);
12923 
12924   ins_cost(CALL_COST);
12925 
12926   format %{ "br $jump_target\t# $ex_oop holds exception oop" %}
12927 
12928   ins_encode(aarch64_enc_tail_jmp(jump_target));
12929 
12930   ins_pipe(pipe_class_call);
12931 %}
12932 
12933 // Create exception oop: created by stack-crawling runtime code.
12934 // Created exception is now available to this handler, and is setup
12935 // just prior to jumping to this handler. No code emitted.
12936 // TODO check
12937 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
12938 instruct CreateException(iRegP_R0 ex_oop)
12939 %{
        // Defines $ex_oop (pinned to r0 by its operand class) as the result of
        // a CreateEx node.
12940   match(Set ex_oop (CreateEx));
12941 
12942   format %{ " -- \t// exception oop; no code emitted" %}
12943 
        // Zero-length instruction: the encoding below is intentionally empty.
12944   size(0);
12945 
12946   ins_encode( /*empty*/ );
12947 
12948   ins_pipe(pipe_class_empty);
12949 %}
12950 
12951 // Rethrow exception: The exception oop will come in the first
12952 // argument position. Then JUMP (not call) to the rethrow stub code.
12953 instruct RethrowException() %{
        // Matches a Rethrow node; takes no explicit operands.
12954   match(Rethrow);
12955   ins_cost(CALL_COST);
12956 
12957   format %{ "b rethrow_stub" %}
12958 
        // The branch to the rethrow stub is produced by the
        // aarch64_enc_rethrow encoding class.
12959   ins_encode( aarch64_enc_rethrow() );
12960 
12961   ins_pipe(pipe_class_call);
12962 %}
12963 
12964 
12965 // Return Instruction
12966 // epilog node loads ret address into lr as part of frame pop
12967 instruct Ret()
12968 %{
        // Matches a Return node; takes no explicit operands and declares no
        // ins_cost.
12969   match(Return);
12970 
12971   format %{ "ret\t// return register" %}
12972 
        // The return instruction is produced by the aarch64_enc_ret encoding.
12973   ins_encode( aarch64_enc_ret() );
12974 
12975   ins_pipe(pipe_branch);
12976 %}
12977 
12978 // Die now.
12979 instruct ShouldNotReachHere() %{
        // Matches a Halt node: code the compiler believes is unreachable.
12980   match(Halt);
12981 
12982   ins_cost(CALL_COST);
12983   format %{ "ShouldNotReachHere" %}
12984 
12985   ins_encode %{
12986     // TODO
12987     // implement proper trap call here
          // Placeholder: emits a brk with immediate 999 rather than a real
          // runtime trap call.
12988     __ brk(999);
12989   %}
12990 
12991   ins_pipe(pipe_class_default);
12992 %}
12993 
12994 // ============================================================================
12995 // Partial Subtype Check
12996 //
12997 // Scan the subklass's secondary superklass array for an instance of
12998 // the superklass.  Set a hidden internal cache on a hit (cache is
12999 // checked with exposed code in gen_subtype_check()).  Return NZ for a
13000 // miss or zero for a hit.  The encoding ALSO sets flags.
13001 
13002 instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
13003 %{
        // Value-producing form of the partial subtype check: the outcome is
        // delivered in $result. All operands are pinned to fixed registers by
        // their operand classes (sub=r4, super=r0, temp=r2, result=r5).
13004   match(Set result (PartialSubtypeCheck sub super));
        // The flags and the temp register are clobbered.
13005   effect(KILL cr, KILL temp);
13006 
        // NOTE(review): the "next version" (partialSubtypeCheckVsZero) uses the
        // same cost of 1100, so "slightly larger" does not hold as written.
13007   ins_cost(1100);  // slightly larger than the next version
13008   format %{ "partialSubtypeCheck $result, $sub, $super" %}
13009 
13010   ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));
13011 
        // The opcode value is what distinguishes this form from the VsZero
        // form, which shares the same encoding class.
13012   opcode(0x1); // Force zero of result reg on hit
13013 
13014   ins_pipe(pipe_class_memory);
13015 %}
13016 
13017 instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
13018 %{
        // Flags-producing form: matches the partial subtype check when its
        // result is immediately compared against the null pointer constant,
        // so only the flags register cr is defined.
13019   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
        // Here $result is only a clobbered scratch register, like $temp.
13020   effect(KILL temp, KILL result);
13021 
        // NOTE(review): no later variant of this instruct follows, and the cost
        // equals that of partialSubtypeCheck; the trailing comment looks
        // copied from that instruct.
13022   ins_cost(1100);  // slightly larger than the next version
13023   format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}
13024 
13025   ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));
13026 
13027   opcode(0x0); // Don't zero result reg on hit
13028 
13029   ins_pipe(pipe_class_memory);
13030 %}
13031 
13032 instruct string_compare(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
13033                         iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
13034 %{
        // String comparison intrinsic (StrComp node). Every operand is pinned
        // to a fixed register by its operand class.
13035   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
        // All four inputs are consumed and clobbered; tmp1 and the flags are
        // clobbered as well.
13036   effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
13037 
13038   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
13039   ins_encode %{
          // Note the argument order: both string pointers first, then both
          // counts, then the result and scratch registers.
13040     __ string_compare($str1$$Register, $str2$$Register,
13041                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
13042                       $tmp1$$Register);
13043   %}
13044   ins_pipe(pipe_class_memory);
13045 %}
13046 
13047 instruct string_indexof(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
13048        iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
13049 %{
        // String.indexOf intrinsic (StrIndexOf node) for the case where the
        // second count is a register value. The strings, counts and result
        // are pinned to fixed registers; tmp1..tmp4 are allocator-chosen.
13050   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
13051   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
13052          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
13053   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result" %}
13054 
13055   ins_encode %{
          // The literal -1 occupies the argument slot where
          // string_indexof_con passes its constant count -- presumably it
          // signals "count not known at compile time"; confirm against the
          // macro assembler.
13056     __ string_indexof($str1$$Register, $str2$$Register,
13057                       $cnt1$$Register, $cnt2$$Register,
13058                       $tmp1$$Register, $tmp2$$Register,
13059                       $tmp3$$Register, $tmp4$$Register,
13060                       -1, $result$$Register);
13061   %}
13062   ins_pipe(pipe_class_memory);
13063 %}
13064 
13065 instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
13066                  immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
13067                  iRegI tmp3, iRegI tmp4, rFlagsReg cr)
13068 %{
        // String.indexOf intrinsic (StrIndexOf node) for the case where the
        // second count is an immediate accepted by the immI_le_4 operand
        // (presumably a constant <= 4 -- see that operand's definition).
13069   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
        // No cnt2 register exists in this form, so only three inputs are
        // USE_KILLed.
13070   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
13071          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
13072   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result" %}
13073 
13074   ins_encode %{
13075     int icnt2 = (int)$int_cnt2$$constant;
          // zr fills the cnt2 register slot; the constant count is passed
          // through the integer argument instead.
13076     __ string_indexof($str1$$Register, $str2$$Register,
13077                       $cnt1$$Register, zr,
13078                       $tmp1$$Register, $tmp2$$Register,
13079                       $tmp3$$Register, $tmp4$$Register,
13080                       icnt2, $result$$Register);
13081   %}
13082   ins_pipe(pipe_class_memory);
13083 %}
13084 
13085 instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
13086                         iRegI_R0 result, iRegP_R10 tmp, rFlagsReg cr)
13087 %{
        // String equality intrinsic (StrEquals node): two string pointers and
        // a single shared count. Every operand is pinned to a fixed register.
13088   match(Set result (StrEquals (Binary str1 str2) cnt));
        // Inputs are consumed and clobbered; tmp and the flags are clobbered.
13089   effect(KILL tmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
13090 
13091   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp" %}
13092   ins_encode %{
13093     __ string_equals($str1$$Register, $str2$$Register,
13094                       $cnt$$Register, $result$$Register,
13095                       $tmp$$Register);
13096   %}
13097   ins_pipe(pipe_class_memory);
13098 %}
13099 
13100 instruct array_equals(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
13101                       iRegP_R10 tmp, rFlagsReg cr)
13102 %{
        // Array equality intrinsic (AryEq node). Every operand is pinned to a
        // fixed register by its operand class.
13103   match(Set result (AryEq ary1 ary2));
        // Both array pointers are consumed and clobbered; tmp and the flags
        // are clobbered as well.
13104   effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);
13105 
        // "$ary2" needs its '$' so the operand is substituted in the listing
        // instead of printing the literal text "ary2".
13106   format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
13107   ins_encode %{
          // Delegates to the char-array comparison in the macro assembler.
13108     __ char_arrays_equals($ary1$$Register, $ary2$$Register,
13109                           $result$$Register, $tmp$$Register);
13110   %}
13111   ins_pipe(pipe_class_memory);
13112 %}
13113 
13114 // encode char[] to byte[] in ISO_8859_1
13115 instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
13116                           vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
13117                           vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
13118                           iRegI_R0 result, rFlagsReg cr)
13119 %{
        // EncodeISOArray intrinsic. General operands are pinned to r0-r3 and
        // the four vector temporaries to v0-v3 by their operand classes.
13120   match(Set result (EncodeISOArray src (Binary dst len)));
        // Inputs are consumed and clobbered; the vector temporaries and the
        // flags are clobbered.
13121   effect(USE_KILL src, USE_KILL dst, USE_KILL len,
13122          KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);
13123 
13124   format %{ "Encode array $src,$dst,$len -> $result" %}
13125   ins_encode %{
          // Note: result is passed before the vector temporaries.
13126     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
13127          $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
13128          $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
13129   %}
13130   ins_pipe( pipe_class_memory );
13131 %}
13132 
13133 // ============================================================================
13134 // This name is KNOWN by the ADLC and cannot be changed.
13135 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13136 // for this guy.
13137 instruct tlsLoadP(thread_RegP dst)
13138 %{
        // Defines $dst as the result of a ThreadLocal node. $dst is pinned to
        // the thread_RegP operand class, and no code is emitted (see size(0)
        // and the empty encoding below).
13139   match(Set dst (ThreadLocal));
13140 
13141   ins_cost(0);
13142 
13143   format %{ " -- \t// $dst=Thread::current(), empty" %}
13144 
13145   size(0);
13146 
13147   ins_encode( /*empty*/ );
13148 
13149   ins_pipe(pipe_class_empty);
13150 %}
13151 
13152 // ====================VECTOR INSTRUCTIONS=====================================
13153 
13154 // Load vector (32 bits)
13155 instruct loadV4(vecD dst, vmem mem)
13156 %{
        // Selected only for LoadVector nodes whose memory_size() is 4 bytes;
        // the value still lands in a vecD operand.
13157   predicate(n->as_LoadVector()->memory_size() == 4);
13158   match(Set dst (LoadVector mem));
13159   ins_cost(4 * INSN_COST);
13160   format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
        // S-sized vector load encoding.
13161   ins_encode( aarch64_enc_ldrvS(dst, mem) );
13162   ins_pipe(pipe_class_memory);
13163 %}
13164 
13165 // Load vector (64 bits)
13166 instruct loadV8(vecD dst, vmem mem)
13167 %{
        // Selected only for LoadVector nodes whose memory_size() is 8 bytes.
13168   predicate(n->as_LoadVector()->memory_size() == 8);
13169   match(Set dst (LoadVector mem));
13170   ins_cost(4 * INSN_COST);
13171   format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
        // D-sized vector load encoding.
13172   ins_encode( aarch64_enc_ldrvD(dst, mem) );
13173   ins_pipe(pipe_class_memory);
13174 %}
13175 
13176 // Load Vector (128 bits)
13177 instruct loadV16(vecX dst, vmem mem)
13178 %{
        // Selected only for LoadVector nodes whose memory_size() is 16 bytes;
        // the destination is a vecX operand.
13179   predicate(n->as_LoadVector()->memory_size() == 16);
13180   match(Set dst (LoadVector mem));
13181   ins_cost(4 * INSN_COST);
13182   format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
        // Q-sized vector load encoding.
13183   ins_encode( aarch64_enc_ldrvQ(dst, mem) );
13184   ins_pipe(pipe_class_memory);
13185 %}
13186 
13187 // Store Vector (32 bits)
13188 instruct storeV4(vecD src, vmem mem)
13189 %{
        // Selected only for StoreVector nodes whose memory_size() is 4 bytes;
        // the source is still a vecD operand.
13190   predicate(n->as_StoreVector()->memory_size() == 4);
13191   match(Set mem (StoreVector mem src));
13192   ins_cost(4 * INSN_COST);
13193   format %{ "strs   $mem,$src\t# vector (32 bits)" %}
        // S-sized vector store encoding.
13194   ins_encode( aarch64_enc_strvS(src, mem) );
13195   ins_pipe(pipe_class_memory);
13196 %}
13197 
13198 // Store Vector (64 bits)
13199 instruct storeV8(vecD src, vmem mem)
13200 %{
        // Selected only for StoreVector nodes whose memory_size() is 8 bytes.
13201   predicate(n->as_StoreVector()->memory_size() == 8);
13202   match(Set mem (StoreVector mem src));
13203   ins_cost(4 * INSN_COST);
13204   format %{ "strd   $mem,$src\t# vector (64 bits)" %}
        // D-sized vector store encoding.
13205   ins_encode( aarch64_enc_strvD(src, mem) );
13206   ins_pipe(pipe_class_memory);
13207 %}
13208 
13209 // Store Vector (128 bits)
13210 instruct storeV16(vecX src, vmem mem)
13211 %{
        // Selected only for StoreVector nodes whose memory_size() is 16 bytes;
        // the source is a vecX operand.
13212   predicate(n->as_StoreVector()->memory_size() == 16);
13213   match(Set mem (StoreVector mem src));
13214   ins_cost(4 * INSN_COST);
13215   format %{ "strq   $mem,$src\t# vector (128 bits)" %}
        // Q-sized vector store encoding.
13216   ins_encode( aarch64_enc_strvQ(src, mem) );
13217   ins_pipe(pipe_class_memory);
13218 %}
13219 
13220 instruct replicate8B(vecD dst, iRegIorL2I src)
13221 %{
        // Broadcast a general-register value into byte lanes (ReplicateB) of a
        // vecD destination. Handles vector lengths 4 and 8 with the same T8B
        // arrangement.
13222   predicate(n->as_Vector()->length() == 4 ||
13223             n->as_Vector()->length() == 8);
13224   match(Set dst (ReplicateB src));
13225   ins_cost(INSN_COST);
13226   format %{ "dup  $dst, $src\t# vector (8B)" %}
13227   ins_encode %{
13228     __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
13229   %}
13230   ins_pipe(pipe_class_default);
13231 %}
13232 
13233 instruct replicate16B(vecX dst, iRegIorL2I src)
13234 %{
        // Broadcast a general-register value into 16 byte lanes (ReplicateB)
        // of a vecX destination.
13235   predicate(n->as_Vector()->length() == 16);
13236   match(Set dst (ReplicateB src));
13237   ins_cost(INSN_COST);
13238   format %{ "dup  $dst, $src\t# vector (16B)" %}
13239   ins_encode %{
13240     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
13241   %}
13242   ins_pipe(pipe_class_default);
13243 %}
13244 
13245 instruct replicate8B_imm(vecD dst, immI con)
13246 %{
        // Broadcast an integer constant into byte lanes (ReplicateB) of a vecD
        // destination. Handles vector lengths 4 and 8 with the T8B arrangement.
13247   predicate(n->as_Vector()->length() == 4 ||
13248             n->as_Vector()->length() == 8);
13249   match(Set dst (ReplicateB con));
13250   ins_cost(INSN_COST);
13251   format %{ "movi  $dst, $con\t# vector(8B)" %}
13252   ins_encode %{
          // Only the low 8 bits of the constant are passed on.
13253     __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
13254   %}
13255   ins_pipe(pipe_class_default);
13256 %}
13257 
13258 instruct replicate16B_imm(vecX dst, immI con)
13259 %{
        // Broadcast an integer constant into 16 byte lanes (ReplicateB) of a
        // vecX destination.
13260   predicate(n->as_Vector()->length() == 16);
13261   match(Set dst (ReplicateB con));
13262   ins_cost(INSN_COST);
13263   format %{ "movi  $dst, $con\t# vector(16B)" %}
13264   ins_encode %{
          // Only the low 8 bits of the constant are passed on.
13265     __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
13266   %}
13267   ins_pipe(pipe_class_default);
13268 %}
13269 
13270 instruct replicate4S(vecD dst, iRegIorL2I src)
13271 %{
        // Broadcast a general-register value into short lanes (ReplicateS) of
        // a vecD destination. Handles vector lengths 2 and 4 with the same T4H
        // arrangement. The "(4S)" in the listing text names the same shape
        // that the *_imm variant labels "(4H)".
13272   predicate(n->as_Vector()->length() == 2 ||
13273             n->as_Vector()->length() == 4);
13274   match(Set dst (ReplicateS src));
13275   ins_cost(INSN_COST);
13276   format %{ "dup  $dst, $src\t# vector (4S)" %}
13277   ins_encode %{
13278     __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
13279   %}
13280   ins_pipe(pipe_class_default);
13281 %}
13282 
13283 instruct replicate8S(vecX dst, iRegIorL2I src)
13284 %{
        // Broadcast a general-register value into 8 short lanes (ReplicateS)
        // of a vecX destination, using the T8H arrangement. The "(8S)" in the
        // listing text names the same shape that the *_imm variant labels
        // "(8H)".
13285   predicate(n->as_Vector()->length() == 8);
13286   match(Set dst (ReplicateS src));
13287   ins_cost(INSN_COST);
13288   format %{ "dup  $dst, $src\t# vector (8S)" %}
13289   ins_encode %{
13290     __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
13291   %}
13292   ins_pipe(pipe_class_default);
13293 %}
13294 
13295 instruct replicate4S_imm(vecD dst, immI con)
13296 %{
        // Broadcast an integer constant into short lanes (ReplicateS) of a
        // vecD destination. Handles vector lengths 2 and 4 with the T4H
        // arrangement.
13297   predicate(n->as_Vector()->length() == 2 ||
13298             n->as_Vector()->length() == 4);
13299   match(Set dst (ReplicateS con));
13300   ins_cost(INSN_COST);
13301   format %{ "movi  $dst, $con\t# vector(4H)" %}
13302   ins_encode %{
          // Only the low 16 bits of the constant are passed on.
13303     __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
13304   %}
13305   ins_pipe(pipe_class_default);
13306 %}
13307 
13308 instruct replicate8S_imm(vecX dst, immI con)
13309 %{
        // Broadcast an integer constant into 8 short lanes (ReplicateS) of a
        // vecX destination.
13310   predicate(n->as_Vector()->length() == 8);
13311   match(Set dst (ReplicateS con));
13312   ins_cost(INSN_COST);
13313   format %{ "movi  $dst, $con\t# vector(8H)" %}
13314   ins_encode %{
          // Only the low 16 bits of the constant are passed on.
13315     __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
13316   %}
13317   ins_pipe(pipe_class_default);
13318 %}
13319 
13320 instruct replicate2I(vecD dst, iRegIorL2I src)
13321 %{
        // Broadcast a general-register value into 2 int lanes (ReplicateI) of
        // a vecD destination, using the T2S arrangement.
13322   predicate(n->as_Vector()->length() == 2);
13323   match(Set dst (ReplicateI src));
13324   ins_cost(INSN_COST);
13325   format %{ "dup  $dst, $src\t# vector (2I)" %}
13326   ins_encode %{
13327     __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
13328   %}
13329   ins_pipe(pipe_class_default);
13330 %}
13331 
13332 instruct replicate4I(vecX dst, iRegIorL2I src)
13333 %{
        // Broadcast a general-register value into 4 int lanes (ReplicateI) of
        // a vecX destination, using the T4S arrangement.
13334   predicate(n->as_Vector()->length() == 4);
13335   match(Set dst (ReplicateI src));
13336   ins_cost(INSN_COST);
13337   format %{ "dup  $dst, $src\t# vector (4I)" %}
13338   ins_encode %{
13339     __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
13340   %}
13341   ins_pipe(pipe_class_default);
13342 %}
13343 
13344 instruct replicate2I_imm(vecD dst, immI con)
13345 %{
        // Broadcast an integer constant into 2 int lanes (ReplicateI) of a
        // vecD destination.
13346   predicate(n->as_Vector()->length() == 2);
13347   match(Set dst (ReplicateI con));
13348   ins_cost(INSN_COST);
13349   format %{ "movi  $dst, $con\t# vector(2I)" %}
13350   ins_encode %{
          // Unlike the byte/short variants, the constant is passed unmasked.
13351     __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
13352   %}
13353   ins_pipe(pipe_class_default);
13354 %}
13355 
13356 instruct replicate4I_imm(vecX dst, immI con)
13357 %{
        // Broadcast an integer constant into 4 int lanes (ReplicateI) of a
        // vecX destination.
13358   predicate(n->as_Vector()->length() == 4);
13359   match(Set dst (ReplicateI con));
13360   ins_cost(INSN_COST);
13361   format %{ "movi  $dst, $con\t# vector(4I)" %}
13362   ins_encode %{
          // Unlike the byte/short variants, the constant is passed unmasked.
13363     __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
13364   %}
13365   ins_pipe(pipe_class_default);
13366 %}
13367 
13368 instruct replicate2L(vecX dst, iRegL src)
13369 %{
        // Broadcast a long general-register value into 2 long lanes
        // (ReplicateL) of a vecX destination, using the T2D arrangement.
13370   predicate(n->as_Vector()->length() == 2);
13371   match(Set dst (ReplicateL src));
13372   ins_cost(INSN_COST);
13373   format %{ "dup  $dst, $src\t# vector (2L)" %}
13374   ins_encode %{
13375     __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
13376   %}
13377   ins_pipe(pipe_class_default);
13378 %}
13379 
13380 instruct replicate2L_zero(vecX dst, immI0 zero)
13381 %{
        // Zeroes a whole vecX register by XOR-ing it with itself.
        // NOTE(review): the instruct name says "2L", but the rule matches
        // ReplicateI with an int zero operand and a vector length of 2, and
        // the listing text says "movi ... vector(4I)" while the encoding emits
        // eor. Confirm which node/length this is meant to cover before
        // changing anything.
13382   predicate(n->as_Vector()->length() == 2);
13383   match(Set dst (ReplicateI zero));
13384   ins_cost(INSN_COST);
13385   format %{ "movi  $dst, $zero\t# vector(4I)" %}
13386   ins_encode %{
          // dst = dst ^ dst over all 16 bytes, i.e. zero; $zero itself is
          // not passed to the assembler.
13387     __ eor(as_FloatRegister($dst$$reg), __ T16B,
13388            as_FloatRegister($dst$$reg),
13389            as_FloatRegister($dst$$reg));
13390   %}
13391   ins_pipe(pipe_class_default);
13392 %}
13393 
13394 instruct replicate2F(vecD dst, vRegF src)
13395 %{
        // Broadcast a float held in an FP register into 2 lanes (ReplicateF)
        // of a vecD destination, using the T2S arrangement.
13396   predicate(n->as_Vector()->length() == 2);
13397   match(Set dst (ReplicateF src));
13398   ins_cost(INSN_COST);
13399   format %{ "dup  $dst, $src\t# vector (2F)" %}
13400   ins_encode %{
          // Register-to-register form: the source is an FP/SIMD register.
13401     __ dup(as_FloatRegister($dst$$reg), __ T2S,
13402            as_FloatRegister($src$$reg));
13403   %}
13404   ins_pipe(pipe_class_default);
13405 %}
13406 
13407 instruct replicate4F(vecX dst, vRegF src)
13408 %{
        // Broadcast a float held in an FP register into 4 lanes (ReplicateF)
        // of a vecX destination, using the T4S arrangement.
13409   predicate(n->as_Vector()->length() == 4);
13410   match(Set dst (ReplicateF src));
13411   ins_cost(INSN_COST);
13412   format %{ "dup  $dst, $src\t# vector (4F)" %}
13413   ins_encode %{
          // Register-to-register form: the source is an FP/SIMD register.
13414     __ dup(as_FloatRegister($dst$$reg), __ T4S,
13415            as_FloatRegister($src$$reg));
13416   %}
13417   ins_pipe(pipe_class_default);
13418 %}
13419 
13420 instruct replicate2D(vecX dst, vRegD src)
13421 %{
        // Broadcast a double held in an FP register into 2 lanes (ReplicateD)
        // of a vecX destination, using the T2D arrangement.
13422   predicate(n->as_Vector()->length() == 2);
13423   match(Set dst (ReplicateD src));
13424   ins_cost(INSN_COST);
13425   format %{ "dup  $dst, $src\t# vector (2D)" %}
13426   ins_encode %{
          // Register-to-register form: the source is an FP/SIMD register.
13427     __ dup(as_FloatRegister($dst$$reg), __ T2D,
13428            as_FloatRegister($src$$reg));
13429   %}
13430   ins_pipe(pipe_class_default);
13431 %}
13432 
13433 // ====================REDUCTION ARITHMETIC====================================
13434 
13435 instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp, iRegI tmp2)
13436 %{
        // Integer add reduction over a vecD: dst = src1 + src2[0] + src2[1].
        // There is no predicate; this form is distinguished from
        // reduce_add4I only by the vecD type of src2.
13437   match(Set dst (AddReductionVI src1 src2));
13438   ins_cost(INSN_COST);
13439   effect(TEMP tmp, TEMP tmp2);
13440   format %{ "umov  $tmp, $src2, S, 0\n\t"
13441             "umov  $tmp2, $src2, S, 1\n\t"
13442             "addw  $dst, $src1, $tmp\n\t"
13443             "addw  $dst, $dst, $tmp2\t add reduction2i"
13444   %}
13445   ins_encode %{
          // Move both S lanes to general registers first, then accumulate
          // with 32-bit adds.
13446     __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
13447     __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
13448     __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
13449     __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
13450   %}
13451   ins_pipe(pipe_class_default);
13452 %}
13453 
13454 instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
13455 %{
        // Integer add reduction over a vecX: the four S lanes of src2 are
        // summed in the vector unit, then src1 is added in a general register.
13456   match(Set dst (AddReductionVI src1 src2));
13457   ins_cost(INSN_COST);
        // tmp is a vector temporary, tmp2 a general-register temporary.
13458   effect(TEMP tmp, TEMP tmp2);
13459   format %{ "addv  $tmp, T4S, $src2\n\t"
13460             "umov  $tmp2, $tmp, S, 0\n\t"
13461             "addw  $dst, $tmp2, $src1\t add reduction4i"
13462   %}
13463   ins_encode %{
          // Two-operand addv (across-lanes form) into tmp; the sum is then
          // read back from lane 0.
13464     __ addv(as_FloatRegister($tmp$$reg), __ T4S,
13465             as_FloatRegister($src2$$reg));
13466     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
13467     __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
13468   %}
13469   ins_pipe(pipe_class_default);
13470 %}
13471 
13472 instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp)
13473 %{
        // Integer multiply reduction over a vecD:
        // dst = src1 * src2[0] * src2[1].
13474   match(Set dst (MulReductionVI src1 src2));
13475   ins_cost(INSN_COST);
        // dst is written by the first mul while src2 is still read afterwards,
        // and is declared TEMP alongside tmp.
13476   effect(TEMP tmp, TEMP dst);
        // The listing no longer ends with a dangling "\n\t" after the last
        // instruction.
13477   format %{ "umov  $tmp, $src2, S, 0\n\t"
13478             "mul   $dst, $tmp, $src1\n\t"
13479             "umov  $tmp, $src2, S, 1\n\t"
13480             "mul   $dst, $tmp, $dst\t mul reduction2i"
13481   %}
13482   ins_encode %{
          // tmp is reused for both lanes: extract lane, multiply, repeat.
13483     __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
13484     __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
13485     __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
13486     __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
13487   %}
13488   ins_pipe(pipe_class_default);
13489 %}
13490 
13491 instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
13492 %{
        // Integer multiply reduction over a vecX. The two 64-bit halves of
        // src2 are first multiplied lane-wise in the vector unit, then the two
        // remaining S lanes and src1 are multiplied in general registers.
13493   match(Set dst (MulReductionVI src1 src2));
13494   ins_cost(INSN_COST);
13495   effect(TEMP tmp, TEMP tmp2, TEMP dst);
        // Listing text mirrors the encoding: the ins carries its D size, the
        // vector multiply is shown as mulv with its T2S arrangement, and there
        // is no dangling "\n\t" after the last instruction.
13496   format %{ "ins   $tmp, D, $src2, 0, 1\n\t"
13497             "mulv  $tmp, T2S, $tmp, $src2\n\t"
13498             "umov  $tmp2, $tmp, S, 0\n\t"
13499             "mul   $dst, $tmp2, $src1\n\t"
13500             "umov  $tmp2, $tmp, S, 1\n\t"
13501             "mul   $dst, $tmp2, $dst\t mul reduction4i"
13502   %}
13503   ins_encode %{
          // D-sized ins with indices (0, 1): presumably moves the upper
          // 64 bits of src2 into the lower 64 bits of tmp -- confirm the
          // index order against the assembler.
13504     __ ins(as_FloatRegister($tmp$$reg), __ D,
13505            as_FloatRegister($src2$$reg), 0, 1);
13506     __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
13507            as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
          // Fold the two partial products and src1 in general registers.
13508     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
13509     __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
13510     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
13511     __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
13512   %}
13513   ins_pipe(pipe_class_default);
13514 %}
13515 
13516 instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
13517 %{
        // Float add reduction over a vecD: src1 plus the two S lanes of src2,
        // accumulated one scalar add at a time.
13518   match(Set dst (AddReductionVF src1 src2));
13519   ins_cost(INSN_COST);
        // dst is written by the first fadds while src2 is still read
        // afterwards, and is declared TEMP alongside tmp.
13520   effect(TEMP tmp, TEMP dst);
13521   format %{ "fadds $dst, $src1, $src2\n\t"
13522             "ins   $tmp, S, $src2, 0, 1\n\t"
13523             "fadds $dst, $dst, $tmp\t add reduction2f"
13524   %}
13525   ins_encode %{
          // The scalar fadds on the src2 register picks up its first lane.
13526     __ fadds(as_FloatRegister($dst$$reg),
13527              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
          // ins with indices (0, 1): presumably copies lane 1 of src2 into
          // lane 0 of tmp -- confirm the index order against the assembler.
13528     __ ins(as_FloatRegister($tmp$$reg), __ S,
13529            as_FloatRegister($src2$$reg), 0, 1);
13530     __ fadds(as_FloatRegister($dst$$reg),
13531              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13532   %}
13533   ins_pipe(pipe_class_default);
13534 %}
13535 
13536 instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
13537 %{
        // Float add reduction over a vecX: src1 plus the four S lanes of src2,
        // accumulated strictly in lane order with scalar adds.
13538   match(Set dst (AddReductionVF src1 src2));
13539   ins_cost(INSN_COST);
        // dst is written by the first fadds while src2 is still read
        // afterwards, and is declared TEMP alongside tmp.
13540   effect(TEMP tmp, TEMP dst);
13541   format %{ "fadds $dst, $src1, $src2\n\t"
13542             "ins   $tmp, S, $src2, 0, 1\n\t"
13543             "fadds $dst, $dst, $tmp\n\t"
13544             "ins   $tmp, S, $src2, 0, 2\n\t"
13545             "fadds $dst, $dst, $tmp\n\t"
13546             "ins   $tmp, S, $src2, 0, 3\n\t"
13547             "fadds $dst, $dst, $tmp\t add reduction4f"
13548   %}
13549   ins_encode %{
          // First lane: scalar add directly on the src2 register.
13550     __ fadds(as_FloatRegister($dst$$reg),
13551              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
          // Remaining lanes 1..3: bring each into tmp with ins, then add.
13552     __ ins(as_FloatRegister($tmp$$reg), __ S,
13553            as_FloatRegister($src2$$reg), 0, 1);
13554     __ fadds(as_FloatRegister($dst$$reg),
13555              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13556     __ ins(as_FloatRegister($tmp$$reg), __ S,
13557            as_FloatRegister($src2$$reg), 0, 2);
13558     __ fadds(as_FloatRegister($dst$$reg),
13559              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13560     __ ins(as_FloatRegister($tmp$$reg), __ S,
13561            as_FloatRegister($src2$$reg), 0, 3);
13562     __ fadds(as_FloatRegister($dst$$reg),
13563              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13564   %}
13565   ins_pipe(pipe_class_default);
13566 %}
13567 
13568 instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
13569 %{
        // Float multiply reduction over a vecD: src1 times the two S lanes of
        // src2, accumulated one scalar multiply at a time.
13570   match(Set dst (MulReductionVF src1 src2));
13571   ins_cost(INSN_COST);
        // dst is written by the first fmuls while src2 is still read
        // afterwards, and is declared TEMP alongside tmp.
13572   effect(TEMP tmp, TEMP dst);
        // The trailing label names this operation (it previously carried the
        // "add reduction4f" text of a different instruct).
13573   format %{ "fmuls $dst, $src1, $src2\n\t"
13574             "ins   $tmp, S, $src2, 0, 1\n\t"
13575             "fmuls $dst, $dst, $tmp\t mul reduction2f"
13576   %}
13577   ins_encode %{
          // First lane: scalar multiply directly on the src2 register.
13578     __ fmuls(as_FloatRegister($dst$$reg),
13579              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
          // Second lane: bring it into tmp with ins, then multiply.
13580     __ ins(as_FloatRegister($tmp$$reg), __ S,
13581            as_FloatRegister($src2$$reg), 0, 1);
13582     __ fmuls(as_FloatRegister($dst$$reg),
13583              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13584   %}
13585   ins_pipe(pipe_class_default);
13586 %}
13587 
13588 instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
13589 %{
        // Float multiply reduction over a vecX: src1 times the four S lanes of
        // src2, accumulated strictly in lane order with scalar multiplies.
13590   match(Set dst (MulReductionVF src1 src2));
13591   ins_cost(INSN_COST);
        // dst is written by the first fmuls while src2 is still read
        // afterwards, and is declared TEMP alongside tmp.
13592   effect(TEMP tmp, TEMP dst);
        // The trailing label names this operation (it previously read
        // "add reduction4f").
13593   format %{ "fmuls $dst, $src1, $src2\n\t"
13594             "ins   $tmp, S, $src2, 0, 1\n\t"
13595             "fmuls $dst, $dst, $tmp\n\t"
13596             "ins   $tmp, S, $src2, 0, 2\n\t"
13597             "fmuls $dst, $dst, $tmp\n\t"
13598             "ins   $tmp, S, $src2, 0, 3\n\t"
13599             "fmuls $dst, $dst, $tmp\t mul reduction4f"
13600   %}
13601   ins_encode %{
          // First lane: scalar multiply directly on the src2 register.
13602     __ fmuls(as_FloatRegister($dst$$reg),
13603              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
          // Remaining lanes 1..3: bring each into tmp with ins, then multiply.
13604     __ ins(as_FloatRegister($tmp$$reg), __ S,
13605            as_FloatRegister($src2$$reg), 0, 1);
13606     __ fmuls(as_FloatRegister($dst$$reg),
13607              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13608     __ ins(as_FloatRegister($tmp$$reg), __ S,
13609            as_FloatRegister($src2$$reg), 0, 2);
13610     __ fmuls(as_FloatRegister($dst$$reg),
13611              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13612     __ ins(as_FloatRegister($tmp$$reg), __ S,
13613            as_FloatRegister($src2$$reg), 0, 3);
13614     __ fmuls(as_FloatRegister($dst$$reg),
13615              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13616   %}
13617   ins_pipe(pipe_class_default);
13618 %}
13619 
13620 instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
13621 %{
        // Double add reduction over a vecX: src1 plus the two D lanes of src2,
        // accumulated one scalar add at a time.
13622   match(Set dst (AddReductionVD src1 src2));
13623   ins_cost(INSN_COST);
        // dst is written by the first faddd while src2 is still read
        // afterwards, and is declared TEMP alongside tmp.
13624   effect(TEMP tmp, TEMP dst);
13625   format %{ "faddd $dst, $src1, $src2\n\t"
13626             "ins   $tmp, D, $src2, 0, 1\n\t"
13627             "faddd $dst, $dst, $tmp\t add reduction2d"
13628   %}
13629   ins_encode %{
          // First lane: scalar add directly on the src2 register.
13630     __ faddd(as_FloatRegister($dst$$reg),
13631              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
          // Second lane: bring it into tmp with ins, then add.
13632     __ ins(as_FloatRegister($tmp$$reg), __ D,
13633            as_FloatRegister($src2$$reg), 0, 1);
13634     __ faddd(as_FloatRegister($dst$$reg),
13635              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13636   %}
13637   ins_pipe(pipe_class_default);
13638 %}
13639 
13640 instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
13641 %{
        // Double multiply reduction over a vecX: src1 times the two D lanes of
        // src2, accumulated one scalar multiply at a time.
13642   match(Set dst (MulReductionVD src1 src2));
13643   ins_cost(INSN_COST);
        // dst is written by the first fmuld while src2 is still read
        // afterwards, and is declared TEMP alongside tmp.
13644   effect(TEMP tmp, TEMP dst);
        // The trailing label names this operation (it previously read
        // "add reduction2d").
13645   format %{ "fmuld $dst, $src1, $src2\n\t"
13646             "ins   $tmp, D, $src2, 0, 1\n\t"
13647             "fmuld $dst, $dst, $tmp\t mul reduction2d"
13648   %}
13649   ins_encode %{
          // First lane: scalar multiply directly on the src2 register.
13650     __ fmuld(as_FloatRegister($dst$$reg),
13651              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
          // Second lane: bring it into tmp with ins, then multiply.
13652     __ ins(as_FloatRegister($tmp$$reg), __ D,
13653            as_FloatRegister($src2$$reg), 0, 1);
13654     __ fmuld(as_FloatRegister($dst$$reg),
13655              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
13656   %}
13657   ins_pipe(pipe_class_default);
13658 %}
13659 
13660 // ====================VECTOR ARITHMETIC=======================================
13661 
13662 // --------------------------------- ADD --------------------------------------
13663 
13664 instruct vadd8B(vecD dst, vecD src1, vecD src2)
13665 %{
        // Lane-wise byte add (AddVB) on vecD operands. Handles vector lengths
        // 4 and 8 with the same T8B arrangement.
13666   predicate(n->as_Vector()->length() == 4 ||
13667             n->as_Vector()->length() == 8);
13668   match(Set dst (AddVB src1 src2));
13669   ins_cost(INSN_COST);
13670   format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
13671   ins_encode %{
          // Three-operand addv: element-wise dst = src1 + src2.
13672     __ addv(as_FloatRegister($dst$$reg), __ T8B,
13673             as_FloatRegister($src1$$reg),
13674             as_FloatRegister($src2$$reg));
13675   %}
13676   ins_pipe(pipe_class_default);
13677 %}
13678 
13679 instruct vadd16B(vecX dst, vecX src1, vecX src2)
13680 %{
        // Lane-wise byte add (AddVB) on vecX operands, 16 lanes.
13681   predicate(n->as_Vector()->length() == 16);
13682   match(Set dst (AddVB src1 src2));
13683   ins_cost(INSN_COST);
13684   format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
13685   ins_encode %{
          // Three-operand addv: element-wise dst = src1 + src2.
13686     __ addv(as_FloatRegister($dst$$reg), __ T16B,
13687             as_FloatRegister($src1$$reg),
13688             as_FloatRegister($src2$$reg));
13689   %}
13690   ins_pipe(pipe_class_default);
13691 %}
13692 
13693 instruct vadd4S(vecD dst, vecD src1, vecD src2)
13694 %{
        // Lane-wise short add (AddVS) on vecD operands. Handles vector lengths
        // 2 and 4 with the same T4H arrangement.
13695   predicate(n->as_Vector()->length() == 2 ||
13696             n->as_Vector()->length() == 4);
13697   match(Set dst (AddVS src1 src2));
13698   ins_cost(INSN_COST);
13699   format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
13700   ins_encode %{
          // Three-operand addv: element-wise dst = src1 + src2.
13701     __ addv(as_FloatRegister($dst$$reg), __ T4H,
13702             as_FloatRegister($src1$$reg),
13703             as_FloatRegister($src2$$reg));
13704   %}
13705   ins_pipe(pipe_class_default);
13706 %}
13707 
// AddVS on vecX operands for 8-element vectors; emitted as a single addv
// using the T8H arrangement.
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister lhs = as_FloatRegister($src1$$reg);
    FloatRegister rhs = as_FloatRegister($src2$$reg);
    __ addv(result, __ T8H, lhs, rhs);
  %}
  ins_pipe(pipe_class_default);
%}
13721 
// AddVI on vecD operands for 2-element vectors; emitted as a single addv
// using the T2S arrangement.
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister lhs = as_FloatRegister($src1$$reg);
    FloatRegister rhs = as_FloatRegister($src2$$reg);
    __ addv(result, __ T2S, lhs, rhs);
  %}
  ins_pipe(pipe_class_default);
%}
13735 
// AddVI on vecX operands for 4-element vectors; emitted as a single addv
// using the T4S arrangement.
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister lhs = as_FloatRegister($src1$$reg);
    FloatRegister rhs = as_FloatRegister($src2$$reg);
    __ addv(result, __ T4S, lhs, rhs);
  %}
  ins_pipe(pipe_class_default);
%}
13749 
// AddVL on vecX operands for 2-element vectors; emitted as a single addv
// using the T2D arrangement.
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister lhs = as_FloatRegister($src1$$reg);
    FloatRegister rhs = as_FloatRegister($src2$$reg);
    __ addv(result, __ T2D, lhs, rhs);
  %}
  ins_pipe(pipe_class_default);
%}
13763 
// AddVF on vecD operands for 2-element vectors; emitted as a single fadd
// using the T2S arrangement.
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister lhs = as_FloatRegister($src1$$reg);
    FloatRegister rhs = as_FloatRegister($src2$$reg);
    __ fadd(result, __ T2S, lhs, rhs);
  %}
  ins_pipe(pipe_class_default);
%}
13777 
// AddVF on vecX operands for 4-element vectors; emitted as a single fadd
// using the T4S arrangement.
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister lhs = as_FloatRegister($src1$$reg);
    FloatRegister rhs = as_FloatRegister($src2$$reg);
    __ fadd(result, __ T4S, lhs, rhs);
  %}
  ins_pipe(pipe_class_default);
%}
13791 
// AddVD on vecX operands for 2-element vectors; emitted as a single fadd
// using the T2D arrangement.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  // Explicit length guard, matching the sibling 2D rules (vsub2D, vmul2D,
  // vdiv2D), so the rule only applies to the lane count its T2D encoding
  // assumes.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13804 
13805 // --------------------------------- SUB --------------------------------------
13806 
// SubVB on vecD operands for vectors of 4 or 8 elements; emitted as a single
// subv using the T8B arrangement.
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister minuend = as_FloatRegister($src1$$reg);
    FloatRegister subtrahend = as_FloatRegister($src2$$reg);
    __ subv(result, __ T8B, minuend, subtrahend);
  %}
  ins_pipe(pipe_class_default);
%}
13821 
// SubVB on vecX operands for 16-element vectors; emitted as a single subv
// using the T16B arrangement.
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister minuend = as_FloatRegister($src1$$reg);
    FloatRegister subtrahend = as_FloatRegister($src2$$reg);
    __ subv(result, __ T16B, minuend, subtrahend);
  %}
  ins_pipe(pipe_class_default);
%}
13835 
// SubVS on vecD operands for vectors of 2 or 4 elements; emitted as a single
// subv using the T4H arrangement.
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister minuend = as_FloatRegister($src1$$reg);
    FloatRegister subtrahend = as_FloatRegister($src2$$reg);
    __ subv(result, __ T4H, minuend, subtrahend);
  %}
  ins_pipe(pipe_class_default);
%}
13850 
// SubVS on vecX operands for 8-element vectors; emitted as a single subv
// using the T8H arrangement.
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister minuend = as_FloatRegister($src1$$reg);
    FloatRegister subtrahend = as_FloatRegister($src2$$reg);
    __ subv(result, __ T8H, minuend, subtrahend);
  %}
  ins_pipe(pipe_class_default);
%}
13864 
// SubVI on vecD operands for 2-element vectors; emitted as a single subv
// using the T2S arrangement.
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister minuend = as_FloatRegister($src1$$reg);
    FloatRegister subtrahend = as_FloatRegister($src2$$reg);
    __ subv(result, __ T2S, minuend, subtrahend);
  %}
  ins_pipe(pipe_class_default);
%}
13878 
// SubVI on vecX operands for 4-element vectors; emitted as a single subv
// using the T4S arrangement.
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister minuend = as_FloatRegister($src1$$reg);
    FloatRegister subtrahend = as_FloatRegister($src2$$reg);
    __ subv(result, __ T4S, minuend, subtrahend);
  %}
  ins_pipe(pipe_class_default);
%}
13892 
// SubVL on vecX operands for 2-element vectors; emitted as a single subv
// using the T2D arrangement.
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister minuend = as_FloatRegister($src1$$reg);
    FloatRegister subtrahend = as_FloatRegister($src2$$reg);
    __ subv(result, __ T2D, minuend, subtrahend);
  %}
  ins_pipe(pipe_class_default);
%}
13906 
// SubVF on vecD operands for 2-element vectors; emitted as a single fsub
// using the T2S arrangement.
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister minuend = as_FloatRegister($src1$$reg);
    FloatRegister subtrahend = as_FloatRegister($src2$$reg);
    __ fsub(result, __ T2S, minuend, subtrahend);
  %}
  ins_pipe(pipe_class_default);
%}
13920 
// SubVF on vecX operands for 4-element vectors; emitted as a single fsub
// using the T4S arrangement.
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister minuend = as_FloatRegister($src1$$reg);
    FloatRegister subtrahend = as_FloatRegister($src2$$reg);
    __ fsub(result, __ T4S, minuend, subtrahend);
  %}
  ins_pipe(pipe_class_default);
%}
13934 
// SubVD on vecX operands for 2-element vectors; emitted as a single fsub
// using the T2D arrangement.
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister minuend = as_FloatRegister($src1$$reg);
    FloatRegister subtrahend = as_FloatRegister($src2$$reg);
    __ fsub(result, __ T2D, minuend, subtrahend);
  %}
  ins_pipe(pipe_class_default);
%}
13948 
13949 // --------------------------------- MUL --------------------------------------
13950 
// MulVS on vecD operands for vectors of 2 or 4 elements; emitted as a single
// mulv using the T4H arrangement.
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    FloatRegister product = as_FloatRegister($dst$$reg);
    FloatRegister factor1 = as_FloatRegister($src1$$reg);
    FloatRegister factor2 = as_FloatRegister($src2$$reg);
    __ mulv(product, __ T4H, factor1, factor2);
  %}
  ins_pipe(pipe_class_default);
%}
13965 
// MulVS on vecX operands for 8-element vectors; emitted as a single mulv
// using the T8H arrangement.
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    FloatRegister product = as_FloatRegister($dst$$reg);
    FloatRegister factor1 = as_FloatRegister($src1$$reg);
    FloatRegister factor2 = as_FloatRegister($src2$$reg);
    __ mulv(product, __ T8H, factor1, factor2);
  %}
  ins_pipe(pipe_class_default);
%}
13979 
// MulVI on vecD operands for 2-element vectors; emitted as a single mulv
// using the T2S arrangement.
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister product = as_FloatRegister($dst$$reg);
    FloatRegister factor1 = as_FloatRegister($src1$$reg);
    FloatRegister factor2 = as_FloatRegister($src2$$reg);
    __ mulv(product, __ T2S, factor1, factor2);
  %}
  ins_pipe(pipe_class_default);
%}
13993 
// MulVI on vecX operands for 4-element vectors; emitted as a single mulv
// using the T4S arrangement.
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister product = as_FloatRegister($dst$$reg);
    FloatRegister factor1 = as_FloatRegister($src1$$reg);
    FloatRegister factor2 = as_FloatRegister($src2$$reg);
    __ mulv(product, __ T4S, factor1, factor2);
  %}
  ins_pipe(pipe_class_default);
%}
14007 
// MulVF on vecD operands for 2-element vectors; emitted as a single fmul
// using the T2S arrangement.
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister product = as_FloatRegister($dst$$reg);
    FloatRegister factor1 = as_FloatRegister($src1$$reg);
    FloatRegister factor2 = as_FloatRegister($src2$$reg);
    __ fmul(product, __ T2S, factor1, factor2);
  %}
  ins_pipe(pipe_class_default);
%}
14021 
// MulVF on vecX operands for 4-element vectors; emitted as a single fmul
// using the T4S arrangement.
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister product = as_FloatRegister($dst$$reg);
    FloatRegister factor1 = as_FloatRegister($src1$$reg);
    FloatRegister factor2 = as_FloatRegister($src2$$reg);
    __ fmul(product, __ T4S, factor1, factor2);
  %}
  ins_pipe(pipe_class_default);
%}
14035 
// MulVD on vecX operands for 2-element vectors; emitted as a single fmul
// using the T2D arrangement.
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    FloatRegister product = as_FloatRegister($dst$$reg);
    FloatRegister factor1 = as_FloatRegister($src1$$reg);
    FloatRegister factor2 = as_FloatRegister($src2$$reg);
    __ fmul(product, __ T2D, factor1, factor2);
  %}
  ins_pipe(pipe_class_default);
%}
14049 
14050 // --------------------------------- DIV --------------------------------------
14051 
// DivVF on vecD operands for 2-element vectors; emitted as a single fdiv
// using the T2S arrangement.
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister quotient = as_FloatRegister($dst$$reg);
    FloatRegister dividend = as_FloatRegister($src1$$reg);
    FloatRegister divisor = as_FloatRegister($src2$$reg);
    __ fdiv(quotient, __ T2S, dividend, divisor);
  %}
  ins_pipe(pipe_class_default);
%}
14065 
// DivVF on vecX operands for 4-element vectors; emitted as a single fdiv
// using the T4S arrangement.
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister quotient = as_FloatRegister($dst$$reg);
    FloatRegister dividend = as_FloatRegister($src1$$reg);
    FloatRegister divisor = as_FloatRegister($src2$$reg);
    __ fdiv(quotient, __ T4S, dividend, divisor);
  %}
  ins_pipe(pipe_class_default);
%}
14079 
// DivVD on vecX operands for 2-element vectors; emitted as a single fdiv
// using the T2D arrangement.
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    FloatRegister quotient = as_FloatRegister($dst$$reg);
    FloatRegister dividend = as_FloatRegister($src1$$reg);
    FloatRegister divisor = as_FloatRegister($src2$$reg);
    __ fdiv(quotient, __ T2D, dividend, divisor);
  %}
  ins_pipe(pipe_class_default);
%}
14093 
14094 // --------------------------------- AND --------------------------------------
14095 
// AndV on vecD operands whose vector is 4 or 8 bytes long; emitted as a
// single andr using the T8B arrangement.
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister lhs = as_FloatRegister($src1$$reg);
    FloatRegister rhs = as_FloatRegister($src2$$reg);
    __ andr(result, __ T8B, lhs, rhs);
  %}
  ins_pipe(pipe_class_default);
%}
14110 
// AndV on vecX operands whose vector is 16 bytes long; emitted as a single
// andr using the T16B arrangement.
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister lhs = as_FloatRegister($src1$$reg);
    FloatRegister rhs = as_FloatRegister($src2$$reg);
    __ andr(result, __ T16B, lhs, rhs);
  %}
  ins_pipe(pipe_class_default);
%}
14124 
14125 // --------------------------------- OR ---------------------------------------
14126 
// OrV on vecD operands whose vector is 4 or 8 bytes long; emitted as a
// single orr using the T8B arrangement.
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  // The listing text names the instruction that is actually emitted (orr),
  // in line with vor16B; it previously printed "and".
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14141 
// OrV on vecX operands whose vector is 16 bytes long; emitted as a single
// orr using the T16B arrangement.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister lhs = as_FloatRegister($src1$$reg);
    FloatRegister rhs = as_FloatRegister($src2$$reg);
    __ orr(result, __ T16B, lhs, rhs);
  %}
  ins_pipe(pipe_class_default);
%}
14155 
14156 // --------------------------------- XOR --------------------------------------
14157 
// XorV on vecD operands whose vector is 4 or 8 bytes long; emitted as a
// single eor using the T8B arrangement.
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister lhs = as_FloatRegister($src1$$reg);
    FloatRegister rhs = as_FloatRegister($src2$$reg);
    __ eor(result, __ T8B, lhs, rhs);
  %}
  ins_pipe(pipe_class_default);
%}
14172 
// XorV on vecX operands whose vector is 16 bytes long; emitted as a single
// eor using the T16B arrangement.
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister lhs = as_FloatRegister($src1$$reg);
    FloatRegister rhs = as_FloatRegister($src2$$reg);
    __ eor(result, __ T16B, lhs, rhs);
  %}
  ins_pipe(pipe_class_default);
%}
14186 
14187 // ------------------------------ Shift ---------------------------------------
14188 
// LShiftCntV: builds the vector shift count by dup-ing the general register
// $cnt into $dst with the T16B arrangement.
instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (LShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
  ins_encode %{
    FloatRegister counts = as_FloatRegister($dst$$reg);
    Register amount = as_Register($cnt$$reg);
    __ dup(counts, __ T16B, amount);
  %}
  ins_pipe(pipe_class_default);
%}
14197 
// Right shifts on AArch64 SIMD are implemented as a left shift by a negative amount
// RShiftCntV: dup-s the general register $cnt into $dst (T16B) and then
// negates $dst in place, producing the negative count used for right shifts.
instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
  ins_encode %{
    FloatRegister counts = as_FloatRegister($dst$$reg);
    Register amount = as_Register($cnt$$reg);
    __ dup(counts, __ T16B, amount);
    __ negr(counts, __ T16B, counts);
  %}
  ins_pipe(pipe_class_default);
%}
14208 
// LShiftVB and RShiftVB with a vector shift count, for vectors of 4 or 8
// elements in vecD registers. Both node kinds are emitted as one sshl (T8B).
instruct vsll8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    FloatRegister counts = as_FloatRegister($shift$$reg);
    __ sshl(result, __ T8B, value, counts);
  %}
  ins_pipe(pipe_class_default);
%}
14223 
// LShiftVB and RShiftVB with a vector shift count, for 16-element vectors in
// vecX registers. Both node kinds are emitted as one sshl (T16B).
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    FloatRegister counts = as_FloatRegister($shift$$reg);
    __ sshl(result, __ T16B, value, counts);
  %}
  ins_pipe(pipe_class_default);
%}
14237 
// URShiftVB with a vector shift count, for vectors of 4 or 8 elements in
// vecD registers; emitted as one ushl (T8B).
instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    FloatRegister counts = as_FloatRegister($shift$$reg);
    __ ushl(result, __ T8B, value, counts);
  %}
  ins_pipe(pipe_class_default);
%}
14251 
// URShiftVB with a vector shift count, for 16-element vectors in vecX
// registers; emitted as one ushl (T16B).
instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    FloatRegister counts = as_FloatRegister($shift$$reg);
    __ ushl(result, __ T16B, value, counts);
  %}
  ins_pipe(pipe_class_default);
%}
14264 
// LShiftVB by an immediate, for vectors of 4 or 8 elements in vecD registers.
// The constant is masked to its low five bits; a masked count below 8 is
// emitted as shl (T8B), anything larger as eor of $src with itself.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant & 31;
    if (count < 8) {
      __ shl(result, __ T8B, value, count);
    } else {
      // value ^ value clears every bit of the result.
      __ eor(result, __ T8B, value, value);
    }
  %}
  ins_pipe(pipe_class_default);
%}
14284 
// LShiftVB by an immediate, for 16-element vectors in vecX registers.
// The constant is masked to its low five bits; a masked count below 8 is
// emitted as shl (T16B), anything larger as eor of $src with itself.
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant & 31;
    if (count < 8) {
      __ shl(result, __ T16B, value, count);
    } else {
      // value ^ value clears every bit of the result.
      __ eor(result, __ T16B, value, value);
    }
  %}
  ins_pipe(pipe_class_default);
%}
14303 
// RShiftVB by an immediate, for vectors of 4 or 8 elements in vecD registers;
// emitted as sshr (T8B).
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    // Mask the constant to five bits, then clamp counts of 8 or more to 7.
    const int masked = (int)$shift$$constant & 31;
    const int clamped = (masked >= 8) ? 7 : masked;
    // sshr is handed the low three bits of the negated count.
    __ sshr(result, __ T8B, value, -clamped & 7);
  %}
  ins_pipe(pipe_class_default);
%}
14319 
// RShiftVB by an immediate, for 16-element vectors in vecX registers;
// emitted as sshr (T16B).
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    // Mask the constant to five bits, then clamp counts of 8 or more to 7.
    const int masked = (int)$shift$$constant & 31;
    const int clamped = (masked >= 8) ? 7 : masked;
    // sshr is handed the low three bits of the negated count.
    __ sshr(result, __ T16B, value, -clamped & 7);
  %}
  ins_pipe(pipe_class_default);
%}
14334 
// URShiftVB by an immediate, for vectors of 4 or 8 elements in vecD
// registers. The constant is masked to its low five bits; a masked count
// below 8 is emitted as ushr (T8B), anything larger as eor of $src with itself.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant & 31;
    if (count < 8) {
      // ushr is handed the low three bits of the negated count.
      __ ushr(result, __ T8B, value, -count & 7);
    } else {
      // value ^ value clears every bit of the result.
      __ eor(result, __ T8B, value, value);
    }
  %}
  ins_pipe(pipe_class_default);
%}
14354 
// URShiftVB by an immediate, for 16-element vectors in vecX registers.
// The constant is masked to its low five bits; a masked count below 8 is
// emitted as ushr (T16B), anything larger as eor of $src with itself.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant & 31;
    if (count < 8) {
      // ushr is handed the low three bits of the negated count.
      __ ushr(result, __ T16B, value, -count & 7);
    } else {
      // value ^ value clears every bit of the result.
      __ eor(result, __ T16B, value, value);
    }
  %}
  ins_pipe(pipe_class_default);
%}
14373 
// LShiftVS and RShiftVS with a vector shift count, for vectors of 2 or 4
// elements in vecD registers. Both node kinds are emitted as one sshl (T4H).
instruct vsll4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    FloatRegister counts = as_FloatRegister($shift$$reg);
    __ sshl(result, __ T4H, value, counts);
  %}
  ins_pipe(pipe_class_default);
%}
14388 
// LShiftVS and RShiftVS with a vector shift count, for 8-element vectors in
// vecX registers. Both node kinds are emitted as one sshl (T8H).
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    FloatRegister counts = as_FloatRegister($shift$$reg);
    __ sshl(result, __ T8H, value, counts);
  %}
  ins_pipe(pipe_class_default);
%}
14402 
// URShiftVS with a vector shift count, for vectors of 2 or 4 elements in
// vecD registers; emitted as one ushl (T4H).
instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    FloatRegister counts = as_FloatRegister($shift$$reg);
    __ ushl(result, __ T4H, value, counts);
  %}
  ins_pipe(pipe_class_default);
%}
14416 
// URShiftVS with a vector shift count, for 8-element vectors in vecX
// registers; emitted as one ushl (T8H).
instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    FloatRegister counts = as_FloatRegister($shift$$reg);
    __ ushl(result, __ T8H, value, counts);
  %}
  ins_pipe(pipe_class_default);
%}
14429 
// LShiftVS by an immediate, for vectors of 2 or 4 elements in vecD registers.
// The constant is masked to its low five bits; a masked count below 16 is
// emitted as shl (T4H), anything larger as eor (T8B) of $src with itself.
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant & 31;
    if (count < 16) {
      __ shl(result, __ T4H, value, count);
    } else {
      // value ^ value clears every bit of the result.
      __ eor(result, __ T8B, value, value);
    }
  %}
  ins_pipe(pipe_class_default);
%}
14449 
// LShiftVS by an immediate, for 8-element vectors in vecX registers.
// The constant is masked to its low five bits; a masked count below 16 is
// emitted as shl (T8H), anything larger as eor (T16B) of $src with itself.
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant & 31;
    if (count < 16) {
      __ shl(result, __ T8H, value, count);
    } else {
      // value ^ value clears every bit of the result.
      __ eor(result, __ T16B, value, value);
    }
  %}
  ins_pipe(pipe_class_default);
%}
14468 
// RShiftVS by an immediate, for vectors of 2 or 4 elements in vecD registers;
// emitted as sshr (T4H).
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    // Mask the constant to five bits, then clamp counts of 16 or more to 15.
    const int masked = (int)$shift$$constant & 31;
    const int clamped = (masked >= 16) ? 15 : masked;
    // sshr is handed the low four bits of the negated count.
    __ sshr(result, __ T4H, value, -clamped & 15);
  %}
  ins_pipe(pipe_class_default);
%}
14484 
// RShiftVS by an immediate, for 8-element vectors in vecX registers;
// emitted as sshr (T8H).
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    // Mask the constant to five bits, then clamp counts of 16 or more to 15.
    const int masked = (int)$shift$$constant & 31;
    const int clamped = (masked >= 16) ? 15 : masked;
    // sshr is handed the low four bits of the negated count.
    __ sshr(result, __ T8H, value, -clamped & 15);
  %}
  ins_pipe(pipe_class_default);
%}
14499 
// URShiftVS by an immediate, for vectors of 2 or 4 elements in vecD
// registers. The constant is masked to its low five bits; a masked count
// below 16 is emitted as ushr (T4H), anything larger as eor (T8B) of $src
// with itself.
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant & 31;
    if (count < 16) {
      // ushr is handed the low four bits of the negated count.
      __ ushr(result, __ T4H, value, -count & 15);
    } else {
      // value ^ value clears every bit of the result.
      __ eor(result, __ T8B, value, value);
    }
  %}
  ins_pipe(pipe_class_default);
%}
14519 
// URShiftVS by an immediate, for 8-element vectors in vecX registers.
// The constant is masked to its low five bits; a masked count below 16 is
// emitted as ushr (T8H), anything larger as eor (T16B) of $src with itself.
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant & 31;
    if (count < 16) {
      // ushr is handed the low four bits of the negated count.
      __ ushr(result, __ T8H, value, -count & 15);
    } else {
      // value ^ value clears every bit of the result.
      __ eor(result, __ T16B, value, value);
    }
  %}
  ins_pipe(pipe_class_default);
%}
14538 
// LShiftVI and RShiftVI with a vector shift count, for 2-element vectors in
// vecD registers. Both node kinds are emitted as one sshl (T2S).
instruct vsll2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    FloatRegister counts = as_FloatRegister($shift$$reg);
    __ sshl(result, __ T2S, value, counts);
  %}
  ins_pipe(pipe_class_default);
%}
14552 
// LShiftVI and RShiftVI with a vector shift count, for 4-element vectors in
// vecX registers. Both node kinds are emitted as one sshl (T4S).
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    FloatRegister counts = as_FloatRegister($shift$$reg);
    __ sshl(result, __ T4S, value, counts);
  %}
  ins_pipe(pipe_class_default);
%}
14566 
// URShiftVI with a vector shift count, for 2-element vectors in vecD
// registers; emitted as one ushl (T2S).
instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    FloatRegister counts = as_FloatRegister($shift$$reg);
    __ ushl(result, __ T2S, value, counts);
  %}
  ins_pipe(pipe_class_default);
%}
14579 
// URShiftVI with a vector shift count, for 4-element vectors in vecX
// registers; emitted as one ushl (T4S).
instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value = as_FloatRegister($src$$reg);
    FloatRegister counts = as_FloatRegister($shift$$reg);
    __ ushl(result, __ T4S, value, counts);
  %}
  ins_pipe(pipe_class_default);
%}
14592 
14593 instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{  // LShiftVI on a length-2 int vector by a constant count.
14594   predicate(n->as_Vector()->length() == 2);  // applies only when the vector node has length 2
14595   match(Set dst (LShiftVI src shift));
14596   ins_cost(INSN_COST);
14597   format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
14598   ins_encode %{
14599     __ shl(as_FloatRegister($dst$$reg), __ T2S,  // shl with the 2S arrangement
14600            as_FloatRegister($src$$reg),
14601            (int)$shift$$constant & 31);  // only the low 5 bits of the constant are used, so the count is 0..31
14602   %}
14603   ins_pipe(pipe_class_default);
14604 %}
14605 
14606 instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{  // LShiftVI on a length-4 int vector by a constant count.
14607   predicate(n->as_Vector()->length() == 4);  // applies only when the vector node has length 4
14608   match(Set dst (LShiftVI src shift));
14609   ins_cost(INSN_COST);
14610   format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
14611   ins_encode %{
14612     __ shl(as_FloatRegister($dst$$reg), __ T4S,  // shl with the 4S arrangement
14613            as_FloatRegister($src$$reg),
14614            (int)$shift$$constant & 31);  // only the low 5 bits of the constant are used, so the count is 0..31
14615   %}
14616   ins_pipe(pipe_class_default);
14617 %}
14618 
14619 instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{  // RShiftVI on a length-2 int vector by a constant count, emitted as sshr.
14620   predicate(n->as_Vector()->length() == 2);  // applies only when the vector node has length 2
14621   match(Set dst (RShiftVI src shift));
14622   ins_cost(INSN_COST);
14623   format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
14624   ins_encode %{
14625     __ sshr(as_FloatRegister($dst$$reg), __ T2S,  // sshr with the 2S arrangement
14626             as_FloatRegister($src$$reg),
14627             -(int)$shift$$constant & 31);  // negate, then keep the low 5 bits: a count c in 1..31 is passed as 32 - c, a multiple of 32 as 0. NOTE(review): presumably the form the sshr assembler routine expects -- confirm, including the zero case
14628   %}
14629   ins_pipe(pipe_class_default);
14630 %}
14631 
14632 instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{  // RShiftVI on a length-4 int vector by a constant count, emitted as sshr.
14633   predicate(n->as_Vector()->length() == 4);  // applies only when the vector node has length 4
14634   match(Set dst (RShiftVI src shift));
14635   ins_cost(INSN_COST);
14636   format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
14637   ins_encode %{
14638     __ sshr(as_FloatRegister($dst$$reg), __ T4S,  // sshr with the 4S arrangement
14639             as_FloatRegister($src$$reg),
14640             -(int)$shift$$constant & 31);  // negate, then keep the low 5 bits: a count c in 1..31 is passed as 32 - c, a multiple of 32 as 0. NOTE(review): presumably the form the sshr assembler routine expects -- confirm, including the zero case
14641   %}
14642   ins_pipe(pipe_class_default);
14643 %}
14644 
14645 instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{  // URShiftVI on a length-2 int vector by a constant count, emitted as ushr.
14646   predicate(n->as_Vector()->length() == 2);  // applies only when the vector node has length 2
14647   match(Set dst (URShiftVI src shift));
14648   ins_cost(INSN_COST);
14649   format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
14650   ins_encode %{
14651     __ ushr(as_FloatRegister($dst$$reg), __ T2S,  // ushr with the 2S arrangement
14652             as_FloatRegister($src$$reg),
14653             -(int)$shift$$constant & 31);  // negate, then keep the low 5 bits: a count c in 1..31 is passed as 32 - c, a multiple of 32 as 0. NOTE(review): presumably the form the ushr assembler routine expects -- confirm, including the zero case
14654   %}
14655   ins_pipe(pipe_class_default);
14656 %}
14657 
14658 instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{  // URShiftVI on a length-4 int vector by a constant count, emitted as ushr.
14659   predicate(n->as_Vector()->length() == 4);  // applies only when the vector node has length 4
14660   match(Set dst (URShiftVI src shift));
14661   ins_cost(INSN_COST);
14662   format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
14663   ins_encode %{
14664     __ ushr(as_FloatRegister($dst$$reg), __ T4S,  // ushr with the 4S arrangement
14665             as_FloatRegister($src$$reg),
14666             -(int)$shift$$constant & 31);  // negate, then keep the low 5 bits: a count c in 1..31 is passed as 32 - c, a multiple of 32 as 0. NOTE(review): presumably the form the ushr assembler routine expects -- confirm, including the zero case
14667   %}
14668   ins_pipe(pipe_class_default);
14669 %}
14670 
14671 instruct vsll2L(vecX dst, vecX src, vecX shift) %{  // Shift each element of a length-2 long vector by a count held in a vector register.
14672   predicate(n->as_Vector()->length() == 2);  // applies only when the vector node has length 2
14673   match(Set dst (LShiftVL src shift));  // left shift
14674   match(Set dst (RShiftVL src shift));  // right shift shares this rule; the same instruction is emitted for both forms
14675   ins_cost(INSN_COST);
14676   format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
14677   ins_encode %{
14678     __ sshl(as_FloatRegister($dst$$reg), __ T2D,  // sshl with the 2D arrangement
14679             as_FloatRegister($src$$reg),
14680             as_FloatRegister($shift$$reg));  // NOTE(review): the RShiftVL form presumably relies on the count in this register having been negated where it is prepared -- confirm
14681   %}
14682   ins_pipe(pipe_class_default);
14683 %}
14684 
14685 instruct vsrl2L(vecX dst, vecX src, vecX shift) %{  // URShiftVL on a length-2 long vector with the count held in a vector register.
14686   predicate(n->as_Vector()->length() == 2);  // applies only when the vector node has length 2
14687   match(Set dst (URShiftVL src shift));
14688   ins_cost(INSN_COST);
14689   format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
14690   ins_encode %{
14691     __ ushl(as_FloatRegister($dst$$reg), __ T2D,  // ushl with the 2D arrangement
14692             as_FloatRegister($src$$reg),
14693             as_FloatRegister($shift$$reg));  // NOTE(review): a right shift emitted as ushl presumably relies on a negated count in this register -- confirm where the count is prepared
14694   %}
14695   ins_pipe(pipe_class_default);
14696 %}
14697 
14698 instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{  // LShiftVL on a length-2 long vector by a constant count.
14699   predicate(n->as_Vector()->length() == 2);  // applies only when the vector node has length 2
14700   match(Set dst (LShiftVL src shift));
14701   ins_cost(INSN_COST);
14702   format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
14703   ins_encode %{
14704     __ shl(as_FloatRegister($dst$$reg), __ T2D,  // shl with the 2D arrangement
14705            as_FloatRegister($src$$reg),
14706            (int)$shift$$constant & 63);  // only the low 6 bits of the constant are used, so the count is 0..63
14707   %}
14708   ins_pipe(pipe_class_default);
14709 %}
14710 
14711 instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{  // RShiftVL on a length-2 long vector by a constant count, emitted as sshr.
14712   predicate(n->as_Vector()->length() == 2);  // applies only when the vector node has length 2
14713   match(Set dst (RShiftVL src shift));
14714   ins_cost(INSN_COST);
14715   format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
14716   ins_encode %{
14717     __ sshr(as_FloatRegister($dst$$reg), __ T2D,  // sshr with the 2D arrangement
14718             as_FloatRegister($src$$reg),
14719             -(int)$shift$$constant & 63);  // negate, then keep the low 6 bits: a count c in 1..63 is passed as 64 - c, a multiple of 64 as 0. NOTE(review): presumably the form the sshr assembler routine expects -- confirm, including the zero case
14720   %}
14721   ins_pipe(pipe_class_default);
14722 %}
14723 
14724 instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{  // URShiftVL on a length-2 long vector by a constant count, emitted as ushr.
14725   predicate(n->as_Vector()->length() == 2);  // applies only when the vector node has length 2
14726   match(Set dst (URShiftVL src shift));
14727   ins_cost(INSN_COST);
14728   format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
14729   ins_encode %{
14730     __ ushr(as_FloatRegister($dst$$reg), __ T2D,  // ushr with the 2D arrangement
14731             as_FloatRegister($src$$reg),
14732             -(int)$shift$$constant & 63);  // negate, then keep the low 6 bits: a count c in 1..63 is passed as 64 - c, a multiple of 64 as 0. NOTE(review): presumably the form the ushr assembler routine expects -- confirm, including the zero case
14733   %}
14734   ins_pipe(pipe_class_default);
14735 %}
14736 
14737 //----------PEEPHOLE RULES-----------------------------------------------------
14738 // These must follow all instruction definitions as they use the names
14739 // defined in the instruction definitions.
14740 //
14741 // peepmatch ( root_instr_name [preceding_instruction]* );
14742 //
14743 // peepconstraint (
14744 //  instruction_number.operand_name relational_op instruction_number.operand_name
14745 //  [, ...] );
14746 // // instruction numbers are zero-based using left to right order in peepmatch
14747 //
14748 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
14749 // // provide an instruction_number.operand_name for each operand that appears
14750 // // in the replacement instruction's match rule
14751 //
14752 // ---------VM FLAGS---------------------------------------------------------
14753 //
14754 // All peephole optimizations can be turned off using -XX:-OptoPeephole
14755 //
14756 // Each peephole rule is given an identifying number starting with zero and
14757 // increasing by one in the order seen by the parser.  An individual peephole
14758 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
14759 // on the command-line.
14760 //
14761 // ---------CURRENT LIMITATIONS----------------------------------------------
14762 //
14763 // Only match adjacent instructions in same basic block
14764 // Only equality constraints
14765 // Only constraints between operands, not (0.dest_reg == RAX_enc)
14766 // Only one replacement instruction
14767 //
14768 // ---------EXAMPLE----------------------------------------------------------
14769 //
14770 // // pertinent parts of existing instructions in architecture description
14771 // instruct movI(iRegINoSp dst, iRegI src)
14772 // %{
14773 //   match(Set dst (CopyI src));
14774 // %}
14775 //
14776 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
14777 // %{
14778 //   match(Set dst (AddI dst src));
14779 //   effect(KILL cr);
14780 // %}
14781 //
14782 // // Change (inc mov) to lea
14783 // peephole %{
14784 //   // increment preceded by register-register move
14785 //   peepmatch ( incI_iReg movI );
14786 //   // require that the destination register of the increment
14787 //   // match the destination register of the move
14788 //   peepconstraint ( 0.dst == 1.dst );
14789 //   // construct a replacement instruction that sets
14790 //   // the destination to ( move's source register + one )
14791 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
14792 // %}
14793 //
14794 
14795 // The implementation no longer uses movX instructions since the
14796 // machine-independent system no longer uses CopyX nodes.
14797 //
14798 // peephole
14799 // %{
14800 //   peepmatch (incI_iReg movI);
14801 //   peepconstraint (0.dst == 1.dst);
14802 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
14803 // %}
14804 
14805 // peephole
14806 // %{
14807 //   peepmatch (decI_iReg movI);
14808 //   peepconstraint (0.dst == 1.dst);
14809 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
14810 // %}
14811 
14812 // peephole
14813 // %{
14814 //   peepmatch (addI_iReg_imm movI);
14815 //   peepconstraint (0.dst == 1.dst);
14816 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
14817 // %}
14818 
14819 // peephole
14820 // %{
14821 //   peepmatch (incL_iReg movL);
14822 //   peepconstraint (0.dst == 1.dst);
14823 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
14824 // %}
14825 
14826 // peephole
14827 // %{
14828 //   peepmatch (decL_iReg movL);
14829 //   peepconstraint (0.dst == 1.dst);
14830 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
14831 // %}
14832 
14833 // peephole
14834 // %{
14835 //   peepmatch (addL_iReg_imm movL);
14836 //   peepconstraint (0.dst == 1.dst);
14837 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
14838 // %}
14839 
14840 // peephole
14841 // %{
14842 //   peepmatch (addP_iReg_imm movP);
14843 //   peepconstraint (0.dst == 1.dst);
14844 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
14845 // %}
14846 
14847 // // Change load of spilled value to only a spill
14848 // instruct storeI(memory mem, iRegI src)
14849 // %{
14850 //   match(Set mem (StoreI mem src));
14851 // %}
14852 //
14853 // instruct loadI(iRegINoSp dst, memory mem)
14854 // %{
14855 //   match(Set dst (LoadI mem));
14856 // %}
14857 //
14858 
14859 //----------SMARTSPILL RULES---------------------------------------------------
14860 // These must follow all instruction definitions as they use the names
14861 // defined in the instruction definitions.
14862 
14863 // Local Variables:
14864 // mode: c++
14865 // End: