1 //
   2 // Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, 2020, Red Hat, Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
// architecture.
  32 
register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// We must define the 64 bit int registers in two 32 bit halves, the
// real lower register and a virtual upper half register. upper halves
// are used by the register allocator but are not actually supplied as
// operands to memory ops.
//
// follow the C1 compiler in making registers
//
//   r0-r7,r10-r26 volatile (caller save)
//   r27-r31 system (no save, no allocate)
//   r8-r9 invisible to the allocator (so we can use them as scratch regs)
//
// as regards Java usage. we don't use any callee save registers
// because this makes it difficult to de-optimise a frame (see comment
// in x86 implementation of Deoptimization::unwind_callee_save_values)
//

// General Registers
//
// Each 64-bit register is described by two 32-bit slices: Rn (low word)
// and Rn_H (virtual high word, as_VMReg()->next()).  Note that r8 and
// r9 deliberately have no reg_def entries -- they stay invisible to the
// allocator so they can be used as scratch registers (see above).

reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
reg_def R27     ( SOC, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
reg_def R27_H   ( SOC, SOE, Op_RegI, 27, r27->as_VMReg()->next());
reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values up to 8 * 32
 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
 160 // for Java use float registers v0-v15 are always save on call whereas
 161 // the platform ABI treats v8-v15 as callee save). float registers
 162 // v16-v31 are SOC as per the platform spec
 163 
 164   reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
 165   reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
 166   reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
 167   reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );
 168 
 169   reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
 170   reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
 171   reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
 172   reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );
 173 
 174   reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
 175   reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
 176   reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
 177   reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );
 178 
 179   reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
 180   reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
 181   reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
 182   reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );
 183 
 184   reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
 185   reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
 186   reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
 187   reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );
 188 
 189   reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
 190   reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
 191   reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
 192   reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );
 193 
 194   reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
 195   reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
 196   reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
 197   reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );
 198 
 199   reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
 200   reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
 201   reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
 202   reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );
 203 
 204   reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
 205   reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
 206   reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
 207   reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );
 208 
 209   reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
 210   reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
 211   reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
 212   reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );
 213 
 214   reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
 215   reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
 216   reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
 217   reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
 218 
 219   reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
 220   reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
 221   reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
 222   reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
 223 
 224   reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
 225   reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
 226   reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
 227   reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
 228 
 229   reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
 230   reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
 231   reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
 232   reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
 233 
 234   reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
 235   reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
 236   reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
 237   reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
 238 
 239   reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
 240   reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
 241   reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
 242   reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
 243 
 244   reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
 245   reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
 246   reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
 247   reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
 248 
 249   reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
 250   reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
 251   reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
 252   reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
 253 
 254   reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
 255   reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
 256   reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
 257   reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
 258 
 259   reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
 260   reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
 261   reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
 262   reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
 263 
 264   reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
 265   reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
 266   reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
 267   reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
 268 
 269   reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
 270   reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
 271   reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
 272   reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
 273 
 274   reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
 275   reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
 276   reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
 277   reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
 278 
 279   reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
 280   reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
 281   reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
 282   reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
 283 
 284   reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
 285   reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
 286   reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
 287   reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
 288 
 289   reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
 290   reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
 291   reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
 292   reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
 293 
 294   reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
 295   reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
 296   reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
 297   reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
 298 
 299   reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
 300   reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
 301   reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
 302   reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
 303 
 304   reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
 305   reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
 306   reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
 307   reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
 308 
 309   reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
 310   reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
 311   reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
 312   reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
 313 
 314   reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
 315   reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
 316   reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
 317   reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
 318 
 319   reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
 320   reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
 321   reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
 322   reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
 328 // the AArch64 CSPR status flag register is not directly acessible as
 329 // instruction operand. the FPSR status flag register is a system
 330 // register which can be written/read using MSR/MRS but again does not
 331 // appear as an operand (a code identifying the FSPR occurs as an
 332 // immediate value in the instruction).
 333 
 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

// General-purpose register allocation order: scratch volatiles first,
// then the Java argument registers r0-r7, then the save-on-entry set,
// and finally the registers that are never allocated.
alloc_class chunk0(
    // volatiles
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,

    // arg registers
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,

    // non-volatiles
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers

    R27, R27_H, // heapbase
    R28, R28_H, // thread
    R29, R29_H, // fp
    R30, R30_H, // lr
    R31, R31_H, // sp
);
 385 
// FP/vector register allocation order: the platform-ABI no-save
// registers v16-v31 first, then the Java FP argument registers v0-v7,
// then v8-v15 (callee save under the platform ABI).
alloc_class chunk1(

    // no save
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,

    // arg registers
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,

    // non-volatiles
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
);

// The condition-flags register lives in its own chunk.
alloc_class chunk2(RFLAGS);
 428 
//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Class for all 32 bit general purpose registers
reg_class all_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26,
    R27,
    R28,
    R29,
    R30,
    R31
);
 471 
 472 
// Class for all 32 bit integer registers (excluding SP which
// will never be used as an integer register)
// (dynamic class: the mask is returned by C++ code; _ANY_REG32_mask is
// defined elsewhere in this file, outside this chunk)
reg_class any_reg32 %{
  return _ANY_REG32_mask;
%}

// Singleton class for R0 int register
reg_class int_r0_reg(R0);

// Singleton class for R2 int register
reg_class int_r2_reg(R2);

// Singleton class for R3 int register
reg_class int_r3_reg(R3);

// Singleton class for R4 int register
reg_class int_r4_reg(R4);

// Singleton class for R31 int register
reg_class int_r31_reg(R31);
 493 
// Class for all 64 bit general purpose registers
reg_class all_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);

// Class for all long integer registers (including SP)
// (dynamic class: mask returned by C++ code defined elsewhere in this file)
reg_class any_reg %{
  return _ANY_REG_mask;
%}
 532 
// Class for non-allocatable 32 bit registers
reg_class non_allocatable_reg32(
    R28,                        // thread
    R30,                        // lr
    R31                         // sp
);

// Class for non-allocatable 64 bit registers
reg_class non_allocatable_reg(
    R28, R28_H,                 // thread
    R30, R30_H,                 // lr
    R31, R31_H                  // sp
);

// Class for all non-special integer registers
// (dynamic class: mask returned by C++ code defined elsewhere in this file)
reg_class no_special_reg32 %{
  return _NO_SPECIAL_REG32_mask;
%}

// Class for all non-special long integer registers
// (dynamic class: mask returned by C++ code defined elsewhere in this file)
reg_class no_special_reg %{
  return _NO_SPECIAL_REG_mask;
%}

// Class for 64 bit register r0
reg_class r0_reg(
    R0, R0_H
);

// Class for 64 bit register r1
reg_class r1_reg(
    R1, R1_H
);

// Class for 64 bit register r2
reg_class r2_reg(
    R2, R2_H
);

// Class for 64 bit register r3
reg_class r3_reg(
    R3, R3_H
);

// Class for 64 bit register r4
reg_class r4_reg(
    R4, R4_H
);

// Class for 64 bit register r5
reg_class r5_reg(
    R5, R5_H
);

// Class for 64 bit register r10
reg_class r10_reg(
    R10, R10_H
);

// Class for 64 bit register r11
reg_class r11_reg(
    R11, R11_H
);

// Class for method register (r12)
reg_class method_reg(
    R12, R12_H
);

// Class for heapbase register (r27)
reg_class heapbase_reg(
    R27, R27_H
);

// Class for thread register (r28)
reg_class thread_reg(
    R28, R28_H
);

// Class for frame pointer register (r29)
reg_class fp_reg(
    R29, R29_H
);

// Class for link register (r30)
reg_class lr_reg(
    R30, R30_H
);

// Class for long sp register (r31)
reg_class sp_reg(
  R31, R31_H
);

// Class for all pointer registers
// (dynamic class: mask returned by C++ code defined elsewhere in this file)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all non_special pointer registers
// (dynamic class: mask returned by C++ code defined elsewhere in this file)
reg_class no_special_ptr_reg %{
  return _NO_SPECIAL_PTR_REG_mask;
%}
 636 
// Class for all float registers (one 32-bit slice per register)
reg_class float_reg(
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
    V17,
    V18,
    V19,
    V20,
    V21,
    V22,
    V23,
    V24,
    V25,
    V26,
    V27,
    V28,
    V29,
    V30,
    V31
);

// Double precision float registers have virtual `high halves' that
// are needed by the allocator.
// Class for all double registers (low two 32-bit slices per register)
reg_class double_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 710 
// Class for all 64bit vector registers (low two 32-bit slices)
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);

// Class for all 128bit vector registers (all four 32-bit slices)
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 782 
// Singleton classes for vector registers v0..v31.
// NOTE(review): each class is described as "128 bit" but lists only the
// low two 32-bit slices (Vn, Vn_H); presumably these slots suffice to
// pin the whole register -- confirm against the operands that use them.

// Class for 128 bit register v0
reg_class v0_reg(
    V0, V0_H
);

// Class for 128 bit register v1
reg_class v1_reg(
    V1, V1_H
);

// Class for 128 bit register v2
reg_class v2_reg(
    V2, V2_H
);

// Class for 128 bit register v3
reg_class v3_reg(
    V3, V3_H
);

// Class for 128 bit register v4
reg_class v4_reg(
    V4, V4_H
);

// Class for 128 bit register v5
reg_class v5_reg(
    V5, V5_H
);

// Class for 128 bit register v6
reg_class v6_reg(
    V6, V6_H
);

// Class for 128 bit register v7
reg_class v7_reg(
    V7, V7_H
);

// Class for 128 bit register v8
reg_class v8_reg(
    V8, V8_H
);

// Class for 128 bit register v9
reg_class v9_reg(
    V9, V9_H
);

// Class for 128 bit register v10
reg_class v10_reg(
    V10, V10_H
);

// Class for 128 bit register v11
reg_class v11_reg(
    V11, V11_H
);

// Class for 128 bit register v12
reg_class v12_reg(
    V12, V12_H
);

// Class for 128 bit register v13
reg_class v13_reg(
    V13, V13_H
);

// Class for 128 bit register v14
reg_class v14_reg(
    V14, V14_H
);

// Class for 128 bit register v15
reg_class v15_reg(
    V15, V15_H
);

// Class for 128 bit register v16
reg_class v16_reg(
    V16, V16_H
);

// Class for 128 bit register v17
reg_class v17_reg(
    V17, V17_H
);

// Class for 128 bit register v18
reg_class v18_reg(
    V18, V18_H
);

// Class for 128 bit register v19
reg_class v19_reg(
    V19, V19_H
);

// Class for 128 bit register v20
reg_class v20_reg(
    V20, V20_H
);

// Class for 128 bit register v21
reg_class v21_reg(
    V21, V21_H
);

// Class for 128 bit register v22
reg_class v22_reg(
    V22, V22_H
);

// Class for 128 bit register v23
reg_class v23_reg(
    V23, V23_H
);

// Class for 128 bit register v24
reg_class v24_reg(
    V24, V24_H
);

// Class for 128 bit register v25
reg_class v25_reg(
    V25, V25_H
);

// Class for 128 bit register v26
reg_class v26_reg(
    V26, V26_H
);

// Class for 128 bit register v27
reg_class v27_reg(
    V27, V27_H
);

// Class for 128 bit register v28
reg_class v28_reg(
    V28, V28_H
);

// Class for 128 bit register v29
reg_class v29_reg(
    V29, V29_H
);

// Class for 128 bit register v30
reg_class v30_reg(
    V30, V30_H
);

// Class for 128 bit register v31
reg_class v31_reg(
    V31, V31_H
);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

%}
 947 
 948 //----------DEFINITION BLOCK---------------------------------------------------
 949 // Define name --> value mappings to inform the ADLC of an integer valued name
 950 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 951 // Format:
 952 //        int_def  <name>         ( <int_value>, <expression>);
 953 // Generated Code in ad_<arch>.hpp
 954 //        #define  <name>   (<expression>)
 955 //        // value == <int_value>
 956 // Generated code in ad_<arch>.cpp adlc_verification()
 957 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 958 //
 959 
 960 // we follow the ppc-aix port in using a simple cost model which ranks
 961 // register operations as cheap, memory ops as more expensive and
 962 // branches as most expensive. the first two have a low as well as a
 963 // normal cost. huge cost appears to be a way of saying don't do
 964 // something
 965 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls are ranked at twice the cost of a plain
  // instruction.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile references are the most expensive operations costed here.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 973 
 974 
 975 //----------SOURCE BLOCK-------------------------------------------------------
 976 // This is a block of C++ code which provides values, functions, and
 977 // definitions necessary in the rest of the architecture description
 978 
 979 source_hpp %{
 980 
 981 #include "asm/macroAssembler.hpp"
 982 #include "gc/shared/cardTable.hpp"
 983 #include "gc/shared/cardTableBarrierSet.hpp"
 984 #include "gc/shared/collectedHeap.hpp"
 985 #include "opto/addnode.hpp"
 986 #include "opto/convertnode.hpp"
 987 
 988 extern RegMask _ANY_REG32_mask;
 989 extern RegMask _ANY_REG_mask;
 990 extern RegMask _PTR_REG_mask;
 991 extern RegMask _NO_SPECIAL_REG32_mask;
 992 extern RegMask _NO_SPECIAL_REG_mask;
 993 extern RegMask _NO_SPECIAL_PTR_REG_mask;
 994 
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  // AArch64 does not use call trampolines, so no code space is
  // reserved for them.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1012 
class HandlerImpl {

 public:

  // Emit the exception/deopt handler stubs into 'cbuf'; the
  // implementations appear elsewhere in this ad file.
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // Worst-case size of the exception handler: a single far branch.
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    return 4 * NativeInstruction::instruction_size;
  }
};
1029 
// Platform-dependent node flags. AArch64 defines no extra flags, so
// the enumeration simply terminates at the shared Node::_last_flag.
class Node::PD {
public:
  enum NodeFlags {
    _last_flag = Node::_last_flag
  };
};
1036 
 // returns true when 'opcode' names a CompareAndSwap-style LoadStore
 // node (see the implementation in the source block below)
 bool is_CAS(int opcode, bool maybe_volatile);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling addressing modes
  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1055 %}
1056 
1057 source %{
1058 
1059   // Derived RegMask with conditionally allocatable registers
1060 
  // No platform-specific mach node analysis is required on AArch64.
  void PhaseOutput::pd_perform_mach_node_analysis() {
  }

  // Machine nodes need no alignment beyond the default (1 unit).
  int MachNode::pd_alignment_required() const {
    return 1;
  }

  // No padding is ever inserted before an instruction.
  int MachNode::compute_padding(int current_offset) const {
    return 0;
  }
1071 
  // Definitions of the derived register masks declared in the
  // source_hpp block above; they are populated by reg_mask_init().
  RegMask _ANY_REG32_mask;
  RegMask _ANY_REG_mask;
  RegMask _PTR_REG_mask;
  RegMask _NO_SPECIAL_REG32_mask;
  RegMask _NO_SPECIAL_REG_mask;
  RegMask _NO_SPECIAL_PTR_REG_mask;
1078 
  void reg_mask_init() {
    // We derive below RegMask(s) from the ones which are auto-generated from
    // adlc register classes to make AArch64 rheapbase (r27) and rfp (r29)
    // registers conditionally reserved.

    // Exclude the stack pointer (r31_sp) from the general 32-bit mask.
    _ANY_REG32_mask = _ALL_REG32_mask;
    _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(r31_sp->as_VMReg()));

    _ANY_REG_mask = _ALL_REG_mask;

    _PTR_REG_mask = _ALL_REG_mask;

    // Start from all registers and subtract those adlc marked as
    // non-allocatable.
    _NO_SPECIAL_REG32_mask = _ALL_REG32_mask;
    _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask);

    _NO_SPECIAL_REG_mask = _ALL_REG_mask;
    _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);

    _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask;
    _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);

    // r27 is not allocatable when compressed oops is on and heapbase is not
    // zero, compressed klass pointers doesn't use r27 after JDK-8234794
    if (UseCompressedOops && CompressedOops::ptrs_base() != NULL) {
      _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
      _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask);
      _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask);
    }

    // r29 is not allocatable when PreserveFramePointer is on
    if (PreserveFramePointer) {
      _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
      _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask);
      _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask);
    }
  }
1115 
  // Optimization of volatile gets and puts
1117   // -------------------------------------
1118   //
1119   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1120   // use to implement volatile reads and writes. For a volatile read
1121   // we simply need
1122   //
1123   //   ldar<x>
1124   //
1125   // and for a volatile write we need
1126   //
1127   //   stlr<x>
1128   //
1129   // Alternatively, we can implement them by pairing a normal
1130   // load/store with a memory barrier. For a volatile read we need
1131   //
1132   //   ldr<x>
1133   //   dmb ishld
1134   //
1135   // for a volatile write
1136   //
1137   //   dmb ish
1138   //   str<x>
1139   //   dmb ish
1140   //
1141   // We can also use ldaxr and stlxr to implement compare and swap CAS
1142   // sequences. These are normally translated to an instruction
1143   // sequence like the following
1144   //
1145   //   dmb      ish
1146   // retry:
1147   //   ldxr<x>   rval raddr
1148   //   cmp       rval rold
1149   //   b.ne done
1150   //   stlxr<x>  rval, rnew, rold
1151   //   cbnz      rval retry
1152   // done:
1153   //   cset      r0, eq
1154   //   dmb ishld
1155   //
1156   // Note that the exclusive store is already using an stlxr
1157   // instruction. That is required to ensure visibility to other
1158   // threads of the exclusive write (assuming it succeeds) before that
1159   // of any subsequent writes.
1160   //
1161   // The following instruction sequence is an improvement on the above
1162   //
1163   // retry:
1164   //   ldaxr<x>  rval raddr
1165   //   cmp       rval rold
1166   //   b.ne done
1167   //   stlxr<x>  rval, rnew, rold
1168   //   cbnz      rval retry
1169   // done:
1170   //   cset      r0, eq
1171   //
1172   // We don't need the leading dmb ish since the stlxr guarantees
1173   // visibility of prior writes in the case that the swap is
1174   // successful. Crucially we don't have to worry about the case where
1175   // the swap is not successful since no valid program should be
1176   // relying on visibility of prior changes by the attempting thread
1177   // in the case where the CAS fails.
1178   //
1179   // Similarly, we don't need the trailing dmb ishld if we substitute
1180   // an ldaxr instruction since that will provide all the guarantees we
1181   // require regarding observation of changes made by other threads
1182   // before any change to the CAS address observed by the load.
1183   //
1184   // In order to generate the desired instruction sequence we need to
1185   // be able to identify specific 'signature' ideal graph node
1186   // sequences which i) occur as a translation of a volatile reads or
1187   // writes or CAS operations and ii) do not occur through any other
1188   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1190   // sequences to the desired machine code sequences. Selection of the
1191   // alternative rules can be implemented by predicates which identify
1192   // the relevant node sequences.
1193   //
1194   // The ideal graph generator translates a volatile read to the node
1195   // sequence
1196   //
1197   //   LoadX[mo_acquire]
1198   //   MemBarAcquire
1199   //
1200   // As a special case when using the compressed oops optimization we
1201   // may also see this variant
1202   //
1203   //   LoadN[mo_acquire]
1204   //   DecodeN
1205   //   MemBarAcquire
1206   //
1207   // A volatile write is translated to the node sequence
1208   //
1209   //   MemBarRelease
1210   //   StoreX[mo_release] {CardMark}-optional
1211   //   MemBarVolatile
1212   //
1213   // n.b. the above node patterns are generated with a strict
1214   // 'signature' configuration of input and output dependencies (see
1215   // the predicates below for exact details). The card mark may be as
1216   // simple as a few extra nodes or, in a few GC configurations, may
1217   // include more complex control flow between the leading and
1218   // trailing memory barriers. However, whatever the card mark
1219   // configuration these signatures are unique to translated volatile
1220   // reads/stores -- they will not appear as a result of any other
1221   // bytecode translation or inlining nor as a consequence of
1222   // optimizing transforms.
1223   //
1224   // We also want to catch inlined unsafe volatile gets and puts and
1225   // be able to implement them using either ldar<x>/stlr<x> or some
1226   // combination of ldr<x>/stlr<x> and dmb instructions.
1227   //
1228   // Inlined unsafe volatiles puts manifest as a minor variant of the
1229   // normal volatile put node sequence containing an extra cpuorder
1230   // membar
1231   //
1232   //   MemBarRelease
1233   //   MemBarCPUOrder
1234   //   StoreX[mo_release] {CardMark}-optional
1235   //   MemBarCPUOrder
1236   //   MemBarVolatile
1237   //
1238   // n.b. as an aside, a cpuorder membar is not itself subject to
1239   // matching and translation by adlc rules.  However, the rule
1240   // predicates need to detect its presence in order to correctly
1241   // select the desired adlc rules.
1242   //
1243   // Inlined unsafe volatile gets manifest as a slightly different
1244   // node sequence to a normal volatile get because of the
1245   // introduction of some CPUOrder memory barriers to bracket the
  // Load. However, the same basic skeleton of a LoadX feeding a
  // MemBarAcquire, possibly through an optional DecodeN, is still
1248   // present
1249   //
1250   //   MemBarCPUOrder
1251   //        ||       \\
1252   //   MemBarCPUOrder LoadX[mo_acquire]
1253   //        ||            |
1254   //        ||       {DecodeN} optional
1255   //        ||       /
1256   //     MemBarAcquire
1257   //
1258   // In this case the acquire membar does not directly depend on the
1259   // load. However, we can be sure that the load is generated from an
1260   // inlined unsafe volatile get if we see it dependent on this unique
1261   // sequence of membar nodes. Similarly, given an acquire membar we
1262   // can know that it was added because of an inlined unsafe volatile
1263   // get if it is fed and feeds a cpuorder membar and if its feed
1264   // membar also feeds an acquiring load.
1265   //
1266   // Finally an inlined (Unsafe) CAS operation is translated to the
1267   // following ideal graph
1268   //
1269   //   MemBarRelease
1270   //   MemBarCPUOrder
1271   //   CompareAndSwapX {CardMark}-optional
1272   //   MemBarCPUOrder
1273   //   MemBarAcquire
1274   //
1275   // So, where we can identify these volatile read and write
1276   // signatures we can choose to plant either of the above two code
1277   // sequences. For a volatile read we can simply plant a normal
1278   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1279   // also choose to inhibit translation of the MemBarAcquire and
1280   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1281   //
1282   // When we recognise a volatile store signature we can choose to
1283   // plant at a dmb ish as a translation for the MemBarRelease, a
1284   // normal str<x> and then a dmb ish for the MemBarVolatile.
1285   // Alternatively, we can inhibit translation of the MemBarRelease
1286   // and MemBarVolatile and instead plant a simple stlr<x>
1287   // instruction.
1288   //
1289   // when we recognise a CAS signature we can choose to plant a dmb
1290   // ish as a translation for the MemBarRelease, the conventional
1291   // macro-instruction sequence for the CompareAndSwap node (which
1292   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1293   // Alternatively, we can elide generation of the dmb instructions
1294   // and plant the alternative CompareAndSwap macro-instruction
1295   // sequence (which uses ldaxr<x>).
1296   //
1297   // Of course, the above only applies when we see these signature
1298   // configurations. We still want to plant dmb instructions in any
1299   // other cases where we may see a MemBarAcquire, MemBarRelease or
1300   // MemBarVolatile. For example, at the end of a constructor which
1301   // writes final/volatile fields we will see a MemBarRelease
1302   // instruction and this needs a 'dmb ish' lest we risk the
1303   // constructed object being visible without making the
1304   // final/volatile field writes visible.
1305   //
1306   // n.b. the translation rules below which rely on detection of the
1307   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1308   // If we see anything other than the signature configurations we
1309   // always just translate the loads and stores to ldr<x> and str<x>
1310   // and translate acquire, release and volatile membars to the
1311   // relevant dmb instructions.
1312   //
1313 
1314   // is_CAS(int opcode, bool maybe_volatile)
1315   //
1316   // return true if opcode is one of the possible CompareAndSwapX
1317   // values otherwise false.
1318 
  bool is_CAS(int opcode, bool maybe_volatile)
  {
    switch(opcode) {
      // We handle these
      // These LoadStore opcodes are treated as CAS-style regardless of
      // the 'maybe_volatile' argument.
    case Op_CompareAndSwapI:
    case Op_CompareAndSwapL:
    case Op_CompareAndSwapP:
    case Op_CompareAndSwapN:
    case Op_ShenandoahCompareAndSwapP:
    case Op_ShenandoahCompareAndSwapN:
    case Op_CompareAndSwapB:
    case Op_CompareAndSwapS:
    case Op_GetAndSetI:
    case Op_GetAndSetL:
    case Op_GetAndSetP:
    case Op_GetAndSetN:
    case Op_GetAndAddI:
    case Op_GetAndAddL:
      return true;
      // These opcodes are only treated as CAS-style when the caller
      // passes maybe_volatile == true.
    case Op_CompareAndExchangeI:
    case Op_CompareAndExchangeN:
    case Op_CompareAndExchangeB:
    case Op_CompareAndExchangeS:
    case Op_CompareAndExchangeL:
    case Op_CompareAndExchangeP:
    case Op_WeakCompareAndSwapB:
    case Op_WeakCompareAndSwapS:
    case Op_WeakCompareAndSwapI:
    case Op_WeakCompareAndSwapL:
    case Op_WeakCompareAndSwapP:
    case Op_WeakCompareAndSwapN:
    case Op_ShenandoahWeakCompareAndSwapP:
    case Op_ShenandoahWeakCompareAndSwapN:
    case Op_ShenandoahCompareAndExchangeP:
    case Op_ShenandoahCompareAndExchangeN:
      return maybe_volatile;
    default:
      return false;
    }
  }
1359 
1360   // helper to determine the maximum number of Phi nodes we may need to
1361   // traverse when searching from a card mark membar for the merge mem
1362   // feeding a trailing membar or vice versa
1363 
1364 // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1365 
// Returns true if 'barrier' is a trailing MemBarAcquire planted after
// a volatile load or after a CAS-style LoadStore, in which case the
// acquiring load (ldar<x>/ldaxr<x>) already provides the required
// ordering and no dmb needs to be emitted for the membar.
bool unnecessary_acquire(const Node *barrier)
{
  assert(barrier->is_MemBar(), "expecting a membar");

  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode* mb = barrier->as_MemBar();

  if (mb->trailing_load()) {
    // trails a volatile load
    return true;
  }

  if (mb->trailing_load_store()) {
    // trails a LoadStore; only elide the dmb for CAS-style nodes
    Node* load_store = mb->in(MemBarNode::Precedent);
    assert(load_store->is_LoadStore(), "unexpected graph shape");
    return is_CAS(load_store->Opcode(), true);
  }

  return false;
}
1389 
1390 bool needs_acquiring_load(const Node *n)
1391 {
1392   assert(n->is_Load(), "expecting a load");
1393   if (UseBarriersForVolatile) {
1394     // we use a normal load and a dmb
1395     return false;
1396   }
1397 
1398   LoadNode *ld = n->as_Load();
1399 
1400   return ld->is_acquire();
1401 }
1402 
1403 bool unnecessary_release(const Node *n)
1404 {
1405   assert((n->is_MemBar() &&
1406           n->Opcode() == Op_MemBarRelease),
1407          "expecting a release membar");
1408 
1409   if (UseBarriersForVolatile) {
1410     // we need to plant a dmb
1411     return false;
1412   }
1413 
1414   MemBarNode *barrier = n->as_MemBar();
1415   if (!barrier->leading()) {
1416     return false;
1417   } else {
1418     Node* trailing = barrier->trailing_membar();
1419     MemBarNode* trailing_mb = trailing->as_MemBar();
1420     assert(trailing_mb->trailing(), "Not a trailing membar?");
1421     assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars");
1422 
1423     Node* mem = trailing_mb->in(MemBarNode::Precedent);
1424     if (mem->is_Store()) {
1425       assert(mem->as_Store()->is_release(), "");
1426       assert(trailing_mb->Opcode() == Op_MemBarVolatile, "");
1427       return true;
1428     } else {
1429       assert(mem->is_LoadStore(), "");
1430       assert(trailing_mb->Opcode() == Op_MemBarAcquire, "");
1431       return is_CAS(mem->Opcode(), true);
1432     }
1433   }
1434   return false;
1435 }
1436 
// Returns true if 'n' is the trailing MemBarVolatile of a volatile
// store sequence, in which case the stlr<x> already provides the
// required ordering and no dmb needs to be emitted for the membar.
bool unnecessary_volatile(const Node *n)
{
  // assert n->is_MemBar();
  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode *mbvol = n->as_MemBar();

  bool release = mbvol->trailing_store();
  assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), "");
#ifdef ASSERT
  // Sanity: a trailing store membar must be paired with a leading
  // MemBarRelease that points back at it.
  if (release) {
    Node* leading = mbvol->leading_membar();
    assert(leading->Opcode() == Op_MemBarRelease, "");
    assert(leading->as_MemBar()->leading_store(), "");
    assert(leading->as_MemBar()->trailing_membar() == mbvol, "");
  }
#endif

  return release;
}
1460 
1461 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1462 
1463 bool needs_releasing_store(const Node *n)
1464 {
1465   // assert n->is_Store();
1466   if (UseBarriersForVolatile) {
1467     // we use a normal store and dmb combination
1468     return false;
1469   }
1470 
1471   StoreNode *st = n->as_Store();
1472 
1473   return st->trailing_membar() != NULL;
1474 }
1475 
1476 // predicate controlling translation of CAS
1477 //
1478 // returns true if CAS needs to use an acquiring load otherwise false
1479 
bool needs_acquiring_load_exclusive(const Node *n)
{
  assert(is_CAS(n->Opcode(), true), "expecting a compare and swap");
  if (UseBarriersForVolatile) {
    // explicit dmbs are planted instead, so plain ldxr<x> suffices
    return false;
  }

  LoadStoreNode* ldst = n->as_LoadStore();
  if (is_CAS(n->Opcode(), false)) {
    // opcodes recognised unconditionally by is_CAS are expected to
    // always carry a trailing membar
    assert(ldst->trailing_membar() != NULL, "expected trailing membar");
  } else {
    // the maybe_volatile-only opcodes need the acquiring load exactly
    // when a trailing membar is present
    return ldst->trailing_membar() != NULL;
  }

  // so we can just return true here
  return true;
}
1497 
1498 #define __ _masm.
1499 
1500 // advance declarations for helper functions to convert register
1501 // indices to register objects
1502 
1503 // the ad file has to provide implementations of certain methods
1504 // expected by the generic code
1505 //
1506 // REQUIRED FUNCTIONALITY
1507 
1508 //=============================================================================
1509 
1510 // !!!!! Special hack to get all types of calls to specify the byte offset
1511 //       from the start of the call to the point where the return address
1512 //       will point.
1513 
1514 int MachCallStaticJavaNode::ret_addr_offset()
1515 {
1516   // call should be a simple bl
1517   int off = 4;
1518   return off;
1519 }
1520 
// A dynamic call emits a movz/movk/movk sequence before the bl, so
// the return address is four instructions (16 bytes) in.
int MachCallDynamicJavaNode::ret_addr_offset()
{
  return 16; // movz, movk, movk, bl
}
1525 
int MachCallRuntimeNode::ret_addr_offset() {
  // for generated stubs the call will be
  //   far_call(addr)
  // for real runtime callouts it will be six instructions
  // see aarch64_enc_java_to_runtime
  //   adr(rscratch2, retaddr)
  //   lea(rscratch1, RuntimeAddress(addr)
  //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
  //   blr(rscratch1)
  // Targets inside the code cache are reached with a single far call;
  // targets outside it need the longer sequence above.
  CodeBlob *cb = CodeCache::find_blob(_entry_point);
  if (cb) {
    return MacroAssembler::far_branch_size();
  } else {
    return 6 * NativeInstruction::instruction_size;
  }
}
1542 
1543 // Indicate if the safepoint node needs the polling page as an input
1544 
1545 // the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
1547 // instruction itself. so we cannot plant a mov of the safepoint poll
1548 // address followed by a load. setting this to true means the mov is
1549 // scheduled as a prior instruction. that's better for scheduling
1550 // anyway.
1551 
// Always true: the polling page address is an explicit input so the
// mov can be scheduled ahead of the poll load (see the note above).
bool SafePointNode::needs_polling_address_input()
{
  return true;
}
1556 
1557 //=============================================================================
1558 
#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
#endif

// A breakpoint is a single brk instruction with a zero immediate.
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  C2_MacroAssembler _masm(&cbuf);
  __ brk(0);
}

uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1573 
1574 //=============================================================================
1575 
#ifndef PRODUCT
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif

  // Emit _count nop instructions.
  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
    C2_MacroAssembler _masm(&cbuf);
    for (int i = 0; i < _count; i++) {
      __ nop();
    }
  }

  // Each nop occupies one fixed-size instruction slot.
  uint MachNopNode::size(PhaseRegAlloc*) const {
    return _count * NativeInstruction::instruction_size;
  }
1592 
1593 //=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

// Constants are addressed absolutely on AArch64, so the table base
// contributes no offset.
int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  // Never called because requires_postalloc_expand() returns false.
  ShouldNotReachHere();
}

// The constant base node emits no code.
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
#endif
1618 
#ifndef PRODUCT
// Pretty-print the prolog: stack bang, frame push (small frames use
// an immediate sub; large frames go via rscratch1) and, when an
// nmethod entry barrier is present, the guard check sequence.
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_slots() << LogBytesPerInt;

  if (C->output()->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
  if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
    st->print("\n\t");
    st->print("ldr  rscratch1, [guard]\n\t");
    st->print("dmb ishld\n\t");
    st->print("ldr  rscratch2, [rthread, #thread_disarmed_offset]\n\t");
    st->print("cmp  rscratch1, rscratch2\n\t");
    st->print("b.eq skip");
    st->print("\n\t");
    st->print("blr #nmethod_entry_barrier_stub\n\t");
    st->print("b skip\n\t");
    st->print("guard: int\n\t");
    st->print("\n\t");
    st->print("skip:\n\t");
  }
}
#endif
1654 
// Emit the method prolog: patchable nop, optional class-init barrier,
// stack bang, frame construction and (for non-stub compilations) the
// nmethod entry barrier.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->output()->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  if (C->clinit_barrier_on_entry()) {
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;

    // Divert to the wrong-method stub unless the holder class has
    // been initialized.
    __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
    __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
    __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
    __ bind(L_skip_barrier);
  }

  int bangsize = C->output()->bang_size_in_bytes();
  if (C->output()->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  // Stub compilations do not get an nmethod entry barrier.
  if (C->stub_function() == NULL) {
    BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
    bs->nmethod_entry_barrier(&_masm);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1702 
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

// The prolog itself carries no relocatable constants.
int MachPrologNode::reloc() const
{
  return 0;
}
1713 
1714 //=============================================================================
1715 
#ifndef PRODUCT
// Pretty-print the epilog: frame pop (three size-dependent forms) and
// the optional return polling page touch.
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->output()->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("ldr rscratch1, [rthread],#polling_page_offset\n\t");
    st->print("ldr zr, [rscratch1]");
  }
}
#endif
1741 
// Emit the method epilog: tear down the frame, run the reserved stack
// check when enabled, and touch the polling page on method return.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);
  int framesize = C->output()->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    __ fetch_and_read_polling_page(rscratch1, relocInfo::poll_return_type);
  }
}
1757 
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}

int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}

// The epilog uses the default pipeline class.
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}
1771 
1772 //=============================================================================
1773 
// Figure out which register class each belongs in: rc_int, rc_float or
// rc_stack.
// rc_bad marks OptoReg::Bad, i.e. no register assigned.
enum RC { rc_bad, rc_int, rc_float, rc_stack };
1777 
1778 static enum RC rc_class(OptoReg::Name reg) {
1779 
1780   if (reg == OptoReg::Bad) {
1781     return rc_bad;
1782   }
1783 
1784   // we have 30 int registers * 2 halves
1785   // (rscratch1 and rscratch2 are omitted)
1786   int slots_of_int_registers = RegisterImpl::max_slots_per_register * (RegisterImpl::number_of_registers - 2);
1787 
1788   if (reg < slots_of_int_registers) {
1789     return rc_int;
1790   }
1791 
1792   // we have 32 float register * 4 halves
1793   if (reg < slots_of_int_registers + FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers) {
1794     return rc_float;
1795   }
1796 
1797   // Between float regs & stack is the flags regs.
1798   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
1799 
1800   return rc_stack;
1801 }
1802 
1803 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
1804   Compile* C = ra_->C;
1805 
1806   // Get registers to move.
1807   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
1808   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
1809   OptoReg::Name dst_hi = ra_->get_reg_second(this);
1810   OptoReg::Name dst_lo = ra_->get_reg_first(this);
1811 
1812   enum RC src_hi_rc = rc_class(src_hi);
1813   enum RC src_lo_rc = rc_class(src_lo);
1814   enum RC dst_hi_rc = rc_class(dst_hi);
1815   enum RC dst_lo_rc = rc_class(dst_lo);
1816 
1817   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
1818 
1819   if (src_hi != OptoReg::Bad) {
1820     assert((src_lo&1)==0 && src_lo+1==src_hi &&
1821            (dst_lo&1)==0 && dst_lo+1==dst_hi,
1822            "expected aligned-adjacent pairs");
1823   }
1824 
1825   if (src_lo == dst_lo && src_hi == dst_hi) {
1826     return 0;            // Self copy, no move.
1827   }
1828 
1829   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
1830               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
1831   int src_offset = ra_->reg2offset(src_lo);
1832   int dst_offset = ra_->reg2offset(dst_lo);
1833 
1834   if (bottom_type()->isa_vect() != NULL) {
1835     uint ireg = ideal_reg();
1836     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
1837     if (cbuf) {
1838       C2_MacroAssembler _masm(cbuf);
1839       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
1840       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
1841         // stack->stack
1842         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
1843         if (ireg == Op_VecD) {
1844           __ unspill(rscratch1, true, src_offset);
1845           __ spill(rscratch1, true, dst_offset);
1846         } else {
1847           __ spill_copy128(src_offset, dst_offset);
1848         }
1849       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
1850         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1851                ireg == Op_VecD ? __ T8B : __ T16B,
1852                as_FloatRegister(Matcher::_regEncode[src_lo]));
1853       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
1854         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1855                        ireg == Op_VecD ? __ D : __ Q,
1856                        ra_->reg2offset(dst_lo));
1857       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
1858         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1859                        ireg == Op_VecD ? __ D : __ Q,
1860                        ra_->reg2offset(src_lo));
1861       } else {
1862         ShouldNotReachHere();
1863       }
1864     }
1865   } else if (cbuf) {
1866     C2_MacroAssembler _masm(cbuf);
1867     switch (src_lo_rc) {
1868     case rc_int:
1869       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
1870         if (is64) {
1871             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
1872                    as_Register(Matcher::_regEncode[src_lo]));
1873         } else {
1874             C2_MacroAssembler _masm(cbuf);
1875             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
1876                     as_Register(Matcher::_regEncode[src_lo]));
1877         }
1878       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
1879         if (is64) {
1880             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1881                      as_Register(Matcher::_regEncode[src_lo]));
1882         } else {
1883             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1884                      as_Register(Matcher::_regEncode[src_lo]));
1885         }
1886       } else {                    // gpr --> stack spill
1887         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1888         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
1889       }
1890       break;
1891     case rc_float:
1892       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
1893         if (is64) {
1894             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
1895                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1896         } else {
1897             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
1898                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1899         }
1900       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
1901           if (cbuf) {
1902             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1903                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1904         } else {
1905             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1906                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1907         }
1908       } else {                    // fpr --> stack spill
1909         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1910         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1911                  is64 ? __ D : __ S, dst_offset);
1912       }
1913       break;
1914     case rc_stack:
1915       if (dst_lo_rc == rc_int) {  // stack --> gpr load
1916         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
1917       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
1918         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1919                    is64 ? __ D : __ S, src_offset);
1920       } else {                    // stack --> stack copy
1921         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1922         __ unspill(rscratch1, is64, src_offset);
1923         __ spill(rscratch1, is64, dst_offset);
1924       }
1925       break;
1926     default:
1927       assert(false, "bad rc_class for spill");
1928       ShouldNotReachHere();
1929     }
1930   }
1931 
1932   if (st) {
1933     st->print("spill ");
1934     if (src_lo_rc == rc_stack) {
1935       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
1936     } else {
1937       st->print("%s -> ", Matcher::regName[src_lo]);
1938     }
1939     if (dst_lo_rc == rc_stack) {
1940       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
1941     } else {
1942       st->print("%s", Matcher::regName[dst_lo]);
1943     }
1944     if (bottom_type()->isa_vect() != NULL) {
1945       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
1946     } else {
1947       st->print("\t# spill size = %d", is64 ? 64:32);
1948     }
1949   }
1950 
1951   return 0;
1952 
1953 }
1954 
1955 #ifndef PRODUCT
1956 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1957   if (!ra_)
1958     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
1959   else
1960     implementation(NULL, ra_, false, st);
1961 }
1962 #endif
1963 
// Emit the spill copy instructions into the code buffer.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}

// Variable size. Determine dynamically.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1971 
1972 //=============================================================================
1973 
1974 #ifndef PRODUCT
// Debug-print for a lock box: shows the add that materializes the address
// of the on-stack lock slot.
void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  // NOTE(review): the trailing ']' in this debug string looks like a typo
  // inherited from another port — confirm before changing the output.
  st->print("add %s, rsp, #%d]\t# box lock",
            Matcher::regName[reg], offset);
}
1981 #endif
1982 
// Materialize the address of the on-stack lock box: dst = sp + offset.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  C2_MacroAssembler _masm(&cbuf);

  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg    = ra_->get_encode(this);

  // The offset must fit an add immediate; anything larger is a bug.
  if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
    __ add(as_Register(reg), sp, offset);
  } else {
    ShouldNotReachHere();
  }
}

uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  return 4; // emit() above produces a single 4-byte add instruction
}
2000 
2001 //=============================================================================
2002 
2003 #ifndef PRODUCT
// Debug-print for the unverified entry point (inline cache check).
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("# MachUEPNode");
  if (UseCompressedClassPointers) {
    st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    if (CompressedKlassPointers::shift() != 0) {
      st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
    }
  } else {
   st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  }
  st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
  st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
}
2018 #endif
2019 
// Unverified entry point: compare the receiver's klass (loaded from
// j_rarg0) against rscratch2 — presumably the cached IC klass, set up by
// the caller — and jump to the IC miss stub on mismatch.
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  C2_MacroAssembler _masm(&cbuf);

  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}

// Variable size. Determine dynamically.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_);
}
2038 
2039 // REQUIRED EMIT CODE
2040 
2041 //=============================================================================
2042 
2043 // Emit exception handler code.
// Emit exception handler code: a far jump to the shared exception blob.
// Returns the offset of the handler within the stub section, or 0 on
// code-cache exhaustion.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // mov rscratch1 #exception_blob_entry_point
  // br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  C2_MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
2062 
// Emit deopt handler code.  Returns the offset of the handler within the
// stub section, or 0 on code-cache exhaustion.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  C2_MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // Put the address of this handler into lr, then jump to the deopt
  // blob's unpack entry.
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
2083 
2084 // REQUIRED MATCHER CODE
2085 
2086 //=============================================================================
2087 
2088 const bool Matcher::match_rule_supported(int opcode) {
2089   if (!has_match_rule(opcode))
2090     return false;
2091 
2092   bool ret_value = true;
2093   switch (opcode) {
2094     case Op_CacheWB:
2095     case Op_CacheWBPreSync:
2096     case Op_CacheWBPostSync:
2097       if (!VM_Version::supports_data_cache_line_flush()) {
2098         ret_value = false;
2099       }
2100       break;
2101   }
2102 
2103   return ret_value; // Per default match rules are supported.
2104 }
2105 
2106 // Identify extra cases that we might want to provide match rules for vector nodes and
2107 // other intrinsics guarded with vector length (vlen) and element type (bt).
2108 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
2109   if (!match_rule_supported(opcode)) {
2110     return false;
2111   }
2112 
2113   // Special cases which require vector length
2114   switch (opcode) {
2115     case Op_MulAddVS2VI: {
2116       if (vlen != 4) {
2117         return false;
2118       }
2119       break;
2120     }
2121   }
2122 
2123   return true; // Per default match rules are supported.
2124 }
2125 
// Predicated (masked) vector operations are not supported by this port.
const bool Matcher::has_predicated_vectors(void) {
  return false;
}

// Use the shared default register-pressure threshold for floats.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}

// Unimplemented on this port; must not be reached.
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
2139 
2140 // Is this branch offset short enough that a short branch can be used?
2141 //
2142 // NOTE: If the platform does not provide any short branch variants, then
2143 //       this method should return false for offset 0.
2144 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2145   // The passed offset is relative to address of the branch.
2146 
2147   return (-32768 <= offset && offset < 32768);
2148 }
2149 
// Can a 64-bit constant be stored as a single long store?
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}

// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
2160 
2161 // Vector width in bytes.
2162 const int Matcher::vector_width_in_bytes(BasicType bt) {
2163   int size = MIN2(16,(int)MaxVectorSize);
2164   // Minimum 2 values in vector
2165   if (size < 2*type2aelembytes(bt)) size = 0;
2166   // But never < 4
2167   if (size < 4) size = 0;
2168   return size;
2169 }
2170 
2171 // Limits on vector size (number of elements) loaded into vector.
2172 const int Matcher::max_vector_size(const BasicType bt) {
2173   return vector_width_in_bytes(bt)/type2aelembytes(bt);
2174 }
2175 const int Matcher::min_vector_size(const BasicType bt) {
2176 //  For the moment limit the vector size to 8 bytes
2177     int size = 8 / type2aelembytes(bt);
2178     if (size < 2) size = 2;
2179     return size;
2180 }
2181 
2182 // Vector ideal reg.
2183 const uint Matcher::vector_ideal_reg(int len) {
2184   switch(len) {
2185     case  8: return Op_VecD;
2186     case 16: return Op_VecX;
2187   }
2188   ShouldNotReachHere();
2189   return 0;
2190 }
2191 
// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// aarch64 supports misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
  return true;
}

// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}
2215 
// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// No support for generic vector operands.
const bool Matcher::supports_generic_vector_operands  = false;

// Unreachable: generic vector operands are disabled above.
MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) {
  ShouldNotReachHere(); // generic vector operands not supported
  return NULL;
}

bool Matcher::is_generic_reg2reg_move(MachNode* m) {
  ShouldNotReachHere();  // generic vector operands not supported
  return false;
}

bool Matcher::is_generic_vector(MachOper* opnd)  {
  ShouldNotReachHere();  // generic vector operands not supported
  return false;
}
2240 
// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Only when decoding is a no-op (zero shift, i.e. unscaled oops).
  return CompressedOops::shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}

bool Matcher::const_oop_prefer_decode() {
  // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
  return CompressedOops::base() == NULL;
}

bool Matcher::const_klass_prefer_decode() {
  // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
  return CompressedKlassPointers::base() == NULL;
}
2270 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Not used on aarch64; the shared code must never call this (Unimplemented).
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return false; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
2301 
// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers not be
// available to the callee.
bool Matcher::can_be_java_arg(int reg)
{
  // r0..r7 and v0..v7 (both 32- and 64-bit halves) are the Java
  // integer/float argument registers.
  return
    reg ==  R0_num || reg == R0_H_num ||
    reg ==  R1_num || reg == R1_H_num ||
    reg ==  R2_num || reg == R2_H_num ||
    reg ==  R3_num || reg == R3_H_num ||
    reg ==  R4_num || reg == R4_H_num ||
    reg ==  R5_num || reg == R5_H_num ||
    reg ==  R6_num || reg == R6_H_num ||
    reg ==  R7_num || reg == R7_H_num ||
    reg ==  V0_num || reg == V0_H_num ||
    reg ==  V1_num || reg == V1_H_num ||
    reg ==  V2_num || reg == V2_H_num ||
    reg ==  V3_num || reg == V3_H_num ||
    reg ==  V4_num || reg == V4_H_num ||
    reg ==  V5_num || reg == V5_H_num ||
    reg ==  V6_num || reg == V6_H_num ||
    reg ==  V7_num || reg == V7_H_num;
}

bool Matcher::is_spillable_arg(int reg)
{
  // Any register that can carry a Java argument can be spilled.
  return can_be_java_arg(reg);
}
2332 
// Should we use an assembler sequence for long division by constant?
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}

// Register for DIVI projection of divmodI (divmod not used on aarch64).
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
2363 
2364 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
2365   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
2366     Node* u = addp->fast_out(i);
2367     if (u->is_Mem()) {
2368       int opsize = u->as_Mem()->memory_size();
2369       assert(opsize > 0, "unexpected memory operand size");
2370       if (u->as_Mem()->memory_size() != (1<<shift)) {
2371         return false;
2372       }
2373     }
2374   }
2375   return true;
2376 }
2377 
const bool Matcher::convi2l_type_required = false;

// Should the matcher clone input 'm' of node 'n'?
bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
  // Clone constant vector-shift counts so they fold into the shift node.
  if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
    mstack.push(m, Visit);           // m = ShiftCntV
    return true;
  }
  return false;
}
2388 
// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
    return true;
  }

  Node *off = m->in(AddPNode::Offset);
  // Case 1: (base + (index << scale)), where the shifted index may wrap a
  // ConvI2L.  Clone the whole expression so it can fold into a
  // scaled-register addressing mode (see mem2address below).
  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
      // Are there other uses besides address expressions?
      !is_visited(off)) {
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(2), Visit);
    Node *conv = off->in(1);
    if (conv->Opcode() == Op_ConvI2L &&
        // Are there other uses besides address expressions?
        !is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Pre_Visit);
    } else {
      mstack.push(conv, Pre_Visit);
    }
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (off->Opcode() == Op_ConvI2L &&
             // Are there other uses besides address expressions?
             !is_visited(off)) {
    // Case 2: (base + sign-extended int offset); clone the conversion so
    // it can become a sign-extended register operand.
    address_visited.test_set(m->_idx); // Flag as address_visited
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(1), Pre_Visit);
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
2429 
// No platform-specific reshaping of AddP address expressions is needed.
void Compile::reshape_address(AddPNode* addp) {
}
2432 
2433 
// Emit a volatile memory access.  Only the bare [base] addressing mode is
// legal for volatile accesses, so index/scale/displacement are checked away.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  C2_MacroAssembler _masm(&cbuf);                                       \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }
2442 
2443 
2444 static Address mem2address(int opcode, Register base, int index, int size, int disp)
2445   {
2446     Address::extend scale;
2447 
2448     // Hooboy, this is fugly.  We need a way to communicate to the
2449     // encoder that the index needs to be sign extended, so we have to
2450     // enumerate all the cases.
2451     switch (opcode) {
2452     case INDINDEXSCALEDI2L:
2453     case INDINDEXSCALEDI2LN:
2454     case INDINDEXI2L:
2455     case INDINDEXI2LN:
2456       scale = Address::sxtw(size);
2457       break;
2458     default:
2459       scale = Address::lsl(size);
2460     }
2461 
2462     if (index == -1) {
2463       return Address(base, disp);
2464     } else {
2465       assert(disp == 0, "unsupported address mode: disp = %d", disp);
2466       return Address(base, as_Register(index), scale);
2467     }
2468   }
2469 
2470 
// Member-function-pointer types for the loadStore helpers below.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
// Register-addressed form (takes a bare base register, cf. MOV_VOLATILE).
typedef void (MacroAssembler::* mem_insn2)(Register Rt, Register adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
2476 
  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  // size_in_memory is the access width in bytes, used to validate and,
  // if needed, legitimize the immediate offset.
  static void loadStore(C2_MacroAssembler masm, mem_insn insn,
                        Register reg, int opcode,
                        Register base, int index, int scale, int disp,
                        int size_in_memory)
  {
    Address addr = mem2address(opcode, base, index, scale, disp);
    if (addr.getMode() == Address::base_plus_offset) {
      /* If we get an out-of-range offset it is a bug in the compiler,
         so we assert here. */
      assert(Address::offset_ok_for_immed(addr.offset(), exact_log2(size_in_memory)),
             "c2 compiler bug");
      /* Fix up any out-of-range offsets. */
      assert_different_registers(rscratch1, base);
      assert_different_registers(rscratch1, reg);
      addr = masm.legitimize_address(addr, size_in_memory, rscratch1);
    }
    (masm.*insn)(reg, addr);
  }
2498 
  // Float-register variant of loadStore.  The memory operand opcode
  // selects sign extension (sxtw) vs plain shift (lsl) for the index.
  static void loadStore(C2_MacroAssembler masm, mem_float_insn insn,
                        FloatRegister reg, int opcode,
                        Register base, int index, int size, int disp,
                        int size_in_memory)
  {
    Address::extend scale;

    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    if (index == -1) {
      /* If we get an out-of-range offset it is a bug in the compiler,
         so we assert here. */
      assert(Address::offset_ok_for_immed(disp, exact_log2(size_in_memory)), "c2 compiler bug");
      /* Fix up any out-of-range offsets. */
      assert_different_registers(rscratch1, base);
      Address addr = Address(base, disp);
      addr = masm.legitimize_address(addr, size_in_memory, rscratch1);
      (masm.*insn)(reg, addr);
    } else {
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
2529 
  // Vector variant of loadStore: only base+disp or base+scaled-register
  // addressing; no offset legitimization is performed here.
  static void loadStore(C2_MacroAssembler masm, mem_vector_insn insn,
                        FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
                        int opcode, Register base, int index, int size, int disp)
  {
    if (index == -1) {
      (masm.*insn)(reg, T, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode");
      (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
    }
  }
2541 
2542 %}
2543 
2544 
2545 
2546 //----------ENCODING BLOCK-----------------------------------------------------
2547 // This block specifies the encoding classes used by the compiler to
2548 // output byte streams.  Encoding classes are parameterized macros
2549 // used by Machine Instruction Nodes in order to generate the bit
2550 // encoding of the instruction.  Operands specify their base encoding
2551 // interface with the interface keyword.  There are currently
2552 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
2553 // COND_INTER.  REG_INTER causes an operand to generate a function
2554 // which returns its register number when queried.  CONST_INTER causes
2555 // an operand to generate a function which returns the value of the
2556 // constant when queried.  MEMORY_INTER causes an operand to generate
2557 // four functions which return the Base Register, the Index Register,
2558 // the Scale Value, and the Offset Value of the operand when queried.
2559 // COND_INTER causes an operand to generate six functions which return
2560 // the encoding code (ie - encoding bits for the instruction)
2561 // associated with each basic boolean condition for a conditional
2562 // instruction.
2563 //
2564 // Instructions specify two basic values for encoding.  Again, a
2565 // function is available to check if the constant displacement is an
2566 // oop. They use the ins_encode keyword to specify their encoding
2567 // classes (which must be a sequence of enc_class names, and their
2568 // parameters, specified in the encoding block), and they use the
2569 // opcode keyword to specify, in order, their primary, secondary, and
2570 // tertiary opcode.  Only the opcode sections which a particular
2571 // instruction needs for encoding need to be specified.
2572 encode %{
2573   // Build emit functions for each basic byte or larger field in the
2574   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2575   // from C++ code in the enc_class source block.  Emit functions will
2576   // live in the main source block for now.  In future, we can
2577   // generalize this by adding a syntax that specifies the sizes of
2578   // fields in an order, so that the adlc can build the emit functions
2579   // automagically
2580 
  // catch all for unimplemented encodings: emits a runtime trap via
  // MacroAssembler::unimplemented so missing encodings fail loudly.
  enc_class enc_unimplemented %{
    C2_MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}
2586 
  // BEGIN Non-volatile memory access
  //
  // Each enc_class below delegates to the loadStore() helper, passing the
  // target register, the emitting MacroAssembler member function, the memory
  // operand's decomposed components (opcode, base, index, scale, disp) and
  // the access size in bytes (1/2/4/8).  The memoryN operand type generally
  // matches that size (memory1 for byte accesses, memory2 for halfword, and
  // so on).  These bodies are generated from ad_encode.m4 -- regenerate from
  // the m4 source instead of hand-editing anything in this section.

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrsbw(iRegI dst, memory1 mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrsb(iRegI dst, memory1 mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrb(iRegI dst, memory1 mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrb(iRegL dst, memory1 mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrshw(iRegI dst, memory2 mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrsh(iRegI dst, memory2 mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrh(iRegI dst, memory2 mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrh(iRegL dst, memory2 mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrw(iRegI dst, memory4 mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrw(iRegL dst, memory4 mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrsw(iRegL dst, memory4 mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldr(iRegL dst, memory8 mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrs(vRegF dst, memory4 mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_ldrd(vRegD dst, memory8 mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_strb(iRegI src, memory1 mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_strb0(memory1 mem) %{
    C2_MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_strh(iRegI src, memory2 mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_strh0(memory2 mem) %{
    C2_MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_strw(iRegI src, memory4 mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_strw0(memory4 mem) %{
    C2_MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_str(iRegL src, memory8 mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
      C2_MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_str0(memory8 mem) %{
    C2_MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_strs(vRegF src, memory4 mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_strd(vRegD src, memory8 mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_strw_immn(immN src, memory1 mem) %{
    C2_MacroAssembler _masm(&cbuf);
    address con = (address)$src$$constant;
    // need to do this the hard way until we can manage relocs
    // for 32 bit constants
    __ movoop(rscratch2, (jobject)con);
    if (con) __ encode_heap_oop_not_null(rscratch2);
    loadStore(_masm, &MacroAssembler::strw, rscratch2, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_strw_immnk(immN src, memory4 mem) %{
    C2_MacroAssembler _masm(&cbuf);
    address con = (address)$src$$constant;
    // need to do this the hard way until we can manage relocs
    // for 32 bit constants
    __ movoop(rscratch2, (jobject)con);
    __ encode_klass_not_null(rscratch2);
    loadStore(_masm, &MacroAssembler::strw, rscratch2, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
  %}

  // This encoding class is generated automatically from ad_encode.m4.
  // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
  enc_class aarch64_enc_strb0_ordered(memory4 mem) %{
      C2_MacroAssembler _masm(&cbuf);
      __ membar(Assembler::StoreStore);
      loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
  %}

  // END Non-volatile memory access
2825 
  // Vector loads and stores
  //
  // These use the loadStore() overload that takes a SIMD register-size tag
  // (MacroAssembler::S = 32-bit, D = 64-bit, Q = 128-bit) instead of a byte
  // count, loading/storing the whole vector register through the decomposed
  // memory operand (opcode, base, index, scale, disp).
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2862 
  // volatile loads and stores
  //
  // These delegate to the MOV_VOLATILE macro (defined elsewhere in this
  // file), which emits the given load-acquire (ldar*) or store-release
  // (stlr*) instruction, using rscratch1 as a temporary for forming the
  // effective address when the memory operand is not base-register-only.
  // Sub-word signed loads sign-extend after the acquire load because there
  // is no sign-extending ldar variant; FP variants bounce the value through
  // rscratch2 with fmovs/fmovd since ldar/stlr only operate on GPRs.

  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
  %}

  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
  %}

  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}


  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    // no sign-extending acquire-load exists, so extend after the ldarb
    __ sxtbw(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}

  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    // acquire-load into rscratch1, then move the bits into the FP register
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}

  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
      C2_MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}

  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      // move the FP bits into rscratch2 first; stlrw only takes a GPR
      C2_MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}

  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      C2_MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
2989 
  // synchronized read/update encodings
  //
  // ldaxr/stlxr take only a base-register address, so when the memory
  // operand carries a displacement and/or an index the effective address is
  // first materialized into a scratch register with lea.

  // Load-acquire exclusive of a 64-bit value.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory8 mem) %{
    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}

  // Store-release exclusive of a 64-bit value.  The exclusive-store status
  // (0 = success) lands in rscratch1; the trailing cmpw sets the condition
  // flags from it so a following branch/cset can test success.
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory8 mem) %{
    C2_MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    __ cmpw(rscratch1, zr);
  %}
3050 
  // Compare-and-exchange encodings for xword/word/halfword/byte operand
  // sizes.  The guarantee() enforces that matching only produced a plain
  // base-register address (no index, no displacement), which is what
  // MacroAssembler::cmpxchg expects.  The non-acquire variants use
  // release-only ordering on the exchange itself.
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    C2_MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    C2_MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  enc_class aarch64_enc_cmpxchgs(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    C2_MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  enc_class aarch64_enc_cmpxchgb(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    C2_MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}


  // The only difference between aarch64_enc_cmpxchg and
  // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
  // CompareAndSwap sequence to serve as a barrier on acquiring a
  // lock.
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    C2_MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    C2_MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  enc_class aarch64_enc_cmpxchgs_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    C2_MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  enc_class aarch64_enc_cmpxchgb_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    C2_MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}
3119 
  // auxiliary used for CompareAndSwapX to set result register:
  // materializes the EQ flag (left by the preceding cmpxchg/stlxr
  // sequence) as 1 on success, 0 on failure.
  enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
    C2_MacroAssembler _masm(&cbuf);
    Register res_reg = as_Register($res$$reg);
    __ cset(res_reg, Assembler::EQ);
  %}
3126 
  // prefetch encodings

  // Prefetch-for-store with temporal L1 hint (PSTL1KEEP).  prfm cannot take
  // a base+disp+index address, so when both a displacement and an index are
  // present the base+disp part is folded into rscratch1 first.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    C2_MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
3147 
  // mov encodings
3149 
  // Move a 32-bit immediate into a register.  Zero is moved from zr so the
  // canonical zeroing form is emitted.
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    C2_MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}

  // Move a 64-bit immediate into a register (zero again via zr).
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}

  // Move a pointer constant.  NULL and 1 are handled by dedicated
  // enc_classes below, so they must not reach here.  Oops and metadata are
  // emitted with their relocations; plain addresses below the first VM page
  // are emitted as immediates, anything larger via adrp+add (page base plus
  // low-bits offset).
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          __ mov(dst_reg, con);
        } else {
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}

  // Move the NULL pointer constant (zero).
  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Move the pointer constant 1.
  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}

  // Load the card table's byte map base (GC write-barrier support).
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    C2_MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}

  // Move a narrow (compressed) oop constant; must carry an oop reloc.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}

  // Move the narrow-oop NULL constant (zero).
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Move a narrow (compressed) klass constant; must carry a metadata reloc.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
3245 
  // arithmetic encodings

  // Add/subtract a 32-bit immediate.  Shared between AddI and SubI rules:
  // $primary (0 = add, 1 = subtract) selects the operation by negating the
  // constant, and a negative effective constant is emitted as the opposite
  // instruction with the magnitude, since add/sub immediates are unsigned.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}

  // 64-bit counterpart of aarch64_enc_addsubw_imm.
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}

  // 32-bit signed division via corrected_idivl (handles the Java
  // min-int / -1 corner case); the 'false' flag requests the quotient.
  enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
    C2_MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 64-bit signed division (corrected_idivq).  NOTE(review): the ADLC
  // parameters are declared iRegI although this emits the 64-bit divide --
  // presumably only the register numbers matter here; confirm against the
  // DivL rules that use this enc_class.
  enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
    C2_MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 32-bit signed remainder; the 'true' flag requests the remainder.
  enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
    C2_MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}

  // 64-bit signed remainder (see the iRegI note on aarch64_enc_div above).
  enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
    C2_MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}
3307 
3308   // compare instruction encodings
3309 
3310   enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
3311     C2_MacroAssembler _masm(&cbuf);
3312     Register reg1 = as_Register($src1$$reg);
3313     Register reg2 = as_Register($src2$$reg);
3314     __ cmpw(reg1, reg2);
3315   %}
3316 
3317   enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
3318     C2_MacroAssembler _masm(&cbuf);
3319     Register reg = as_Register($src1$$reg);
3320     int32_t val = $src2$$constant;
3321     if (val >= 0) {
3322       __ subsw(zr, reg, val);
3323     } else {
3324       __ addsw(zr, reg, -val);
3325     }
3326   %}
3327 
3328   enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
3329     C2_MacroAssembler _masm(&cbuf);
3330     Register reg1 = as_Register($src1$$reg);
3331     u_int32_t val = (u_int32_t)$src2$$constant;
3332     __ movw(rscratch1, val);
3333     __ cmpw(reg1, rscratch1);
3334   %}
3335 
3336   enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
3337     C2_MacroAssembler _masm(&cbuf);
3338     Register reg1 = as_Register($src1$$reg);
3339     Register reg2 = as_Register($src2$$reg);
3340     __ cmp(reg1, reg2);
3341   %}
3342 
3343   enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
3344     C2_MacroAssembler _masm(&cbuf);
3345     Register reg = as_Register($src1$$reg);
3346     int64_t val = $src2$$constant;
3347     if (val >= 0) {
3348       __ subs(zr, reg, val);
3349     } else if (val != -val) {
3350       __ adds(zr, reg, -val);
3351     } else {
3352     // aargh, Long.MIN_VALUE is a special case
3353       __ orr(rscratch1, zr, (u_int64_t)val);
3354       __ subs(zr, reg, rscratch1);
3355     }
3356   %}
3357 
3358   enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
3359     C2_MacroAssembler _masm(&cbuf);
3360     Register reg1 = as_Register($src1$$reg);
3361     u_int64_t val = (u_int64_t)$src2$$constant;
3362     __ mov(rscratch1, val);
3363     __ cmp(reg1, rscratch1);
3364   %}
3365 
3366   enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
3367     C2_MacroAssembler _masm(&cbuf);
3368     Register reg1 = as_Register($src1$$reg);
3369     Register reg2 = as_Register($src2$$reg);
3370     __ cmp(reg1, reg2);
3371   %}
3372 
3373   enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
3374     C2_MacroAssembler _masm(&cbuf);
3375     Register reg1 = as_Register($src1$$reg);
3376     Register reg2 = as_Register($src2$$reg);
3377     __ cmpw(reg1, reg2);
3378   %}
3379 
3380   enc_class aarch64_enc_testp(iRegP src) %{
3381     C2_MacroAssembler _masm(&cbuf);
3382     Register reg = as_Register($src$$reg);
3383     __ cmp(reg, zr);
3384   %}
3385 
3386   enc_class aarch64_enc_testn(iRegN src) %{
3387     C2_MacroAssembler _masm(&cbuf);
3388     Register reg = as_Register($src$$reg);
3389     __ cmpw(reg, zr);
3390   %}
3391 
3392   enc_class aarch64_enc_b(label lbl) %{
3393     C2_MacroAssembler _masm(&cbuf);
3394     Label *L = $lbl$$label;
3395     __ b(*L);
3396   %}
3397 
3398   enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
3399     C2_MacroAssembler _masm(&cbuf);
3400     Label *L = $lbl$$label;
3401     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
3402   %}
3403 
3404   enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
3405     C2_MacroAssembler _masm(&cbuf);
3406     Label *L = $lbl$$label;
3407     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
3408   %}
3409 
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     // Slow-path partial subtype check.  set_cond_codes == true, so the
     // condition flags reflect the outcome for the matching instruct rule.
     // When $primary is set, result is additionally cleared to zero on the
     // fall-through (non-miss) path.
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     C2_MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
3427 
  enc_class aarch64_enc_java_static_call(method meth) %{
    C2_MacroAssembler _masm(&cbuf);

    // Emit a direct Java call (static or optimized-virtual), or a runtime
    // call when there is no resolved _method.  On code-cache exhaustion the
    // compile is failed via record_failure rather than asserting.
    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else {
      int method_index = resolved_method_index(cbuf);
      // Optimized-virtual and static calls need different relocations so the
      // runtime can patch them appropriately.
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      call = __ trampoline_call(Address(addr, rspec), &cbuf);

      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
    if (call == NULL) {
      // Trampoline emission failed for lack of code-cache space.
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}
3454 
3455   enc_class aarch64_enc_java_dynamic_call(method meth) %{
3456     C2_MacroAssembler _masm(&cbuf);
3457     int method_index = resolved_method_index(cbuf);
3458     address call = __ ic_call((address)$meth$$method, method_index);
3459     if (call == NULL) {
3460       ciEnv::current()->record_failure("CodeCache is full");
3461       return;
3462     }
3463   %}
3464 
3465   enc_class aarch64_enc_call_epilog() %{
3466     C2_MacroAssembler _masm(&cbuf);
3467     if (VerifyStackAtCalls) {
3468       // Check that stack depth is unchanged: find majik cookie on stack
3469       __ call_Unimplemented();
3470     }
3471   %}
3472 
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    C2_MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blr
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // Target lives in the code cache: reachable via a trampoline call.
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      // Arbitrary target address: load it and branch-with-link.
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
      // (the pair store also keeps sp 16-byte aligned; the slots are
      // popped again right after the call).
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blr(rscratch1);
      __ bind(retaddr);
      __ add(sp, sp, 2 * wordSize);
    }
  %}
3499 
3500   enc_class aarch64_enc_rethrow() %{
3501     C2_MacroAssembler _masm(&cbuf);
3502     __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
3503   %}
3504 
  enc_class aarch64_enc_ret() %{
    C2_MacroAssembler _masm(&cbuf);
    // Return to the address in the link register.
    __ ret(lr);
  %}
3509 
3510   enc_class aarch64_enc_tail_call(iRegP jump_target) %{
3511     C2_MacroAssembler _masm(&cbuf);
3512     Register target_reg = as_Register($jump_target$$reg);
3513     __ br(target_reg);
3514   %}
3515 
3516   enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
3517     C2_MacroAssembler _masm(&cbuf);
3518     Register target_reg = as_Register($jump_target$$reg);
3519     // exception oop should be in r0
3520     // ret addr has been popped into lr
3521     // callee expects it in r3
3522     __ mov(r3, lr);
3523     __ br(target_reg);
3524   %}
3525 
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    C2_MacroAssembler _masm(&cbuf);
    // Fast-path monitor enter: optional biased locking, then a stack-lock
    // CAS, then a recursive stack-lock test, then the inflated-monitor CAS.
    // On exit, flag EQ means the lock was acquired; NE sends the caller to
    // the slow path.
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Load markWord from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
    }

    // Check for existing monitor (the mark word's monitor bit is set).
    __ tbnz(disp_hdr, exact_log2(markWord::monitor_value), object_has_monitor);

    // Set tmp to be (markWord of object | UNLOCK_VALUE).
    __ orr(tmp, disp_hdr, markWord::unlocked_value);

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markWord with an unlocked value (tmp) and if
    // equal exchange the stack address of our box with object markWord.
    // On failure disp_hdr contains the possibly locked markWord.
    __ cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true,
               /*release*/ true, /*weak*/ false, disp_hdr);
    __ br(Assembler::EQ, cont);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object and have now locked it; we will continue at label cont.

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markWord of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | markWord::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);   // Sets flags for result
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    __ b(cont);

    // Handle existing monitor.
    __ bind(object_has_monitor);

    // The object's monitor m is unlocked iff m->owner == NULL,
    // otherwise m->owner may contain a thread or a stack address.
    //
    // Try to CAS m->owner from NULL to current thread.
    __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markWord::monitor_value));
    __ cmpxchg(tmp, zr, rthread, Assembler::xword, /*acquire*/ true,
               /*release*/ true, /*weak*/ false, noreg); // Sets flags for result

    // Store a non-null value into the box to avoid looking like a re-entrant
    // lock. The fast-path monitor unlock code checks for
    // markWord::monitor_value so use markWord::unused_mark which has the
    // relevant bit set, and also matches ObjectSynchronizer::enter.
    __ mov(tmp, (address)markWord::unused_mark().value());
    __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
3603 
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    C2_MacroAssembler _masm(&cbuf);
    // Fast-path monitor exit, mirroring aarch64_enc_fast_lock: biased
    // unlock, recursive stack-lock check, stack-lock CAS restore, then the
    // inflated-monitor path.  Flag EQ means unlocked; NE means slow path.
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;

    assert_different_registers(oop, box, tmp, disp_hdr);

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);

    // Handle existing monitor.
    __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
    __ tbnz(disp_hdr, exact_log2(markWord::monitor_value), object_has_monitor);

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markWord of the
    // object.

    __ cmpxchg(oop, box, disp_hdr, Assembler::xword, /*acquire*/ false,
               /*release*/ true, /*weak*/ false, tmp);
    __ b(cont);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // Handle existing monitor.
    __ bind(object_has_monitor);
    STATIC_ASSERT(markWord::monitor_value <= INT_MAX);
    __ add(tmp, tmp, -(int)markWord::monitor_value); // monitor
    __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
    __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
    __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
    __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
    __ cmp(rscratch1, zr); // Sets flags for result
    __ br(Assembler::NE, cont);

    // Monitor is owned by us with no recursions; check for waiters.
    __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
    __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
    __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
    __ cmp(rscratch1, zr); // Sets flags for result
    __ cbnz(rscratch1, cont);
    // need a release store here
    __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
    __ stlr(zr, tmp); // set unowned
    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
3664 
3665 %}
3666 
3667 //----------FRAME--------------------------------------------------------------
3668 // Definition of frame structure and management information.
3669 //
3670 //  S T A C K   L A Y O U T    Allocators stack-slot number
3671 //                             |   (to get allocators register number
3672 //  G  Owned by    |        |  v    add OptoReg::stack0())
3673 //  r   CALLER     |        |
3674 //  o     |        +--------+      pad to even-align allocators stack-slot
3675 //  w     V        |  pad0  |        numbers; owned by CALLER
3676 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3677 //  h     ^        |   in   |  5
3678 //        |        |  args  |  4   Holes in incoming args owned by SELF
3679 //  |     |        |        |  3
3680 //  |     |        +--------+
3681 //  V     |        | old out|      Empty on Intel, window on Sparc
3682 //        |    old |preserve|      Must be even aligned.
3683 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3684 //        |        |   in   |  3   area for Intel ret address
3685 //     Owned by    |preserve|      Empty on Sparc.
3686 //       SELF      +--------+
3687 //        |        |  pad2  |  2   pad to align old SP
3688 //        |        +--------+  1
3689 //        |        | locks  |  0
3690 //        |        +--------+----> OptoReg::stack0(), even aligned
3691 //        |        |  pad1  | 11   pad to align new SP
3692 //        |        +--------+
3693 //        |        |        | 10
3694 //        |        | spills |  9   spills
3695 //        V        |        |  8   (pad0 slot for callee)
3696 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3697 //        ^        |  out   |  7
3698 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3699 //     Owned by    +--------+
3700 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3701 //        |    new |preserve|      Must be even-aligned.
3702 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3703 //        |        |        |
3704 //
3705 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3706 //         known from SELF's arguments and the Java calling convention.
3707 //         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
3715 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3716 //         even aligned with pad0 as needed.
3717 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3718 //           (the latter is true on Intel but is it false on AArch64?)
3719 //         region 6-11 is even aligned; it may be padded out more so that
3720 //         the region from SP to FP meets the minimum stack alignment.
3721 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3722 //         alignment.  Region 11, pad1, may be dynamically extended so that
3723 //         SP meets the minimum alignment.
3724 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  // (NOTE(review): R31 here presumably names sp in this port's register
  // definitions — confirm against the register block at the top of the file.)
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // lo/hi give the register pair for each ideal register opcode:
    // lo is the (low half) return register, hi is the upper half or
    // OptoReg::Bad for 32-bit values.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                      // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
3828 
3829 //----------ATTRIBUTES---------------------------------------------------------
3830 //----------Operand Attributes-------------------------------------------------
3831 op_attrib op_cost(1);        // Required cost attribute
3832 
3833 //----------Instruction Attributes---------------------------------------------
3834 ins_attrib ins_cost(INSN_COST); // Required cost attribute
3835 ins_attrib ins_size(32);        // Required size attribute (in bits)
3836 ins_attrib ins_short_branch(0); // Required flag: is this instruction
3837                                 // a non-matching short branch variant
3838                                 // of some long branch?
3839 ins_attrib ins_alignment(4);    // Required alignment attribute (must
3840                                 // be a power of 2) specifies the
3841                                 // alignment that some part of the
3842                                 // instruction (not necessarily the
3843                                 // start) requires.  If > 1, a
3844                                 // compute_padding() function must be
3845                                 // provided for the instruction
3846 
3847 //----------OPERANDS-----------------------------------------------------------
3848 // Operand definitions must precede instruction definitions for correct parsing
3849 // in the ADLC because operands constitute user defined types which are used in
3850 // instruction definitions.
3851 
3852 //----------Simple Operands----------------------------------------------------
3853 
// Integer operands 32 bit
// 32 bit immediate
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift values for add/sub extension shift
operand immIExt()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer no greater than 4
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the int constant 31
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3928 
// the int constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the int constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the int constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the int constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the int constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the int constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the int constant 63
operand immI_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the int constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the int constant 255
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the int constant 65535
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4028 
// the long constant 255
operand immL_255()
%{
  predicate(n->get_long() == 255L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the long constant 65535
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the long constant 2^32 - 1
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// a long that is a contiguous low-order bit mask: nonzero, top two bits
// clear, and (value + 1) a power of two
operand immL_bitmask()
%{
  predicate((n->get_long() != 0)
            && ((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// an int that is a contiguous low-order bit mask: nonzero, top two bits
// clear, and (value + 1) a power of two
operand immI_bitmask()
%{
  predicate((n->get_int() != 0)
            && ((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4082 
// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset (long) -- for base plus immediate loads
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4136 
// Offset for scaled or unscaled immediate loads and stores
//
// The second argument to Address::offset_ok_for_immed is a shift:
// the names below suggest it is log2 of the access size in bytes
// (immIOffset2 passes 1, immIOffset4 passes 2, ...), so each operand
// accepts int offsets encodable for that access size.
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 0));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// offset valid for a 1-byte access (shift 0, same test as immIOffset)
operand immIOffset1()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 0));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// offset valid for a 2-byte access (shift 1)
operand immIOffset2()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// offset valid for a 4-byte access (shift 2)
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// offset valid for an 8-byte access (shift 3)
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// offset valid for a 16-byte access (shift 4)
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4197 
// Long-typed counterparts of the immIOffset* operands above: same
// Address::offset_ok_for_immed test (shift = log2 of access size),
// applied to a ConL instead of a ConI.
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 0));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// offset valid for a 1-byte access (shift 0, same test as immLoffset)
operand immLoffset1()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 0));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// offset valid for a 2-byte access (shift 1)
operand immLoffset2()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// offset valid for a 4-byte access (shift 2)
operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// offset valid for an 8-byte access (shift 3)
operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// offset valid for a 16-byte access (shift 4)
operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4257 
// 32 bit integer valid for add sub immediate
// (validity decided by Assembler::operand_valid_for_add_sub_immediate;
// the int value is sign-extended to long before the check)
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4279 
// Integer operands 64 bit
// 64 bit immediate (unrestricted)
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor
// (matches only the exact constant frame_anchor_offset +
// last_Java_pc_offset; used by thread_anchor_pc below)
operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4366 
// Pointer operands
// Pointer Immediate (unrestricted)
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
// matches only when the constant pointer equals the card table's
// byte_map_base and a card-table barrier set is in use
operand immByteMapBase()
%{
  // Get base of card map
  predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) &&
            (CardTable::CardValue*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
// NOTE(review): comment duplicated from immP_M1; confirm the actual
// use of the -2 sentinel at the match sites.
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4437 
// Float and Double operands
// Double Immediate (unrestricted)
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d
// (bit-pattern comparison, so -0.0d does not match)
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: value accepted by
// Assembler::operand_valid_for_float_immediate (i.e. encodable as an
// FP immediate in the instruction itself)
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (unrestricted)
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f.
// (bit-pattern comparison, so -0.0f does not match)
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: value accepted by
// Assembler::operand_valid_for_float_immediate after widening to double
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4498 
// Narrow pointer operands
// Narrow Pointer Immediate (compressed oop constant)
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow (compressed) Klass pointer constant
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4529 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4551 
// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register not Special
// NOTE(review): unlike the sibling operands this omits op_cost and so
// relies on adlc's default — confirm that is intentional.
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  match(iRegL_R0);
  format %{ %}
  interface(REG_INTER);
%}
4573 
// Pointer Register Operands
// Pointer Register
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4606 
// Pointer operands pinned to a single physical register (the
// ALLOC_IN_RC(rN_reg) constraint leaves the allocator no choice);
// used by rules that require a value in a specific register.

// Pointer 64 bit Register R0 only
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R5 only
operand iRegP_R5()
%{
  constraint(ALLOC_IN_RC(r5_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R10 only
operand iRegP_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4690 
// Long operands pinned to a single physical register, analogous to
// the iRegP_Rn operands above.

// Long 64 bit Register R0 only
operand iRegL_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R2 only
operand iRegL_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R3 only
operand iRegL_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R11 only
operand iRegL_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4734 
// Pointer 64 bit Register FP only
operand iRegP_FP()
%{
  constraint(ALLOC_IN_RC(fp_reg));
  match(RegP);
  // match(iRegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 32-bit int operands pinned to a single physical register.

// Register R0 only
operand iRegI_R0()
%{
  constraint(ALLOC_IN_RC(int_r0_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R2 only
operand iRegI_R2()
%{
  constraint(ALLOC_IN_RC(int_r2_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R3 only
operand iRegI_R3()
%{
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Register R4 only
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4790 
4791 
// Narrow (compressed oop) Register Operands
// Narrow Pointer Register
operand iRegN()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer pinned to R0
operand iRegN_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer pinned to R2
operand iRegN_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer pinned to R3
operand iRegN_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register not Special
// (header previously said "Integer 64 bit Register" — this matches
// RegN and allocates from the 32-bit no-special class)
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// heap base register -- used for encoding immN0

operand iRegIHeapbase()
%{
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4851 
// Float Register
// Float register operands
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 64-bit (D-sized) vector register operand
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 128-bit (Q/X-sized) vector register operand
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4895 
// Double operands pinned to a specific SIMD/FP register V0..V31
// (each constraint class vN_reg contains exactly that register);
// used by rules that require their FP value in a fixed register.
operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V4()
%{
  constraint(ALLOC_IN_RC(v4_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V5()
%{
  constraint(ALLOC_IN_RC(v5_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V6()
%{
  constraint(ALLOC_IN_RC(v6_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V7()
%{
  constraint(ALLOC_IN_RC(v7_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V8()
%{
  constraint(ALLOC_IN_RC(v8_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V9()
%{
  constraint(ALLOC_IN_RC(v9_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V10()
%{
  constraint(ALLOC_IN_RC(v10_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V11()
%{
  constraint(ALLOC_IN_RC(v11_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V12()
%{
  constraint(ALLOC_IN_RC(v12_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V13()
%{
  constraint(ALLOC_IN_RC(v13_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V14()
%{
  constraint(ALLOC_IN_RC(v14_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V15()
%{
  constraint(ALLOC_IN_RC(v15_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V16()
%{
  constraint(ALLOC_IN_RC(v16_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V17()
%{
  constraint(ALLOC_IN_RC(v17_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V18()
%{
  constraint(ALLOC_IN_RC(v18_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V19()
%{
  constraint(ALLOC_IN_RC(v19_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V20()
%{
  constraint(ALLOC_IN_RC(v20_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V21()
%{
  constraint(ALLOC_IN_RC(v21_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V22()
%{
  constraint(ALLOC_IN_RC(v22_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V23()
%{
  constraint(ALLOC_IN_RC(v23_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V24()
%{
  constraint(ALLOC_IN_RC(v24_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V25()
%{
  constraint(ALLOC_IN_RC(v25_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V26()
%{
  constraint(ALLOC_IN_RC(v26_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V27()
%{
  constraint(ALLOC_IN_RC(v27_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V28()
%{
  constraint(ALLOC_IN_RC(v28_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V29()
%{
  constraint(ALLOC_IN_RC(v29_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V30()
%{
  constraint(ALLOC_IN_RC(v30_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V31()
%{
  constraint(ALLOC_IN_RC(v31_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5183 
// Flags register, used as output of signed compare instructions

// note that on AArch64 we also use this register as the output for
// for floating point compare instructions (CmpF CmpD). this ensures
// that ordered inequality tests use GT, GE, LT or LE none of which
// pass through cases where the result is unordered i.e. one or both
// inputs to the compare is a NaN. this means that the ideal code can
// replace e.g. a GT with an LE and not end up capturing the NaN case
// (where the comparison should always fail). EQ and NE tests are
// always generated in ideal code so that unordered folds into the NE
// case, matching the behaviour of AArch64 NE.
//
// This differs from x86 where the outputs of FP compares use a
// special FP flags registers and where compares based on this
// register are distinguished into ordered inequalities (cmpOpUCF) and
// EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
// to explicitly handle the unordered case in branches. x86 also has
// to include extra CMoveX rules to accept a cmpOpUCF input.

operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of unsigned compare instructions
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}
5223 
// Special Registers

// Method Register (inline-cache dispatch)
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Interpreter method-oop register
operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Thread Register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Link Register
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5265 
//----------Memory Operands----------------------------------------------------
// In the MEMORY_INTER descriptions below, index(0xffffffff) means
// "no index register".

// [reg] -- plain register-indirect addressing
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// [reg + (sign-extended int index << scale)]; the predicate ensures
// the scale is usable by every memory consumer of this AddP
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// [reg + (long index << scale)]
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// [reg + sign-extended int index]
operand indIndexI2L(iRegP reg, iRegI ireg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// [reg + long index]
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
5339 
// [reg + int offset] addressing; the variants differ only in which
// immIOffset* operand (and hence which access size) constrains the
// offset.
operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI1(iRegP reg, immIOffset1 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI2(iRegP reg, immIOffset2 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI4(iRegP reg, immIOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI8(iRegP reg, immIOffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI16(iRegP reg, immIOffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5423 
// [reg + long offset] addressing; long-typed counterparts of the
// indOffI* operands above, constrained by the immLoffset* operands.
operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL1(iRegP reg, immLoffset1 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL2(iRegP reg, immLoffset2 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL4(iRegP reg, immLoffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL8(iRegP reg, immLoffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL16(iRegP reg, immLoffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5507 
5508 operand indirectN(iRegN reg)
5509 %{
5510   predicate(CompressedOops::shift() == 0);
5511   constraint(ALLOC_IN_RC(ptr_reg));
5512   match(DecodeN reg);
5513   op_cost(0);
5514   format %{ "[$reg]\t# narrow" %}
5515   interface(MEMORY_INTER) %{
5516     base($reg);
5517     index(0xffffffff);
5518     scale(0x0);
5519     disp(0x0);
5520   %}
5521 %}
5522 
5523 operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
5524 %{
5525   predicate(CompressedOops::shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
5526   constraint(ALLOC_IN_RC(ptr_reg));
5527   match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
5528   op_cost(0);
5529   format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
5530   interface(MEMORY_INTER) %{
5531     base($reg);
5532     index($ireg);
5533     scale($scale);
5534     disp(0x0);
5535   %}
5536 %}
5537 
5538 operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
5539 %{
5540   predicate(CompressedOops::shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
5541   constraint(ALLOC_IN_RC(ptr_reg));
5542   match(AddP (DecodeN reg) (LShiftL lreg scale));
5543   op_cost(0);
5544   format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
5545   interface(MEMORY_INTER) %{
5546     base($reg);
5547     index($lreg);
5548     scale($scale);
5549     disp(0x0);
5550   %}
5551 %}
5552 
5553 operand indIndexI2LN(iRegN reg, iRegI ireg)
5554 %{
5555   predicate(CompressedOops::shift() == 0);
5556   constraint(ALLOC_IN_RC(ptr_reg));
5557   match(AddP (DecodeN reg) (ConvI2L ireg));
5558   op_cost(0);
5559   format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
5560   interface(MEMORY_INTER) %{
5561     base($reg);
5562     index($ireg);
5563     scale(0x0);
5564     disp(0x0);
5565   %}
5566 %}
5567 
5568 operand indIndexN(iRegN reg, iRegL lreg)
5569 %{
5570   predicate(CompressedOops::shift() == 0);
5571   constraint(ALLOC_IN_RC(ptr_reg));
5572   match(AddP (DecodeN reg) lreg);
5573   op_cost(0);
5574   format %{ "$reg, $lreg\t# narrow" %}
5575   interface(MEMORY_INTER) %{
5576     base($reg);
5577     index($lreg);
5578     scale(0x0);
5579     disp(0x0);
5580   %}
5581 %}
5582 
// Memory operand: narrow-oop base + constant int offset.
// Legal only with unshifted compressed oops.
operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);  // no index register
    scale(0x0);         // no scale
    disp($off);
  %}
%}
5597 
// Memory operand: narrow-oop base + constant long offset.
// Legal only with unshifted compressed oops.
operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);  // no index register
    scale(0x0);         // no scale
    disp($off);
  %}
%}
5612 
5613 
5614 
// AArch64 opto stubs need to write to the pc slot in the thread anchor.
// Address of the saved-pc field: thread register + a fixed immediate
// offset (immL_pc_off).
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);  // no index register
    scale(0x0);         // no scale
    disp($off);
  %}
%}
5629 
5630 //----------Special Memory Operands--------------------------------------------
5631 // Stack Slot Operand - This operand is used for loading and storing temporary
5632 //                      values on the stack where a match requires a value to
5633 //                      flow through memory.
// Stack slot holding a pointer; addressed as [SP + slot offset].
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
5648 
// Stack slot holding an int; addressed as [SP + slot offset].
operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
5662 
// Stack slot holding a float; addressed as [SP + slot offset].
operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
5676 
// Stack slot holding a double; addressed as [SP + slot offset].
operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
5690 
// Stack slot holding a long; addressed as [SP + slot offset].
operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
5704 
5705 // Operands for expressing Control Flow
5706 // NOTE: Label is a predefined operand which should not be redefined in
5707 //       the AD file. It is generically handled within the ADLC.
5708 
5709 //----------Conditional Branch Operands----------------------------------------
5710 // Comparison Op  - This is the operation of the comparison, and is limited to
5711 //                  the following set of codes:
5712 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5713 //
5714 // Other attributes of the comparison, such as unsignedness, are specified
5715 // by the comparison instruction that sets a condition code flags register.
5716 // That result is represented by a flags operand whose subtype is appropriate
5717 // to the unsignedness (etc.) of the comparison.
5718 //
5719 // Later, the instruction which matches both the Comparison Op (a Bool) and
5720 // the flags (produced by the Cmp) specifies the coding of the comparison op
5721 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5722 
5723 // used for signed integral comparisons and fp comparisons
5724 
// Bool operand for signed/fp comparisons. The numeric encodings are the
// AArch64 condition codes (EQ=0x0, NE=0x1, GE=0xa, LT=0xb, GT=0xc, LE=0xd,
// VS=0x6, VC=0x7).
operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5741 
5742 // used for unsigned integral comparisons
5743 
// Bool operand for unsigned comparisons: uses the unsigned AArch64
// condition codes (LO=0x3, HS=0x2, LS=0x9, HI=0x8).
operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5760 
5761 // used for certain integral comparisons which can be
5762 // converted to cbxx or tbxx instructions
5763 
// Bool operand restricted by predicate to eq/ne tests, so the matcher can
// select cbz/cbnz/tbz/tbnz forms. Encodings are the same signed condition
// codes as cmpOp.
operand cmpOpEqNe()
%{
  match(Bool);
  op_cost(0);
  predicate(n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::eq);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5783 
5784 // used for certain integral comparisons which can be
5785 // converted to cbxx or tbxx instructions
5786 
// Bool operand restricted by predicate to lt/ge tests (sign-bit tests),
// enabling cbz/cbnz/tbz/tbnz selection. Encodings as cmpOp.
operand cmpOpLtGe()
%{
  match(Bool);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5807 
5808 // used for certain unsigned integral comparisons which can be
5809 // converted to cbxx or tbxx instructions
5810 
// Bool operand restricted by predicate to eq/ne/lt/ge tests, for unsigned
// comparisons that can become cbz/cbnz/tbz/tbnz. Note the interface still
// lists the signed condition-code encodings, matching cmpOp.
operand cmpOpUEqNeLtGe()
%{
  match(Bool);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::eq
            || n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5833 
5834 // Special operand allowing long args to int ops to be truncated for free
5835 
// Register operand matching (ConvL2I reg): lets 32-bit instructions read a
// long register directly, eliding the explicit l2i truncation.
operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  interface(REG_INTER)
%}
5846 
// Vector memory opclasses, one per access size in bytes (4/8/16); the
// indOff operands are the correspondingly-scaled immediate-offset forms.
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
5850 
5851 //----------OPERAND CLASSES----------------------------------------------------
5852 // Operand Classes are groups of operands that are used as to simplify
5853 // instruction definitions by not requiring the AD writer to specify
5854 // separate instructions for every form of operand when the
5855 // instruction accepts multiple operand types with the same basic
5856 // encoding and format. The classic case of this is memory operands.
5857 
5858 // memory is used to define read/write location for load/store
5859 // instruction defs. we can turn a memory op into an Address
5860 
// memoryN groups all addressing modes legal for an N-byte scalar access;
// they differ only in which scaled immediate-offset operands (indOffIN_/
// indOffLN_) are admissible. The narrow-oop offset forms (indOffIN,
// indOffLN) appear only in memory4/memory8.
opclass memory1(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI1, indOffL1,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN);

opclass memory2(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI2, indOffL2,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN);

opclass memory4(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI4, indOffL4,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);

opclass memory8(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI8, indOffL8,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);

// All of the memory operands. For the pipeline description.
opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex,
               indOffI1, indOffL1, indOffI2, indOffL2, indOffI4, indOffL4, indOffI8, indOffL8,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
5877 
5878 
5879 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
5880 // operations. it allows the src to be either an iRegI or a (ConvL2I
5881 // iRegL). in the latter case the l2i normally planted for a ConvL2I
5882 // can be elided because the 32-bit instruction will just employ the
5883 // lower 32 bits anyway.
5884 //
5885 // n.b. this does not elide all L2I conversions. if the truncated
5886 // value is consumed by more than one operation then the ConvL2I
5887 // cannot be bundled into the consuming nodes so an l2i gets planted
5888 // (actually a movw $dst $src) and the downstream instructions consume
5889 // the result of the l2i as an iRegI input. That's a shame since the
5890 // movw is actually redundant but its not too costly.
5891 
5892 opclass iRegIorL2I(iRegI, iRegL2I);
5893 
5894 //----------PIPELINE-----------------------------------------------------------
5895 // Rules which define the behavior of the target architectures pipeline.
5896 
5897 // For specific pipelines, eg A53, define the stages of that pipeline
5898 //pipe_desc(ISS, EX1, EX2, WR);
// Aliases mapping the A53-style stage names (issue, execute 1/2, writeback)
// onto the generic S0..S5 stages declared in pipe_desc below.
#define ISS S0
#define EX1 S1
#define EX2 S2
#define WR  S3
5903 
// Pipeline model: approximates a dual-issue in-order core (A53-like); it is
// used to supply fixed latencies rather than a faithful resource model.
pipeline %{

attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // Fixed size instructions
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
5919 
5920 // We don't use an actual pipeline model so don't care about resources
5921 // or description. we do use pipeline classes to introduce fixed
5922 // latencies
5923 
5924 //----------RESOURCES----------------------------------------------------------
5925 // Resources are the functional units available to the machine
5926 
// INS0/INS1 are the two issue slots (INS01 = either slot); ALU0/ALU1 the
// integer ALUs; MAC multiply-accumulate; DIV divider; LDST load/store;
// NEON_FP the SIMD/FP unit.
resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Define the pipeline as a generic 6 stage pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);
5940 
5941 //----------PIPELINE CLASSES---------------------------------------------------
5942 // Pipeline Classes describe the stages in which input and output are
5943 // referenced by the hardware pipeline.
5944 
// FP dyadic op, single precision: operands read in S1/S2, result in S5.
pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP dyadic op, double precision.
pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP unary op, single precision.
pipe_class fp_uop_s(vRegF dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP unary op, double precision.
pipe_class fp_uop_d(vRegD dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP convert double -> float.
pipe_class fp_d2f(vRegF dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP convert float -> double.
pipe_class fp_f2d(vRegD dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
6000 
// FP <-> integer conversion pipe classes. All share the same timing:
// source read in S1, result written in S5 on the NEON/FP unit; they differ
// only in operand width/direction (f/d = float/double, i/l = int/long).
pipe_class fp_f2i(iRegINoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_l2f(vRegF dst, iRegL src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_d2i(iRegINoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
6072 
// FP divide, single precision. Uses INS0 only: can issue solely in slot 0.
pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

// FP divide, double precision; slot-0 only, like fp_div_s.
pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}
6092 
// FP conditional select, single precision: flags and both sources read in
// S1, result in S3.
pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP conditional select, double precision.
pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP move-immediate, single precision (no source operands).
pipe_class fp_imm_s(vRegF dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP move-immediate, double precision.
pipe_class fp_imm_d(vRegD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}
6130 
// FP load of a constant-pool value, single precision: result in S4.
pipe_class fp_load_constant_s(vRegF dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}

// FP load of a constant-pool value, double precision.
pipe_class fp_load_constant_d(vRegD dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}
6146 
// Vector multiply, 64-bit: dual issue in either slot.
pipe_class vmul64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector multiply, 128-bit: slot-0 only.
pipe_class vmul128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector multiply-accumulate, 64-bit: dst is also read (accumulator).
pipe_class vmla64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector multiply-accumulate, 128-bit: accumulator read, slot-0 only.
pipe_class vmla128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
6188 
// Vector integer dyadic op, 64-bit: result in S4.
pipe_class vdop64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S4;
%}

// Vector integer dyadic op, 128-bit: slot-0 only.
pipe_class vdop128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S4;
%}

// Vector logical op, 64-bit: result in S3.
pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector logical op, 128-bit: slot-0 only.
pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
6228 
// Vector shift by register, 64-bit.
pipe_class vshift64(vecD dst, vecD src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector shift by register, 128-bit: slot-0 only.
pipe_class vshift128(vecX dst, vecX src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// Vector shift by immediate, 64-bit.
pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector shift by immediate, 128-bit: slot-0 only.
pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
6266 
// Vector FP dyadic op, 64-bit.
pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector FP dyadic op, 128-bit: slot-0 only.
pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP multiply/divide, 64-bit: slot-0 only.
pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP multiply/divide, 128-bit: slot-0 only.
pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
6306 
// Vector FP square root, 128-bit: slot-0 only.
pipe_class vsqrt_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector FP unary op, 64-bit.
pipe_class vunop_fp64(vecD dst, vecD src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector FP unary op, 128-bit: slot-0 only.
pipe_class vunop_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
6333 
// Duplicate a GP register across a 64-bit vector.
pipe_class vdup_reg_reg64(vecD dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a GP register across a 128-bit vector.
pipe_class vdup_reg_reg128(vecX dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a float register across a 64-bit vector.
pipe_class vdup_reg_freg64(vecD dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a float register across a 128-bit vector.
pipe_class vdup_reg_freg128(vecX dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a double register across a 128-bit vector.
pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector move-immediate, 64-bit.
pipe_class vmovi_reg_imm64(vecD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector move-immediate, 128-bit: slot-0 only.
pipe_class vmovi_reg_imm128(vecX dst)
%{
  single_instruction;
  dst    : S3(write);
  INS0   : ISS;
  NEON_FP : S3;
%}
6394 
// Vector load, 64-bit: address consumed at issue, result in S5.
pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector load, 128-bit.
pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector store, 64-bit: address at issue, data read in S2.
pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
6421 
// Vector store, 128-bit: address at issue, data read in S2.
// Fix: the source operand was declared vecD (64-bit) although every other
// *128 pipe class (vmul128, vload_reg_mem128, ...) uses vecX; corrected to
// vecX. Pipe classes are referenced from ins_pipe() by name, so this does
// not affect any instruct definition.
pipe_class vstore_reg_mem128(vecX src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
6430 
6431 //------- Integer ALU operations --------------------------
6432 
// Integer ALU reg-reg operation
// Operands needed in EX1, result generated in EX2
// Eg.  ADD     x0, x1, x2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : EX1(read);
  INS01  : ISS; // Dual issue as instruction 0 or 1
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with constant shift
// Shifted register must be available in LATE_ISS instead of EX1
// Eg.  ADD     x0, x1, x2, LSL #2
pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : ISS(read);   // shifted operand read early, at issue
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg operation with constant shift
// Eg.  LSL     x0, x1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with variable shift
// Both operands must be available in LATE_ISS instead of EX1
// Result is available in EX1 instead of EX2
// Eg.  LSLV    x0, x1, x2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX1(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX1;
%}

// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  ALU    : EX1;
%}

// Integer ALU reg operation
// Eg.  NEG     x0, x1
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX2(write);
  src    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-immediate operation
// Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU immediate operation (no source operands)
// Eg.  MOV     x0, #N
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX1(write);
  INS01  : ISS;
  ALU    : EX1;
%}
6528 
//------- Compare operation -------------------------------

// Compare reg-reg, writes the flags register
// Eg.  CMP     x0, x1
pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  op2    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Compare reg-immediate
// Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}
6555 
//------- Conditional instructions ------------------------

// Conditional, no register operands (flags only)
// Eg.  CSINC   x0, zr, zr, <cond>
pipe_class icond_none(iRegI dst, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional, 2 register operands
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src1   : EX1(read);
  src2   : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional, 1 register operand
// EG.  CSEL    X0, X1, zr, <cond>
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
6593 
//------- Multiply pipeline operations --------------------

// Multiply reg-reg, 32-bit
// Eg.  MUL     w0, w1, w2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate, 32-bit
// Eg.  MADD    w0, w1, w2, w3
pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply reg-reg, 64-bit
// Eg.  MUL     x0, x1, x2
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate, 64-bit
// Eg.  MADD    x0, x1, x2, x3
pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}
6646 
//------- Divide pipeline operations --------------------

// 32-bit divide
// Eg.  SDIV    w0, w1, w2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

// 64-bit divide
// Eg.  SDIV    x0, x1, x2
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}
6672 
//------- Load pipeline operations ------------------------

// Load - prefetch
// Eg.  PFRM    <mem>
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, mem
// Eg.  LDR     x0, <mem>
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, reg (register-offset addressing)
// Eg.  LDR     x0, [sp, x1]
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}
6706 
//------- Store pipeline operations -----------------------

// Store - zr, mem (store of the zero register; no data operand)
// Eg.  STR     zr, <mem>
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, mem
// Eg.  STR     x0, <mem>
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : EX2(read);  // data not needed until EX2
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, reg (register-offset addressing; dst is the address register)
// Eg. STR      x0, [sp, x1]
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}
6740 
//------- Branch pipeline operations ----------------------

// Unconditional branch
pipe_class pipe_branch()
%{
  single_instruction;
  INS01  : ISS;
  BRANCH : EX1;
%}

// Conditional branch (reads flags)
pipe_class pipe_branch_cond(rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

// Compare & Branch
// EG.  CBZ/CBNZ
pipe_class pipe_cmp_branch(iRegI op1)
%{
  single_instruction;
  op1    : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}
6769 
//------- Synchronisation operations ----------------------

// Any operation requiring serialization.
// EG.  DMB/Atomic Ops/Load Acquire/Str Release
// force_serialization + INS01 : ISS(2) blocks dual issue; the conservative
// fixed latency of 16 keeps the scheduler from packing work around it.
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Generic big/slow expanded idiom - also serialized
// Like pipe_serial but modelled as a multi-bundle sequence of ~10 instructions.
pipe_class pipe_slow()
%{
  instruction_count(10);
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}
6793 
// Empty pipeline class
// Zero-latency placeholder; also used for MachNop (see define block below).
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);
%}

// Default pipeline class.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);
%}

// Define the class for the Nop node.
define %{
   MachNop = pipe_class_empty;
%}
6833 
6834 %}
6835 //----------INSTRUCTIONS-------------------------------------------------------
6836 //
6837 // match      -- States which machine-independent subtree may be replaced
6838 //               by this instruction.
6839 // ins_cost   -- The estimated cost of this instruction is used by instruction
6840 //               selection to identify a minimum cost tree of machine
6841 //               instructions that matches a tree of machine-independent
6842 //               instructions.
6843 // format     -- A string providing the disassembly for this instruction.
6844 //               The value of an instruction's operand may be inserted
6845 //               by referring to it with a '$' prefix.
6846 // opcode     -- Three instruction opcodes may be provided.  These are referred
6847 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
6849 //               indicate the type of machine instruction, while secondary
6850 //               and tertiary are often used for prefix options or addressing
6851 //               modes.
6852 // ins_encode -- A list of encode classes with parameters. The encode class
6853 //               name must have been defined in an 'enc_class' specification
6854 //               in the encode section of the architecture description.
6855 
6856 // ============================================================================
6857 // Memory (Load/Store) Instructions
6858 
// Load Instructions

// Plain (non-acquiring) loads. Every rule is guarded by !needs_acquiring_load(n)
// so that volatile loads fall through to the ldar* rules in the volatile
// section further below.

// Load Byte (8 bit signed)
instruct loadB(iRegINoSp dst, memory1 mem)
%{
  match(Set dst (LoadB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsbw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L(iRegLNoSp dst, memory1 mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));
  // The load is n->in(1): the predicate must inspect the LoadB under ConvI2L.
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned)
instruct loadUB(iRegINoSp dst, memory1 mem)
%{
  match(Set dst (LoadUB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L(iRegLNoSp dst, memory1 mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed)
instruct loadS(iRegINoSp dst, memory2 mem)
%{
  match(Set dst (LoadS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrshw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6930 
// Load Short (16 bit signed) into long
instruct loadS2L(iRegLNoSp dst, memory2 mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrsh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Char (16 bit unsigned)
instruct loadUS(iRegINoSp dst, memory2 mem)
%{
  match(Set dst (LoadUS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short/Char (16 bit unsigned) into long
// Same ldrh encoding as loadUS: the zero-extension performed by ldrh already
// yields the correct 64-bit value, so no separate widening is needed.
instruct loadUS2L(iRegLNoSp dst, memory2 mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed)
instruct loadI(iRegINoSp dst, memory4 mem)
%{
  match(Set dst (LoadI mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed) into long
// Uses ldrsw (sign-extending 32->64) to fold the ConvI2L into the load.
instruct loadI2L(iRegLNoSp dst, memory4 mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrsw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7000 
// Load Integer (32 bit unsigned) into long
// Matches (AndL (ConvI2L (LoadI ..)) 0xFFFFFFFF): a plain ldrw zero-extends,
// so the mask is folded away. The predicate digs two levels down to the load.
instruct loadUI2L(iRegLNoSp dst, memory4 mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Long (64 bit signed)
instruct loadL(iRegLNoSp dst, memory8 mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  // NOTE(review): the "# int" annotation below is stale for a 64-bit load;
  // it affects disassembly text only, the encoding is a full 64-bit ldr.
  format %{ "ldr  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Range
instruct loadRange(iRegINoSp dst, memory4 mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Pointer
// Only applies when the load carries no GC barrier data (barrier_data() == 0);
// barriered pointer loads are handled by GC-specific rules elsewhere.
instruct loadP(iRegPNoSp dst, memory8 mem)
%{
  match(Set dst (LoadP mem));
  predicate(!needs_acquiring_load(n) && (n->as_Load()->barrier_data() == 0));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7055 
// Load Compressed Pointer
instruct loadN(iRegNNoSp dst, memory4 mem)
%{
  match(Set dst (LoadN mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory8 mem)
%{
  match(Set dst (LoadKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Narrow Klass Pointer
instruct loadNKlass(iRegNNoSp dst, memory4 mem)
%{
  match(Set dst (LoadNKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Float
// FP loads use the generic pipe_class_memory rather than iload_reg_mem,
// unlike the integer loads above.
instruct loadF(vRegF dst, memory4 mem)
%{
  match(Set dst (LoadF mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrs  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_ldrs(dst, mem) );

  ins_pipe(pipe_class_memory);
%}

// Load Double
instruct loadD(vRegD dst, memory8 mem)
%{
  match(Set dst (LoadD mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrd  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_ldrd(dst, mem) );

  ins_pipe(pipe_class_memory);
%}
7125 
7126 
// Load Int Constant
instruct loadConI(iRegINoSp dst, immI src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# int" %}

  ins_encode( aarch64_enc_movw_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Long Constant
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Pointer Constant

// Costed at 4 * INSN_COST: materialising an arbitrary pointer may expand to a
// multi-instruction sequence (see aarch64_enc_mov_p) — TODO confirm expansion.
instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr\n\t"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Null Pointer Constant

instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# NULL ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
7182 
// Load Pointer Constant One

// Materialises the pointer constant 1 (immP_1).
instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  // Disassembly annotation fixed: this is the constant one, not a NULL ptr
  // (the old "# NULL ptr" text was copy-pasted from loadConP0).
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
7196 
// Load Byte Map Base Constant

// Materialises the card-table byte map base via an adr-style encoding.
instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Byte Map Base" %}

  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));

  ins_pipe(ialu_imm);
%}
7210 
// Load Narrow Pointer Constant

instruct loadConN(iRegNNoSp dst, immN con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{ "mov  $dst, $con\t# compressed ptr" %}

  ins_encode(aarch64_enc_mov_n(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Null Pointer Constant

instruct loadConN0(iRegNNoSp dst, immN0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed NULL ptr" %}

  ins_encode(aarch64_enc_mov_n0(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Klass Constant

instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_mov_nk(dst, con));

  ins_pipe(ialu_imm);
%}
7252 
// Load Packed Float Constant

// "Packed" immediates are the subset representable in fmov's 8-bit immediate
// form, so no constant-table load is needed.
instruct loadConF_packed(vRegF dst, immFPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST * 4);
  format %{ "fmovs  $dst, $con"%}
  ins_encode %{
    // fmovs takes its immediate as a double, hence the cast.
    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
  %}

  ins_pipe(fp_imm_s);
%}

// Load Float Constant

instruct loadConF(vRegF dst, immF con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 4);

  format %{
    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
  %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_s);
%}

// Load Packed Double Constant

instruct loadConD_packed(vRegD dst, immDPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST);
  format %{ "fmovd  $dst, $con"%}
  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
  %}

  ins_pipe(fp_imm_d);
%}
7296 
// Load Double Constant

// Loads a double constant from the constant table (non-packed immD values
// that cannot use fmovd's immediate form).
instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  format %{
    // Annotation fixed: this is a double constant ("double="), the old text
    // said "float=" — copy-paste from loadConF.
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
7313 
// Store Instructions

// Store CMS card-mark Immediate
// Used when the preceding StoreStore barrier can be elided.
instruct storeimmCM0(immI0 zero, memory1 mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST);
  format %{ "storestore (elided)\n\t"
            "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store CMS card-mark Immediate with intervening StoreStore
// needed when using CMS with no conditional card marking
// Emits a dmb ishst before the byte store (see aarch64_enc_strb0_ordered).
instruct storeimmCM0_ordered(immI0 zero, memory1 mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST * 2);
  format %{ "storestore\n\t"
            "dmb ishst"
            "\n\tstrb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0_ordered(mem));

  ins_pipe(istore_mem);
%}
7345 
// Store Byte
// Plain (non-releasing) stores in this section are guarded by
// !needs_releasing_store(n); volatile stores use the stlr* rules below.
instruct storeB(iRegIorL2I src, memory1 mem)
%{
  match(Set mem (StoreB mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb(src, mem));

  ins_pipe(istore_reg_mem);
%}
7359 
7360 
// Store Byte zero (immediate 0 is stored via the zero register).
instruct storeimmB0(immI0 zero, memory1 mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Disassembly annotation fixed: aarch64_enc_strb0 stores the zero register
  // (zr), matching the sibling storeimmCM0/storeimmC0 rules; the old text
  // named a misspelled scratch register ("rscractch2").
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
7373 
// Store Char/Short
instruct storeC(iRegIorL2I src, memory2 mem)
%{
  match(Set mem (StoreC mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Char/Short zero via the zero register.
instruct storeimmC0(immI0 zero, memory2 mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}
7400 
// Store Integer

instruct storeI(iRegIorL2I src, memory4 mem)
%{
  match(Set mem(StoreI mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Integer zero via the zero register.
instruct storeimmI0(immI0 zero, memory4 mem)
%{
  match(Set mem(StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}
7428 
// Store Long (64 bit signed)
instruct storeL(iRegL src, memory8 mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // NOTE(review): "# int" below is stale for a 64-bit store; disassembly
  // annotation only — the str encoding is a full 64-bit store.
  format %{ "str  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Long (64 bit signed)
instruct storeimmL0(immL0 zero, memory8 mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
7456 
// Store Pointer
instruct storeP(iRegP src, memory8 mem)
%{
  match(Set mem (StoreP mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Pointer
instruct storeimmP0(immP0 zero, memory8 mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}

// Store Compressed Pointer
instruct storeN(iRegN src, memory4 mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Compressed Pointer zero via the zero register.
instruct storeImmN0(immN0 zero, memory4 mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  zr, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}
7511 
// Store Float
// FP stores use pipe_class_memory, matching the FP loads above.
instruct storeF(vRegF src, memory4 mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD(vRegD src, memory8 mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Klass Pointer
instruct storeNKlass(iRegN src, memory4 mem)
%{
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
7556 
7557 // TODO
7558 // implement storeImmD0 and storeDImmPacked
7559 
// prefetch instructions
// Must be safe to execute with invalid address (cannot fault).

// Prefetch for allocation: PRFM PSTL1KEEP (prefetch for store, L1, keep).
instruct prefetchalloc( memory8 mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}
7573 
//  ---------------- volatile loads and stores ----------------

// Acquiring loads (ldar*) for volatile accesses. These rules carry no
// !needs_acquiring_load predicate: they are the fallback the plain-load rules
// above exclude themselves from. All use pipe_serial (serialising).

// Load Byte (8 bit signed)
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned)
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short (16 bit signed)
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Char (16 bit unsigned)
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}
7652 
// Load Short/Char (16 bit unsigned) into long
// ldarh zero-extends, so the widening to long is free.
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
7665 
// Load Short (16 bit signed) into long
// (Header fixed: Char is unsigned; only Short is signed.)
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  // Disassembly annotation fixed to match the encoding below: the rule emits
  // the signed acquiring halfword load ldarsh, not the unsigned ldarh.
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
7678 
// Load Integer (32 bit signed)
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit unsigned) into long
// ldarw zero-extends, folding away the 32-bit mask.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Long (64 bit signed)
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  // NOTE(review): "# int" is stale for a 64-bit load; annotation only.
  format %{ "ldar  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Pointer
// Only when no GC barrier data is attached (barrier_data() == 0).
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));
  predicate(n->as_Load()->barrier_data() == 0);

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Compressed Pointer
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Float
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Load Double
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}
7770 
// Store Byte
// Releasing stores (stlr*) for volatile accesses. Note these use
// pipe_class_memory, unlike the volatile loads above which use pipe_serial.
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Integer

instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem(StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Long (64 bit signed)
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  // NOTE(review): "# int" is stale for a 64-bit store; annotation only.
  format %{ "stlr  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Pointer
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Pointer
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Float
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7878 
7879 //  ---------------- end of volatile loads and stores ----------------
7880 
// Data-cache-line writeback (used e.g. for persistent-memory flushes).
// All three rules are gated on VM_Version::supports_data_cache_line_flush().
instruct cacheWB(indirect addr)
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWB addr);

  ins_cost(100);
  format %{"cache wb $addr" %}
  ins_encode %{
    // Only a plain base register with zero displacement is supported here.
    assert($addr->index_position() < 0, "should be");
    assert($addr$$disp == 0, "should be");
    __ cache_wb(Address($addr$$base$$Register, 0));
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Ordering barrier emitted before a run of cache writebacks.
instruct cacheWBPreSync()
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPreSync);

  ins_cost(100);
  format %{"cache wb presync" %}
  ins_encode %{
    __ cache_wbsync(true);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Ordering barrier emitted after a run of cache writebacks.
instruct cacheWBPostSync()
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPostSync);

  ins_cost(100);
  format %{"cache wb postsync" %}
  ins_encode %{
    __ cache_wbsync(false);
  %}
  ins_pipe(pipe_slow); // XXX
%}
7921 
7922 // ============================================================================
7923 // BSWAP Instructions
7924 
7925 instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
7926   match(Set dst (ReverseBytesI src));
7927 
7928   ins_cost(INSN_COST);
7929   format %{ "revw  $dst, $src" %}
7930 
7931   ins_encode %{
7932     __ revw(as_Register($dst$$reg), as_Register($src$$reg));
7933   %}
7934 
7935   ins_pipe(ialu_reg);
7936 %}
7937 
7938 instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
7939   match(Set dst (ReverseBytesL src));
7940 
7941   ins_cost(INSN_COST);
7942   format %{ "rev  $dst, $src" %}
7943 
7944   ins_encode %{
7945     __ rev(as_Register($dst$$reg), as_Register($src$$reg));
7946   %}
7947 
7948   ins_pipe(ialu_reg);
7949 %}
7950 
7951 instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
7952   match(Set dst (ReverseBytesUS src));
7953 
7954   ins_cost(INSN_COST);
7955   format %{ "rev16w  $dst, $src" %}
7956 
7957   ins_encode %{
7958     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
7959   %}
7960 
7961   ins_pipe(ialu_reg);
7962 %}
7963 
7964 instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
7965   match(Set dst (ReverseBytesS src));
7966 
7967   ins_cost(INSN_COST);
7968   format %{ "rev16w  $dst, $src\n\t"
7969             "sbfmw $dst, $dst, #0, #15" %}
7970 
7971   ins_encode %{
7972     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
7973     __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
7974   %}
7975 
7976   ins_pipe(ialu_reg);
7977 %}
7978 
7979 // ============================================================================
7980 // Zero Count Instructions
7981 
// Count leading zeros of a 32-bit int with CLZW.
instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountLeadingZerosI src));

  ins_cost(INSN_COST);
  format %{ "clzw  $dst, $src" %}
  ins_encode %{
    __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Count leading zeros of a 64-bit long with CLZ.
instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountLeadingZerosL src));

  ins_cost(INSN_COST);
  format %{ "clz   $dst, $src" %}
  ins_encode %{
    __ clz(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Count trailing zeros of a 32-bit int.  AArch64 has no trailing-zero
// count instruction, so reverse the bits (RBITW) and count leading
// zeros instead.
instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountTrailingZerosI src));

  ins_cost(INSN_COST * 2);
  format %{ "rbitw  $dst, $src\n\t"
            "clzw   $dst, $dst" %}
  ins_encode %{
    __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
    __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Count trailing zeros of a 64-bit long: RBIT + CLZ, as above.
instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountTrailingZerosL src));

  ins_cost(INSN_COST * 2);
  format %{ "rbit   $dst, $src\n\t"
            "clz    $dst, $dst" %}
  ins_encode %{
    __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
    __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}
8033 
8034 //---------- Population Count Instructions -------------------------------------
8035 //
8036 
// Population count of an int via the SIMD unit: move the value into a
// vector register, CNT gives a per-byte popcount, ADDV sums the eight
// byte counts, and the scalar result is moved back to a GPR.
instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // NOTE(review): this rewrites src in place; it only zeroes the top
    // 32 bits of the 64-bit register (the int value itself is unchanged)
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Popcount of an int loaded from memory: load straight into the vector
// register (LDRS), avoiding the GPR round-trip of popCountI.
instruct popCountI_mem(iRegINoSp dst, memory4 mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
              as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Note: Long.bitCount(long) returns an int.
// Popcount of a long: same SIMD sequence as popCountI but without the
// 32-bit zero-extension step.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Popcount of a long loaded from memory: load directly into the
// vector register (LDRD).
instruct popCountL_mem(iRegINoSp dst, memory8 mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
              as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
8123 
8124 // ============================================================================
8125 // MemBar Instruction
8126 
// LoadFence: orders prior loads before subsequent loads and stores
// (acquire-style barrier).
instruct load_fence() %{
  match(LoadFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "load_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}
  ins_pipe(pipe_serial);
%}

// Elide MemBarAcquire when the preceding load already provides acquire
// semantics (see unnecessary_acquire); emits only a block comment.
instruct unnecessary_membar_acquire() %{
  predicate(unnecessary_acquire(n));
  match(MemBarAcquire);
  ins_cost(0);

  format %{ "membar_acquire (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire (elided)");
  %}

  ins_pipe(pipe_class_empty);
%}

// Fallback MemBarAcquire: emit a real acquire barrier
// (LoadLoad|LoadStore).
instruct membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire\n\t"
            "dmb ish" %}

  ins_encode %{
    __ block_comment("membar_acquire");
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}

  ins_pipe(pipe_serial);
%}


// MemBarAcquireLock is always elided on AArch64: the lock-acquire CAS
// already provides the needed ordering.
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// StoreFence: orders prior loads and stores before subsequent stores
// (release-style barrier).
instruct store_fence() %{
  match(StoreFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "store_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

// Elide MemBarRelease when the following store already provides
// release semantics (see unnecessary_release).
instruct unnecessary_membar_release() %{
  predicate(unnecessary_release(n));
  match(MemBarRelease);
  ins_cost(0);

  format %{ "membar_release (elided)" %}

  ins_encode %{
    __ block_comment("membar_release (elided)");
  %}
  ins_pipe(pipe_serial);
%}

// Fallback MemBarRelease: emit a real release barrier
// (LoadStore|StoreStore).
instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release\n\t"
            "dmb ish" %}

  ins_encode %{
    __ block_comment("membar_release");
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

// Store-store ordering only.
instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(VOLATILE_REF_COST);

  format %{ "MEMBAR-store-store" %}

  ins_encode %{
    __ membar(Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

// MemBarReleaseLock is always elided on AArch64: the lock-release CAS
// already provides the needed ordering.
instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_release_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// Elide MemBarVolatile when adjacent accesses already give sequential
// consistency (see unnecessary_volatile).
instruct unnecessary_membar_volatile() %{
  predicate(unnecessary_volatile(n));
  match(MemBarVolatile);
  ins_cost(0);

  format %{ "membar_volatile (elided)" %}

  ins_encode %{
    __ block_comment("membar_volatile (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// Fallback MemBarVolatile: full StoreLoad barrier.  High cost biases
// the matcher towards the elided form whenever its predicate allows.
instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(VOLATILE_REF_COST*100);

  format %{ "membar_volatile\n\t"
             "dmb ish"%}

  ins_encode %{
    __ block_comment("membar_volatile");
    __ membar(Assembler::StoreLoad);
  %}

  ins_pipe(pipe_serial);
%}
8274 
8275 // ============================================================================
8276 // Cast/Convert Instructions
8277 
// Reinterpret a long as a pointer.  Pure register move; elided when
// the allocator assigns the same register to both operands.
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long -> ptr" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

// Reinterpret a pointer as a long.  Pure register move; elided when
// source and destination share a register.
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

// Convert oop into int for vectors alignment masking
// Truncate to the low 32 bits with MOVW (also zeroes the top half).
instruct convP2I(iRegINoSp dst, iRegP src) %{
  match(Set dst (ConvL2I (CastP2X src)));

  ins_cost(INSN_COST);
  format %{ "movw $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
8320 
// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
// With a zero shift the compressed form is numerically identical to
// the low 32 bits of the decoded pointer, so a 32-bit move suffices.
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  // Fixed format: use $dst (was literal "dst") and movw to match the
  // instruction actually emitted below.
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
8336 
8337 
// Convert oop pointer into compressed form
// Used when the oop may be null; the full encode sequence handles the
// null case, and the flags register is clobbered (KILL cr).
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Encode an oop statically known to be non-null; skips the null check.
// NOTE(review): cr is declared but there is no effect() clause here —
// confirm whether encode_heap_oop_not_null clobbers flags.
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Decode a compressed oop that may be null.
// NOTE(review): cr is declared without an effect() clause — confirm
// whether decode_heap_oop clobbers flags.
instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Decode a compressed oop statically known non-null (or constant);
// skips the null check.
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}
8391 
// n.b. AArch64 implementations of encode_klass_not_null and
// decode_klass_not_null do not modify the flags register so, unlike
// Intel, we don't kill CR as a side effect here

// Compress a klass pointer (always non-null).
instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}

// Decompress a klass pointer (always non-null).  The one-argument
// macro-assembler form is used for the in-place (dst == src) case.
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}
8429 
// CheckCastPP is a compile-time type assertion only: no code is
// emitted (size 0, empty encoding).
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// CastPP likewise emits no code.
instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// CastII likewise emits no code and costs nothing.
instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
8460 
8461 // ============================================================================
8462 // Atomic operation instructions
8463 //
8464 // Intel and SPARC both implement Ideal Node LoadPLocked and
8465 // Store{PIL}Conditional instructions using a normal load for the
8466 // LoadPLocked and a CAS for the Store{PIL}Conditional.
8467 //
8468 // The ideal code appears only to use LoadPLocked/StorePLocked as a
8469 // pair to lock object allocations from Eden space when not using
8470 // TLABs.
8471 //
8472 // There does not appear to be a Load{IL}Locked Ideal Node and the
8473 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
8474 // and to use StoreIConditional only for 32-bit and StoreLConditional
8475 // only for 64-bit.
8476 //
8477 // We implement LoadPLocked and StorePLocked instructions using,
8478 // respectively the AArch64 hw load-exclusive and store-conditional
8479 // instructions. Whereas we must implement each of
8480 // Store{IL}Conditional using a CAS which employs a pair of
8481 // instructions comprising a load-exclusive followed by a
8482 // store-conditional.
8483 
8484 
// Locked-load (linked load) of the current heap-top
// used when updating the eden heap top
// implemented using ldaxr on AArch64

instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flag (EQ) on success.
// implemented using stlxr on AArch64.

instruct storePConditional(memory8 heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  // n.b. the encoding ignores oldval: STLXR succeeds or fails based on
  // the exclusive monitor set by the paired LoadPLocked/ldaxr, not on a
  // value comparison.
  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
8526 
8527 
// storeLConditional is used by PhaseMacroExpand::expand_lock_node
// when attempting to rebias a lock towards the current thread.  We
// must use the acquire form of cmpxchg in order to guarantee acquire
// semantics in this case.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  // flags (EQ) carry the success/failure result
  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}

// storeIConditional also has acquire semantics, for no better reason
// than matching storeLConditional.  At the time of writing this
// comment storeIConditional was not used anywhere by AArch64.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8566 
8567 // standard CompareAndSwapX when we are using barriers
8568 // these have higher priority than the rules selected by a predicate
8569 
8570 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
8571 // can't match them
8572 
// Byte CAS: res <- 1 on success, 0 on failure (via cset on EQ);
// flags are clobbered.
instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgb $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgb(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Short (halfword) CAS; same success protocol as compareAndSwapB.
instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgs $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgs(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Int (word) CAS.
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Long (doubleword) CAS.
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Pointer CAS; only matched when no GC barrier data is attached
// (barrier-requiring collectors supply their own rules).
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  predicate(n->as_LoadStore()->barrier_data() == 0);
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Narrow-oop (compressed pointer) CAS, implemented as a word CAS.
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8681 
// alternative CompareAndSwapX when we are eliding barriers
//
// These variants are selected by needs_acquiring_load_exclusive(n):
// the CAS itself uses an acquiring load-exclusive, so surrounding
// membars can be elided.  The lower cost (VOLATILE_REF_COST vs 2x)
// makes them win over the plain rules when the predicate holds.

instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgb_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgb_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgs_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgs_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Pointer variant additionally requires no GC barrier data, matching
// the plain compareAndSwapP rule.
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8797 
8798 
8799 // ---------------------------------------------------------------------
8800 
8801 
8802 // BEGIN This section of the file is automatically generated. Do not edit --------------
8803 
8804 // Sundry CAS operations.  Note that release is always true,
8805 // regardless of the memory ordering of the CAS.  This is because we
8806 // need the volatile case to be sequentially consistent but there is
8807 // no trailing StoreLoad barrier emitted by C2.  Unfortunately we
8808 // can't check the type of memory ordering here, so we always emit a
8809 // STLXR.
8810 
8811 // This section is generated from aarch64_ad_cas.m4
8812 
8813 
8814 
8815 instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
8816   match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
8817   ins_cost(2 * VOLATILE_REF_COST);
8818   effect(TEMP_DEF res, KILL cr);
8819   format %{
8820     "cmpxchgb $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
8821   %}
8822   ins_encode %{
8823     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
8824                Assembler::byte, /*acquire*/ false, /*release*/ true,
8825                /*weak*/ false, $res$$Register);
8826     __ sxtbw($res$$Register, $res$$Register);
8827   %}
8828   ins_pipe(pipe_slow);
8829 %}
8830 
8831 instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
8832   match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
8833   ins_cost(2 * VOLATILE_REF_COST);
8834   effect(TEMP_DEF res, KILL cr);
8835   format %{
8836     "cmpxchgs $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
8837   %}
8838   ins_encode %{
8839     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
8840                Assembler::halfword, /*acquire*/ false, /*release*/ true,
8841                /*weak*/ false, $res$$Register);
8842     __ sxthw($res$$Register, $res$$Register);
8843   %}
8844   ins_pipe(pipe_slow);
8845 %}
8846 
8847 instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
8848   match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
8849   ins_cost(2 * VOLATILE_REF_COST);
8850   effect(TEMP_DEF res, KILL cr);
8851   format %{
8852     "cmpxchgw $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
8853   %}
8854   ins_encode %{
8855     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
8856                Assembler::word, /*acquire*/ false, /*release*/ true,
8857                /*weak*/ false, $res$$Register);
8858   %}
8859   ins_pipe(pipe_slow);
8860 %}
8861 
8862 instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
8863   match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
8864   ins_cost(2 * VOLATILE_REF_COST);
8865   effect(TEMP_DEF res, KILL cr);
8866   format %{
8867     "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
8868   %}
8869   ins_encode %{
8870     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
8871                Assembler::xword, /*acquire*/ false, /*release*/ true,
8872                /*weak*/ false, $res$$Register);
8873   %}
8874   ins_pipe(pipe_slow);
8875 %}
8876 
8877 instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
8878   match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
8879   ins_cost(2 * VOLATILE_REF_COST);
8880   effect(TEMP_DEF res, KILL cr);
8881   format %{
8882     "cmpxchgw $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
8883   %}
8884   ins_encode %{
8885     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
8886                Assembler::word, /*acquire*/ false, /*release*/ true,
8887                /*weak*/ false, $res$$Register);
8888   %}
8889   ins_pipe(pipe_slow);
8890 %}
8891 
8892 instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
8893   predicate(n->as_LoadStore()->barrier_data() == 0);
8894   match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
8895   ins_cost(2 * VOLATILE_REF_COST);
8896   effect(TEMP_DEF res, KILL cr);
8897   format %{
8898     "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
8899   %}
8900   ins_encode %{
8901     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
8902                Assembler::xword, /*acquire*/ false, /*release*/ true,
8903                /*weak*/ false, $res$$Register);
8904   %}
8905   ins_pipe(pipe_slow);
8906 %}
8907 
// Strong CAS on a byte with acquire semantics; selected (lower cost) when the
// node needs an acquiring load-exclusive.  The returned byte is sign-extended
// to int with sxtbw since the result is a signed Java byte.
// Fixed the format comment: this is a strong CAS (/*weak*/ false), so the
// old "(byte, weak)" annotation was misleading.
instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgb_acq $res = $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8924 
// Strong CAS on a short with acquire semantics; result sign-extended to int
// with sxthw since the result is a signed Java short.
// Fixed the format comment: this is a strong CAS (/*weak*/ false), so the
// old "(short, weak)" annotation was misleading.
instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgs_acq $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8941 
8942 
// Strong CAS on an int with acquire semantics; $res receives the previous
// memory value.
// Fixed the format comment: this is a strong CAS (/*weak*/ false), so the
// old "(int, weak)" annotation was misleading.
instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8958 
// Strong CAS on a long with acquire semantics; $res receives the previous
// memory value.
// Fixed the format comment: this is a strong CAS (/*weak*/ false), so the
// old "(long, weak)" annotation was misleading.
instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8974 
8975 
// Strong CAS on a narrow oop with acquire semantics; $res receives the
// previous memory value.
// Fixed the format comment: this is a strong CAS (/*weak*/ false), so the
// old "(narrow oop, weak)" annotation was misleading.
instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8991 
// Strong CAS on a pointer with acquire semantics; only matches when the node
// carries no GC barrier data.  $res receives the previous memory value.
// Fixed the format comment: this is a strong CAS (/*weak*/ false), so the
// old "(ptr, weak)" annotation was misleading.
instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9007 
// Weak CAS on a byte: may fail spuriously.  $res is a success flag (1/0)
// materialized from the EQ condition; the old memory value is discarded
// (noreg), so no TEMP_DEF result register is needed.
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgb $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9024 
// Weak CAS on a short: may fail spuriously.  $res is a success flag (1/0)
// built from EQ; old value discarded (noreg).  Release-only ordering.
instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgs $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9041 
// Weak CAS on an int: may fail spuriously.  $res is a success flag (1/0)
// built from EQ; old value discarded (noreg).  Release-only ordering.
instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9058 
// Weak CAS on a long: may fail spuriously.  Note $res is an int register --
// the result is a boolean success flag (1/0), not the 64-bit old value.
instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9075 
// Weak CAS on a narrow oop: may fail spuriously.  $res is a success flag
// (1/0) built from EQ; old value discarded (noreg).
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9092 
// Weak CAS on a pointer: may fail spuriously.  Only matches when the node
// carries no GC barrier data.  $res is a success flag (1/0) built from EQ.
instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9110 
// Acquiring variant of weakCompareAndSwapB; selected (lower cost) when the
// node needs an acquiring load-exclusive.  $res is a success flag (1/0).
instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgb_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9128 
// Acquiring variant of weakCompareAndSwapS; selected (lower cost) when the
// node needs an acquiring load-exclusive.  $res is a success flag (1/0).
instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgs_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9146 
// Acquiring variant of weakCompareAndSwapI; selected (lower cost) when the
// node needs an acquiring load-exclusive.  $res is a success flag (1/0).
instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9164 
// Acquiring variant of weakCompareAndSwapL; selected (lower cost) when the
// node needs an acquiring load-exclusive.  $res is a success flag (1/0).
instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9182 
// Acquiring variant of weakCompareAndSwapN; selected (lower cost) when the
// node needs an acquiring load-exclusive.  $res is a success flag (1/0).
instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9200 
// Acquiring variant of weakCompareAndSwapP; only matches when the node needs
// an acquiring load-exclusive and carries no GC barrier data.  $res is a
// success flag (1/0) built from EQ.
// Reordered predicate before match for consistency with all the sibling
// weakCompareAndSwap*Acq rules (no semantic change).
instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9218 
9219 // END This section of the file is automatically generated. Do not edit --------------
9220 // ---------------------------------------------------------------------
9221 
// Atomically swap the int at [$mem] with $newv; $prev receives the old value.
// Ordering semantics come from MacroAssembler::atomic_xchgw (not visible here).
instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetI mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9231 
// Atomically swap the long at [$mem] with $newv; $prev receives the old value.
instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
  match(Set prev (GetAndSetL mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9241 
// Atomically swap the narrow oop at [$mem] with $newv (word-sized exchange);
// $prev receives the old value.
instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetN mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9251 
// Atomically swap the pointer at [$mem] with $newv; $prev receives the old
// value.  Only matches when the node carries no GC barrier data.
instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set prev (GetAndSetP mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9262 
// Acquiring variant of get_and_setI (atomic_xchgalw -- the "al" form adds
// acquire ordering); selected (lower cost) when an acquiring load-exclusive
// is required.
instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetI mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchgw_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9273 
// Acquiring variant of get_and_setL (atomic_xchgal); selected (lower cost)
// when an acquiring load-exclusive is required.
instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetL mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9284 
// Acquiring variant of get_and_setN (atomic_xchgalw); selected (lower cost)
// when an acquiring load-exclusive is required.
instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetN mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchgw_acq $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9295 
// Acquiring variant of get_and_setP (atomic_xchgal); selected (lower cost)
// when an acquiring load-exclusive is required and there is no GC barrier
// data on the node.
instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
  match(Set prev (GetAndSetP mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9306 
9307 
// Atomic fetch-and-add on a long: $newval receives the value at [$mem]
// before $incr is added.  Cost is +1 over the _no_res form so that rule is
// preferred when the result is unused.
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9317 
// Fetch-and-add on a long whose result is unused: the old value is discarded
// (noreg).  Matches only when the ideal node's result is not consumed.
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9328 
// Fetch-and-add on a long with an immediate increment (valid add/sub
// immediate per the immLAddSub operand).
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9338 
// Immediate-increment fetch-and-add on a long, result unused (noreg).
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9349 
// Atomic fetch-and-add on an int: $newval receives the value at [$mem]
// before $incr is added.
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9359 
// Fetch-and-add on an int whose result is unused: old value discarded (noreg).
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9370 
// Fetch-and-add on an int with an immediate increment.
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9380 
// Immediate-increment fetch-and-add on an int, result unused (noreg).
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9391 
// Acquiring variant of get_and_addL (atomic_addal); selected (lower cost)
// when an acquiring load-exclusive is required.
instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9402 
// Acquiring fetch-and-add on a long, result unused (noreg).
instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addL_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9413 
// Acquiring fetch-and-add on a long with an immediate increment.
instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9424 
// Acquiring immediate-increment fetch-and-add on a long, result unused.
instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addL_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9435 
// Acquiring variant of get_and_addI (atomic_addalw); selected (lower cost)
// when an acquiring load-exclusive is required.
instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9446 
// Acquiring fetch-and-add on an int, result unused (noreg).
instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addI_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9457 
// Acquiring fetch-and-add on an int with an immediate increment.
instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9468 
// Acquiring immediate-increment fetch-and-add on an int, result unused.
instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addI_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9479 
9480 // Manifest a CmpL result in an integer register.
9481 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
// Three-way long compare of two registers into an int.
// Removed a stale commented-out format line (dead code).
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    __ cmp($src1$$Register, $src2$$Register);
    // csetw yields 1 for any inequality; cnegw then negates it to -1
    // when src1 < src2, leaving 0 / 1 / -1 as required.
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
9502 
// Three-way long compare of a register against an add/sub immediate.
// Fixed stray indentation on the `if (con < 0)` line.
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    int32_t con = (int32_t)$src2$$constant;
    // A negative constant is not encodable as a subs immediate, so compute
    // src1 - con as src1 + (-con).  Assumes immLAddSub never admits a value
    // whose negation overflows int32_t -- see the operand definition.
    if (con < 0) {
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
9527 
9528 // ============================================================================
9529 // Conditional Move Instructions
9530 
9531 // n.b. we have identical rules for both a signed compare op (cmpOp)
9532 // and an unsigned compare op (cmpOpU). it would be nice if we could
9533 // define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
9538 // which throws a ShouldNotHappen. So, we have to provide two flavours
9539 // of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
9540 
// CMoveI, both sources in registers, signed condition.  Note the operand
// order: csel selects $src2 when $cmp holds, otherwise $src1.
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9556 
// CMoveI, both sources in registers, unsigned condition (cmpOpU flavour --
// see the note above these rules for why signed/unsigned are duplicated).
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9572 
9573 // special cases where one arg is zero
9574 
9575 // n.b. this is selected in preference to the rule above because it
9576 // avoids loading constant 0 into a source register
9577 
9578 // TODO
9579 // we ought only to be able to cull one of these variants as the ideal
9580 // transforms ought always to order the zero consistently (to left/right?)
9581 
// CMoveI with a zero first source, signed condition: substitutes zr for the
// zero operand so no register need be loaded with 0.
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9597 
// CMoveI with a zero first source, unsigned condition (zr substituted for 0).
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9613 
// CMoveI with a zero second source, signed condition (zr substituted for 0).
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9629 
// CMoveI with a zero second source, unsigned condition (zr substituted for 0).
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9645 
9646 // special case for creating a boolean 0 or 1
9647 
9648 // n.b. this is selected in preference to the rule above because it
9649 // avoids loading constants 0 and 1 into a source register
9650 
// CMoveI producing a boolean 0/1, signed condition: csincw zr, zr gives
// cond ? 0 : 1 with no source registers at all.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
9669 
// CMoveI producing a boolean 0/1, unsigned condition (csincw zr, zr form).
instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
9688 
// CMoveL, both sources in registers, signed condition: csel selects $src2
// when $cmp holds, otherwise $src1.
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9704 
// CMoveL, both sources in registers, unsigned condition.
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9720 
9721 // special cases where one arg is zero
9722 
// Conditional move, long, signed compare, with the second CMove input a
// constant zero: uses zr directly so no register holds the constant.
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9738 
// Unsigned-compare counterpart of cmovL_reg_zero: selects zr when the
// condition holds, else $src.
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9754 
// Conditional move, long, signed compare, with the first CMove input a
// constant zero: selects $src when the condition holds, else zr.
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9770 
// Unsigned-compare counterpart of cmovL_zero_reg: selects $src when the
// condition holds, else zr.
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9786 
// Conditional move, pointer, signed compare: csel picks $src2 when the
// condition holds, else $src1.
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9802 
// Conditional move, pointer, unsigned compare (cmpOpU/rFlagsRegU variant of
// cmovP_reg_reg).
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9818 
9819 // special cases where one arg is zero
9820 
// Conditional move, pointer, signed compare, second input null (immP0):
// selects zr when the condition holds, else $src.
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9836 
// Unsigned-compare counterpart of cmovP_reg_zero.
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9852 
// Conditional move, pointer, signed compare, first input null (immP0):
// selects $src when the condition holds, else zr.
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9868 
// Unsigned-compare counterpart of cmovP_zero_reg.
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9884 
// Conditional move, compressed (narrow) oop, signed compare. 32-bit cselw
// because compressed oops are 32-bit values.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9900 
// Conditional move, compressed (narrow) oop, unsigned compare. 32-bit cselw
// because compressed oops are 32-bit values; cselw picks $src2 when the
// condition holds, else $src1.
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  // Fix: this is the unsigned variant; the disassembly comment previously
  // said "signed" (copy/paste from cmovN_reg_reg).
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9916 
9917 // special cases where one arg is zero
9918 
// Conditional move, compressed oop, signed compare, second input null
// (immN0): selects zr when the condition holds, else $src.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9934 
// Unsigned-compare counterpart of cmovN_reg_zero.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9950 
// Conditional move, compressed oop, signed compare, first input null
// (immN0): selects $src when the condition holds, else zr.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9966 
// Unsigned-compare counterpart of cmovN_zero_reg.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9982 
// Conditional move, float, signed compare, via FP conditional select.
// NOTE(review): the format string prints "$src1, $src2" but the encoding
// passes src2 before src1 (matching the integer cmov patterns above) —
// display-only discrepancy; confirm which order is intended for output.
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
10000 
// Conditional move, float, unsigned compare, via FP conditional select.
// NOTE(review): as with cmovF_reg, the format prints "$src1, $src2" while
// the encoding emits src2 before src1 — display-only; confirm intent.
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
10018 
// Conditional move, double, signed compare, via FP conditional select
// (fcseld, the double-precision form).
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // Fix: disassembly comment previously said "cmove float" in this
  // double-precision pattern.
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10036 
// Conditional move, double, unsigned compare, via FP conditional select
// (fcseld, the double-precision form).
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // Fix: disassembly comment previously said "cmove float" in this
  // double-precision pattern.
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10054 
10055 // ============================================================================
10056 // Arithmetic Instructions
10057 //
10058 
10059 // Integer Addition
10060 
10061 // TODO
10062 // these currently employ operations which do not set CR and hence are
10063 // not flagged as killing CR but we would like to isolate the cases
10064 // where we want to set flags from those where we don't. need to work
10065 // out how to do that.
10066 
// Integer add, register + register (32-bit addw).
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10081 
// Integer add, register + add/sub-encodable immediate. The shared
// aarch64_enc_addsubw_imm encoder handles both add and sub; opcode 0x0
// selects add.
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10095 
// Integer add of an immediate to the low 32 bits of a long (the ConvL2I is
// folded away: addw only reads/writes the W register anyway).
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10109 
10110 // Pointer Addition
// Pointer add, register + long offset register (64-bit add).
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10125 
// Pointer add with the int->long conversion folded into the add's sxtw
// extend; saves a separate sign-extend instruction, hence the 1.9x cost
// (cheaper than two INSN_COST instructions, dearer than one).
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}
10140 
// Pointer add with the index shift folded in: emitted as lea with a
// scaled (lsl) register-offset address.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10155 
// Pointer add folding both the int->long conversion and the index shift:
// lea with a sign-extended (sxtw) scaled register offset.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10170 
// Sign-extend-then-shift-left folded into one sbfiz: inserts the low bits
// of $src at bit position ($scale & 63). The field width is capped at 32
// since only 32 source bits are significant after ConvI2L.
// NOTE(review): confirm MIN is the intended macro here (HotSpot generally
// uses MIN2 for this).
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
10185 
10186 // Pointer Immediate Addition
10187 // n.b. this needs to be more expensive than using an indirect memory
10188 // operand
// Pointer add of an add/sub-encodable immediate; opcode 0x0 selects add in
// the shared aarch64_enc_addsub_imm encoder.
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10202 
10203 // Long Addition
// Long add, register + register (64-bit add).
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10219 
// Long Immediate Addition.
// No constant pool entries required.
// Long add, register + add/sub-encodable immediate; opcode 0x0 selects add
// in the shared aarch64_enc_addsub_imm encoder.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10234 
10235 // Integer Subtraction
// Integer subtract, register - register (32-bit subw).
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10250 
10251 // Immediate Subtraction
// Integer subtract of an add/sub-encodable immediate; opcode 0x1 selects
// sub in the shared aarch64_enc_addsubw_imm encoder.
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10265 
10266 // Long Subtraction
// Long subtract, register - register (64-bit sub).
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10282 
// Long Immediate Subtraction.
// No constant pool entries required.
// Long subtract of an add/sub-encodable immediate; opcode 0x1 selects sub
// in the shared aarch64_enc_addsub_imm encoder.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  // Fix: format previously read "sub$dst" (missing space between mnemonic
  // and operand), garbling the disassembly output.
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10297 
10298 // Integer Negation (special case for sub)
10299 
// Integer negate: 0 - src matched to the negw alias.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
10313 
10314 // Long Negation
10315 
// Long negate: 0 - src matched to the neg alias.
instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
10329 
10330 // Integer Multiply
10331 
// Integer multiply (32-bit mulw).
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
10346 
// Widening signed multiply: int x int -> long in one smull, folding the two
// ConvI2L nodes so no separate sign-extends are emitted.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
10361 
10362 // Long Multiply
10363 
// Long multiply (64-bit mul, low 64 bits of the product).
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
10378 
// High 64 bits of a signed 64x64 multiply (smulh) for the MulHiL node.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  format %{ "smulh   $dst, $src1, $src2, \t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
10394 
10395 // Combined Integer Multiply & Add/Sub
10396 
// Fused integer multiply-add: dst = src3 + src1 * src2 via maddw.
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Fix: format previously printed "madd" although the encoding emits the
  // 32-bit maddw.
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10412 
// Fused integer multiply-subtract: dst = src3 - src1 * src2 via msubw.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Fix: format previously printed "msub" although the encoding emits the
  // 32-bit msubw.
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10428 
10429 // Combined Integer Multiply & Neg
10430 
// Fused integer multiply-negate: dst = -(src1 * src2) via mnegw. Two match
// rules cover the negation appearing on either multiplicand.
instruct mnegI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI0 zero) %{
  match(Set dst (MulI (SubI zero src1) src2));
  match(Set dst (MulI src1 (SubI zero src2)));

  ins_cost(INSN_COST * 3);
  // Fix: format previously printed "mneg" although the encoding emits the
  // 32-bit mnegw.
  format %{ "mnegw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mnegw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10446 
10447 // Combined Long Multiply & Add/Sub
10448 
// Fused long multiply-add: dst = src3 + src1 * src2 via madd.
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
10464 
// Fused long multiply-subtract: dst = src3 - src1 * src2 via msub.
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
10480 
10481 // Combined Long Multiply & Neg
10482 
// Fused long multiply-negate: dst = -(src1 * src2) via mneg. Two match
// rules cover the negation appearing on either multiplicand.
instruct mnegL(iRegLNoSp dst, iRegL src1, iRegL src2, immL0 zero) %{
  match(Set dst (MulL (SubL zero src1) src2));
  match(Set dst (MulL src1 (SubL zero src2)));

  ins_cost(INSN_COST * 5);
  format %{ "mneg  $dst, $src1, $src2" %}

  ins_encode %{
    __ mneg(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
10498 
10499 // Combine Integer Signed Multiply & Add/Sub/Neg Long
10500 
// Widening signed multiply-add: dst = src3 + (long)src1 * (long)src2 in one
// smaddl, folding both ConvI2L nodes.
instruct smaddL(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegLNoSp src3) %{
  match(Set dst (AddL src3 (MulL (ConvI2L src1) (ConvI2L src2))));

  ins_cost(INSN_COST * 3);
  format %{ "smaddl  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ smaddl(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10516 
// Widening signed multiply-subtract: dst = src3 - (long)src1 * (long)src2
// in one smsubl, folding both ConvI2L nodes.
instruct smsubL(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegLNoSp src3) %{
  match(Set dst (SubL src3 (MulL (ConvI2L src1) (ConvI2L src2))));

  ins_cost(INSN_COST * 3);
  format %{ "smsubl  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ smsubl(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10532 
// Widening signed multiply-negate: dst = -((long)src1 * (long)src2) via
// smnegl; the two match rules cover negation of either converted operand.
instruct smnegL(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2, immL0 zero) %{
  match(Set dst (MulL (SubL zero (ConvI2L src1)) (ConvI2L src2)));
  match(Set dst (MulL (ConvI2L src1) (SubL zero (ConvI2L src2))));

  ins_cost(INSN_COST * 3);
  format %{ "smnegl  $dst, $src1, $src2" %}

  ins_encode %{
    __ smnegl(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10548 
10549 // Combined Multiply-Add Shorts into Integer (dst = src1 * src2 + src3 * src4)
10550 
// MulAddS2I (dst = src1*src2 + src3*src4) as a two-instruction sequence
// using rscratch1 as the intermediate product register.
instruct muladdS2I(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3, iRegIorL2I src4) %{
  match(Set dst (MulAddS2I (Binary src1 src2) (Binary src3 src4)));

  ins_cost(INSN_COST * 5);
  format %{ "mulw  rscratch1, $src1, $src2\n\t"
            "maddw $dst, $src3, $src4, rscratch1" %}

  ins_encode %{
    __ mulw(rscratch1, as_Register($src1$$reg), as_Register($src2$$reg));
    __ maddw(as_Register($dst$$reg), as_Register($src3$$reg), as_Register($src4$$reg), rscratch1); %}

  ins_pipe(imac_reg_reg);
%}
10564 
10565 // Integer Divide
10566 
// Integer divide via the shared aarch64_enc_divw encode class (sdivw).
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
10576 
10577 // Long Divide
10578 
// Long divide via the shared aarch64_enc_div encode class (sdiv).
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
10588 
10589 // Integer Remainder
10590 
// Integer remainder via the shared aarch64_enc_modw encode class:
// divide then multiply-subtract (dst = src1 - (src1/src2)*src2).
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  // Fix: second format line previously read "msubw($dst, ..." — stray
  // unbalanced parenthesis garbling the disassembly output.
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
10601 
10602 // Long Remainder
10603 
// Long remainder via the shared aarch64_enc_mod encode class:
// divide then multiply-subtract (dst = src1 - (src1/src2)*src2).
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  // Fix: second format line previously read "msub($dst, ..." — stray
  // unbalanced parenthesis garbling the disassembly output.
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
10614 
10615 // Integer Shifts
10616 
10617 // Shift Left Register
// Integer shift left by a register amount (variable-shift lslvw).
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10632 
10633 // Shift Left Immediate
// Integer shift left by an immediate; the count is masked to 0-31 to match
// Java's int shift semantics.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10648 
10649 // Shift Right Logical Register
// Integer unsigned (logical) shift right by a register amount (lsrvw).
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10664 
10665 // Shift Right Logical Immediate
// Integer unsigned (logical) shift right by an immediate; count masked to
// 0-31 to match Java's int shift semantics.
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10680 
10681 // Shift Right Arithmetic Register
// Integer arithmetic (sign-propagating) shift right by a register amount
// (asrvw).
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10696 
10697 // Shift Right Arithmetic Immediate
// Integer arithmetic shift right by an immediate; count masked to 0-31 to
// match Java's int shift semantics.
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10712 
10713 // Combined Int Mask and Right Shift (using UBFM)
10714 // TODO
10715 
10716 // Long Shifts
10717 
10718 // Shift Left Register
// Long shift left by a register amount (variable-shift lslv).
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10733 
// Shift Left Immediate
// Long left shift by constant: LShiftL matches a single LSL.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    // Continuation lines aligned to match the sibling lsr/asr rules.
    __ lsl(as_Register($dst$$reg),
           as_Register($src1$$reg),
           // only the low 6 bits of the count matter for a 64-bit shift
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10749 
// Shift Right Logical Register
// Long unsigned right shift by a register count: LSRV masks the count
// to 6 bits in hardware, matching Java's long-shift semantics.
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10765 
// Shift Right Logical Immediate
// Long unsigned right shift by constant: URShiftL matches a single LSR.
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           // only the low 6 bits of the count matter for a 64-bit shift
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10781 
// A special-case pattern for card table stores.
// Folds the pointer-to-integer cast (CastP2X) into the shift so the
// card-index computation is a single LSR of the oop address.
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10797 
// Shift Right Arithmetic Register
// Long arithmetic right shift by a register count: ASRV masks the count
// to 6 bits in hardware, matching Java's long-shift semantics.
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10813 
// Shift Right Arithmetic Immediate
// Long arithmetic right shift by constant: RShiftL matches a single ASR.
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           // only the low 6 bits of the count matter for a 64-bit shift
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10829 
10830 // BEGIN This section of the file is automatically generated. Do not edit --------------
10831 
// Long bitwise NOT: src1 ^ -1 is matched as EON with the zero register
// (zr), i.e. dst = src1 EOR NOT(zr) = NOT(src1).
// NOTE(review): generated section — source change belongs in the generator.
instruct regL_not_reg(iRegLNoSp dst,
                         iRegL src1, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
// Int bitwise NOT: src1 ^ -1 is matched as EONW with the zero register.
instruct regI_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
10864 
// Int src1 & ~src2: the AND-with-XOR(-1) shape folds into a single BICW
// (bit clear, shifted-register form with a zero shift).
instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
10881 
// Long src1 & ~src2: folds AND + XOR(-1) into a single BIC.
instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
10898 
// Int src1 | ~src2: folds OR + XOR(-1) into a single ORNW.
instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
10915 
// Long src1 | ~src2: folds OR + XOR(-1) into a single ORN.
instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
10932 
// Int ~(src1 ^ src2): the canonicalized -1 ^ (src2 ^ src1) shape folds
// into a single EONW.
instruct XorI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
10949 
// Long ~(src1 ^ src2): folds -1 ^ (src2 ^ src1) into a single EON.
instruct XorL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
10966 
// Int src1 & ~(src2 >>> src3): folds the shift, complement and AND into
// one BICW with an LSR-shifted operand (count masked to 5 bits).
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10984 
// Long src1 & ~(src2 >>> src3): one BIC with an LSR-shifted operand
// (count masked to 6 bits).
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11002 
// Int src1 & ~(src2 >> src3): one BICW with an ASR-shifted operand.
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11020 
// Long src1 & ~(src2 >> src3): one BIC with an ASR-shifted operand.
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11038 
// Int src1 & ~(src2 << src3): one BICW with an LSL-shifted operand.
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11056 
// Long src1 & ~(src2 << src3): one BIC with an LSL-shifted operand.
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11074 
// Int ~(src1 ^ (src2 >>> src3)): the canonicalized -1-xor shape folds
// into one EONW with an LSR-shifted operand.
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11092 
// Long ~(src1 ^ (src2 >>> src3)): one EON with an LSR-shifted operand.
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11110 
// Int ~(src1 ^ (src2 >> src3)): one EONW with an ASR-shifted operand.
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11128 
// Long ~(src1 ^ (src2 >> src3)): one EON with an ASR-shifted operand.
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11146 
// Int ~(src1 ^ (src2 << src3)): one EONW with an LSL-shifted operand.
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11164 
// Long ~(src1 ^ (src2 << src3)): one EON with an LSL-shifted operand.
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11182 
// Int src1 | ~(src2 >>> src3): one ORNW with an LSR-shifted operand.
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11200 
// Long src1 | ~(src2 >>> src3): one ORN with an LSR-shifted operand.
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11218 
// Int src1 | ~(src2 >> src3): one ORNW with an ASR-shifted operand.
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11236 
// Long src1 | ~(src2 >> src3): one ORN with an ASR-shifted operand.
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11254 
// Int src1 | ~(src2 << src3): one ORNW with an LSL-shifted operand.
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11272 
// Long src1 | ~(src2 << src3): one ORN with an LSL-shifted operand.
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11290 
// Int src1 & (src2 >>> src3): folds the constant shift into ANDW's
// shifted-register operand (count masked to 5 bits).
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11309 
// Long src1 & (src2 >>> src3): AND with an LSR-shifted operand
// (count masked to 6 bits).
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11328 
// Int src1 & (src2 >> src3): ANDW with an ASR-shifted operand.
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11347 
// Long src1 & (src2 >> src3): AND with an ASR-shifted operand.
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11366 
// Int src1 & (src2 << src3): ANDW with an LSL-shifted operand.
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11385 
// Long src1 & (src2 << src3): AND with an LSL-shifted operand.
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11404 
// Int src1 ^ (src2 >>> src3): EORW with an LSR-shifted operand.
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11423 
// Long src1 ^ (src2 >>> src3): EOR with an LSR-shifted operand.
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11442 
// Int src1 ^ (src2 >> src3): EORW with an ASR-shifted operand.
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11461 
// Long src1 ^ (src2 >> src3): EOR with an ASR-shifted operand.
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11480 
// Int src1 ^ (src2 << src3): EORW with an LSL-shifted operand.
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11499 
// Long src1 ^ (src2 << src3): EOR with an LSL-shifted operand.
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11518 
// Int src1 | (src2 >>> src3): ORRW with an LSR-shifted operand.
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11537 
// Long src1 | (src2 >>> src3): ORR with an LSR-shifted operand.
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11556 
// Int src1 | (src2 >> src3): ORRW with an ASR-shifted operand.
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11575 
// Long src1 | (src2 >> src3): ORR with an ASR-shifted operand.
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11594 
// Int src1 | (src2 << src3): ORRW with an LSL-shifted operand.
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11613 
// Long src1 | (src2 << src3): ORR with an LSL-shifted operand.
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11632 
// Int src1 + (src2 >>> src3): ADDW with an LSR-shifted operand.
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11651 
// Long src1 + (src2 >>> src3): ADD with an LSR-shifted operand.
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11670 
11671 instruct AddI_reg_RShift_reg(iRegINoSp dst,
11672                          iRegIorL2I src1, iRegIorL2I src2,
11673                          immI src3, rFlagsReg cr) %{
11674   match(Set dst (AddI src1 (RShiftI src2 src3)));
11675 
11676   ins_cost(1.9 * INSN_COST);
11677   format %{ "addw  $dst, $src1, $src2, ASR $src3" %}
11678 
11679   ins_encode %{
11680     __ addw(as_Register($dst$$reg),
11681               as_Register($src1$$reg),
11682               as_Register($src2$$reg),
11683               Assembler::ASR,
11684               $src3$$constant & 0x1f);
11685   %}
11686 
11687   ins_pipe(ialu_reg_reg_shift);
11688 %}
11689 
11690 instruct AddL_reg_RShift_reg(iRegLNoSp dst,
11691                          iRegL src1, iRegL src2,
11692                          immI src3, rFlagsReg cr) %{
11693   match(Set dst (AddL src1 (RShiftL src2 src3)));
11694 
11695   ins_cost(1.9 * INSN_COST);
11696   format %{ "add  $dst, $src1, $src2, ASR $src3" %}
11697 
11698   ins_encode %{
11699     __ add(as_Register($dst$$reg),
11700               as_Register($src1$$reg),
11701               as_Register($src2$$reg),
11702               Assembler::ASR,
11703               $src3$$constant & 0x3f);
11704   %}
11705 
11706   ins_pipe(ialu_reg_reg_shift);
11707 %}
11708 
11709 instruct AddI_reg_LShift_reg(iRegINoSp dst,
11710                          iRegIorL2I src1, iRegIorL2I src2,
11711                          immI src3, rFlagsReg cr) %{
11712   match(Set dst (AddI src1 (LShiftI src2 src3)));
11713 
11714   ins_cost(1.9 * INSN_COST);
11715   format %{ "addw  $dst, $src1, $src2, LSL $src3" %}
11716 
11717   ins_encode %{
11718     __ addw(as_Register($dst$$reg),
11719               as_Register($src1$$reg),
11720               as_Register($src2$$reg),
11721               Assembler::LSL,
11722               $src3$$constant & 0x1f);
11723   %}
11724 
11725   ins_pipe(ialu_reg_reg_shift);
11726 %}
11727 
11728 instruct AddL_reg_LShift_reg(iRegLNoSp dst,
11729                          iRegL src1, iRegL src2,
11730                          immI src3, rFlagsReg cr) %{
11731   match(Set dst (AddL src1 (LShiftL src2 src3)));
11732 
11733   ins_cost(1.9 * INSN_COST);
11734   format %{ "add  $dst, $src1, $src2, LSL $src3" %}
11735 
11736   ins_encode %{
11737     __ add(as_Register($dst$$reg),
11738               as_Register($src1$$reg),
11739               as_Register($src2$$reg),
11740               Assembler::LSL,
11741               $src3$$constant & 0x3f);
11742   %}
11743 
11744   ins_pipe(ialu_reg_reg_shift);
11745 %}
11746 
// ---- Subtract with constant-shifted second operand --------------------
// Mirror of the add family above: folds a constant shift of the
// subtrahend into the AArch64 sub (shifted register) form:
//   dst = src1 - (src2 <shift-op> src3)
// Shift amounts masked to 5 bits (32-bit "subw") or 6 bits (64-bit "sub").

// dst = src1 - (src2 >>> src3), 32-bit (logical shift right)
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >>> src3), 64-bit (logical shift right)
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >> src3), 32-bit (arithmetic shift right)
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >> src3), 64-bit (arithmetic shift right)
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 << src3), 32-bit (shift left)
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 << src3), 64-bit (shift left)
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11860 
11861 
11862 
// Shift Left followed by Shift Right (signed, 64-bit).
// This idiom is used by the compiler for the i2b bytecode etc.
// (x << lshift) >> rshift is a signed bitfield move: the SBFM
// immediates are computed as r = (rshift - lshift) & 63 (rotate) and
// s = 63 - lshift (last significant source bit).
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant & 63;
    int rshift = $rshift_count$$constant & 63;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right (signed, 32-bit).
// This idiom is used by the compiler for the i2b bytecode etc.
// Same encoding scheme as sbfmL but with 5-bit shift masks and the
// 32-bit SBFMW form: r = (rshift - lshift) & 31, s = 31 - lshift.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant & 31;
    int rshift = $rshift_count$$constant & 31;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right (unsigned, 64-bit).
// This idiom is used by the compiler for the i2b bytecode etc.
// (x << lshift) >>> rshift maps to UBFM with the same r/s computation
// as the signed variants above.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant & 63;
    int rshift = $rshift_count$$constant & 63;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right (unsigned, 32-bit).
// This idiom is used by the compiler for the i2b bytecode etc.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant & 31;
    int rshift = $rshift_count$$constant & 31;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
// Bitfield extract with shift & mask:
//   (src >>> rshift) & mask  ->  ubfx/ubfxw dst, src, rshift, width
// where mask is 2^width - 1 (guaranteed by the imm*_bitmask operands,
// so exact_log2(mask + 1) yields the field width).

instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));
  // Make sure we are not going to exceed what ubfxw can do:
  // the extracted field (rshift + width) must fit in 32 bits.
  predicate((exact_log2(n->in(2)->get_int() + 1) + (n->in(1)->in(2)->get_int() & 31)) <= (31 + 1));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant & 31;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// 64-bit variant: field (rshift + width) must fit in 64 bits.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));
  // Make sure we are not going to exceed what ubfx can do.
  predicate((exact_log2_long(n->in(2)->get_long() + 1) + (n->in(1)->in(2)->get_int() & 63)) <= (63 + 1));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant & 63;
    long mask = $mask$$constant;
    int width = exact_log2_long(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}

// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// The ConvI2L is free here: ubfx zero-fills the upper bits anyway.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));
  // Make sure we are not going to exceed what ubfxw can do.
  predicate((exact_log2(n->in(1)->in(2)->get_int() + 1) + (n->in(1)->in(1)->in(2)->get_int() & 31)) <= (31 + 1));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant & 31;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11998 
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immI_bitmask guarantees it.
// (src & mask) << lshift  ->  ubfizw dst, src, lshift, width
// where width = exact_log2(mask + 1); the predicate checks that
// lshift + width fits in 32 bits.
instruct ubfizwI(iRegINoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftI (AndI src mask) lshift));
  predicate((exact_log2(n->in(1)->in(2)->get_int() + 1) + (n->in(2)->get_int() & 31)) <= (31 + 1));

  ins_cost(INSN_COST);
  format %{ "ubfizw $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant & 31;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfizw(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immL_bitmask guarantees it.
// 64-bit variant: lshift + width must fit in 64 bits.
instruct ubfizL(iRegLNoSp dst, iRegL src, immI lshift, immL_bitmask mask)
%{
  match(Set dst (LShiftL (AndL src mask) lshift));
  predicate((exact_log2_long(n->in(1)->in(2)->get_long() + 1) + (n->in(2)->get_int() & 63)) <= (63 + 1));

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant & 63;
    long mask = $mask$$constant;
    int width = exact_log2_long(mask+1);
    __ ubfiz(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}

// If there is a convert I to L block between and AndI and a LShiftL, we can also match ubfiz
// (the masked value is non-negative, so the widening is a zero-extend
// that ubfiz performs implicitly).
instruct ubfizIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftL (ConvI2L (AndI src mask)) lshift));
  predicate((exact_log2(n->in(1)->in(1)->in(2)->get_int() + 1) + (n->in(2)->get_int() & 63)) <= (63 + 1));

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant & 63;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfiz(as_Register($dst$$reg),
             as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12053 
// Rotations

// (src1 << lshift) | (src2 >>> rshift) is an EXTR (extract register)
// when lshift + rshift == 64 (checked by the predicate).  With
// src1 == src2 this is a rotate right by rshift.
instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == (((n->in(1)->in(2)->get_int() & 63) + (n->in(2)->in(2)->get_int() & 63)) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12070 
// 32-bit variant of extrOrL: (src1 << lshift) | (src2 >>> rshift) is an
// EXTRW when lshift + rshift == 32 (checked by the predicate).  With
// src1 == src2 this is a 32-bit rotate right by rshift.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == (((n->in(1)->in(2)->get_int() & 31) + (n->in(2)->in(2)->get_int() & 31)) & 31));

  ins_cost(INSN_COST);
  // Debug format uses the 32-bit mnemonic to match the emitted extrw
  // (previously printed "extr", the 64-bit form).
  format %{ "extrw $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12085 
// Same extract pattern as extrOrL but with AddL combining the halves:
// when lshift + rshift == 64 the two shifted values occupy disjoint bit
// ranges, so add and or are equivalent and EXTR applies.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == (((n->in(1)->in(2)->get_int() & 63) + (n->in(2)->in(2)->get_int() & 63)) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12100 
// 32-bit variant of extrAddL: when lshift + rshift == 32 the shifted
// halves do not overlap, so the AddI is equivalent to an or and the
// whole expression reduces to a single EXTRW.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == (((n->in(1)->in(2)->get_int() & 31) + (n->in(2)->in(2)->get_int() & 31)) & 31));

  ins_cost(INSN_COST);
  // Debug format uses the 32-bit mnemonic to match the emitted extrw
  // (previously printed "extr", the 64-bit form).
  format %{ "extrw $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12115 
12116 
// rol expander (64-bit).
// AArch64 has no rotate-left instruction; rol x, n == ror x, (-n mod 64),
// so negate the shift into rscratch1 and use RORV.  Clobbers rscratch1.

instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol expander (32-bit) — same negate-then-rorvw trick.

instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Match (src << shift) | (src >>> (64 - shift)) — variable rotate left.
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// Match (src << shift) | (src >>> (0 - shift)); equivalent because
// shift counts are taken mod 64.
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit: (src << shift) | (src >>> (32 - shift)).
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit: (src << shift) | (src >>> (0 - shift)).
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
12184 
// ror expander (64-bit).
// Rotate right maps directly onto RORV — no scratch register needed,
// hence the lower cost compared to the rol expanders above.

instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander (32-bit) — direct RORVW.

instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Match (src >>> shift) | (src << (64 - shift)) — variable rotate right.
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// Match (src >>> shift) | (src << (0 - shift)); equivalent because
// shift counts are taken mod 64.
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit: (src >>> shift) | (src << (32 - shift)).
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit: (src >>> shift) | (src << (0 - shift)).
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
12250 
// Add/subtract (extended)
// Fold ConvI2L into the add/sub (extended register) form:
// the int operand is sign-extended to 64 bits (sxtw) by the
// instruction itself, so no separate widening move is needed.

instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};

instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
12278 
12279 
// ---- Add with shift-pair extend idiom ---------------------------------
// The compiler expresses a narrowing extend as a shift-left/shift-right
// pair, e.g. (x << 16) >> 16 is a sign-extend from 16 bits.  These
// rules recognize the pair (the immI_* operand types pin the shift
// counts) and fold it into the add (extended register) form.

// src1 + ((src2 << 16) >> 16)  ->  add ..., sxth (sign-extend short)
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// src1 + ((src2 << 24) >> 24)  ->  add ..., sxtb (sign-extend byte)
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// src1 + ((src2 << 24) >>> 24)  ->  add ..., uxtb (zero-extend byte)
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: src1 + ((src2 << 48) >> 48)  ->  add ..., sxth
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: src1 + ((src2 << 32) >> 32)  ->  add ..., sxtw
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: src1 + ((src2 << 56) >> 56)  ->  add ..., sxtb
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit: src1 + ((src2 << 56) >>> 56)  ->  add ..., uxtb
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
12370 
12371 
// ---- Add/subtract with mask-based zero-extend idiom -------------------
// A zero-extend can also be written as an AND with an all-ones mask
// (0xff, 0xffff, 0xffffffff); the imm*_255 / imm*_65535 /
// immL_4294967295 operand types pin the mask, and the rules below fold
// it into the add/sub (extended register) uxtb/uxth/uxtw forms.

// src1 + (src2 & 0xff), 32-bit  ->  addw ..., uxtb
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// src1 + (src2 & 0xffff), 32-bit  ->  addw ..., uxth
instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// src1 + (src2 & 0xff), 64-bit  ->  add ..., uxtb
instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// src1 + (src2 & 0xffff), 64-bit  ->  add ..., uxth
instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// src1 + (src2 & 0xffffffff), 64-bit  ->  add ..., uxtw
instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

// src1 - (src2 & 0xff), 32-bit  ->  subw ..., uxtb
instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// src1 - (src2 & 0xffff), 32-bit  ->  subw ..., uxth
instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// src1 - (src2 & 0xff), 64-bit  ->  sub ..., uxtb
instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

// src1 - (src2 & 0xffff), 64-bit  ->  sub ..., uxth
instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

// src1 - (src2 & 0xffffffff), 64-bit  ->  sub ..., uxtw
instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
12501 
12502 
// ---- Add/subtract with extend plus left shift -------------------------
// The add/sub (extended register) forms also accept a small left shift
// of the extended operand (amount constrained by immIExt), folding
//   src1 +/- (sign_extend(src2) << lshift2)
// into one instruction; the inner shift-left/shift-right pair encodes
// the sign-extension as in the families above.

// src1 + (sxtb(src2) << lshift2), 64-bit
instruct AddExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// src1 + (sxth(src2) << lshift2), 64-bit
instruct AddExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// src1 + (sxtw(src2) << lshift2), 64-bit
instruct AddExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// src1 - (sxtb(src2) << lshift2), 64-bit
instruct SubExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// src1 - (sxth(src2) << lshift2), 64-bit
instruct SubExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

// src1 - (sxtw(src2) << lshift2), 64-bit
instruct SubExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12580 
12581 instruct AddExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
12582 %{
12583   match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
12584   ins_cost(1.9 * INSN_COST);
12585   format %{ "addw  $dst, $src1, $src2, sxtb #lshift2" %}
12586 
12587    ins_encode %{
12588      __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
12589             as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
12590    %}
12591   ins_pipe(ialu_reg_reg_shift);
12592 %}
12593 
12594 instruct AddExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
12595 %{
12596   match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
12597   ins_cost(1.9 * INSN_COST);
12598   format %{ "addw  $dst, $src1, $src2, sxth #lshift2" %}
12599 
12600    ins_encode %{
12601      __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
12602             as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
12603    %}
12604   ins_pipe(ialu_reg_reg_shift);
12605 %}
12606 
12607 instruct SubExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
12608 %{
12609   match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
12610   ins_cost(1.9 * INSN_COST);
12611   format %{ "subw  $dst, $src1, $src2, sxtb #lshift2" %}
12612 
12613    ins_encode %{
12614      __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
12615             as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
12616    %}
12617   ins_pipe(ialu_reg_reg_shift);
12618 %}
12619 
12620 instruct SubExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
12621 %{
12622   match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
12623   ins_cost(1.9 * INSN_COST);
12624   format %{ "subw  $dst, $src1, $src2, sxth #lshift2" %}
12625 
12626    ins_encode %{
12627      __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
12628             as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
12629    %}
12630   ins_pipe(ialu_reg_reg_shift);
12631 %}
12632 
12633 
12634 instruct AddExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
12635 %{
12636   match(Set dst (AddL src1 (LShiftL (ConvI2L src2) lshift)));
12637   ins_cost(1.9 * INSN_COST);
12638   format %{ "add  $dst, $src1, $src2, sxtw #lshift" %}
12639 
12640    ins_encode %{
12641      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12642             as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
12643    %}
12644   ins_pipe(ialu_reg_reg_shift);
12645 %};
12646 
12647 instruct SubExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
12648 %{
12649   match(Set dst (SubL src1 (LShiftL (ConvI2L src2) lshift)));
12650   ins_cost(1.9 * INSN_COST);
12651   format %{ "sub  $dst, $src1, $src2, sxtw #lshift" %}
12652 
12653    ins_encode %{
12654      __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
12655             as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
12656    %}
12657   ins_pipe(ialu_reg_reg_shift);
12658 %};
12659 
12660 
12661 instruct AddExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
12662 %{
12663   match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
12664   ins_cost(1.9 * INSN_COST);
12665   format %{ "add  $dst, $src1, $src2, uxtb #lshift" %}
12666 
12667    ins_encode %{
12668      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12669             as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
12670    %}
12671   ins_pipe(ialu_reg_reg_shift);
12672 %}
12673 
12674 instruct AddExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
12675 %{
12676   match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
12677   ins_cost(1.9 * INSN_COST);
12678   format %{ "add  $dst, $src1, $src2, uxth #lshift" %}
12679 
12680    ins_encode %{
12681      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12682             as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
12683    %}
12684   ins_pipe(ialu_reg_reg_shift);
12685 %}
12686 
12687 instruct AddExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
12688 %{
12689   match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
12690   ins_cost(1.9 * INSN_COST);
12691   format %{ "add  $dst, $src1, $src2, uxtw #lshift" %}
12692 
12693    ins_encode %{
12694      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12695             as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
12696    %}
12697   ins_pipe(ialu_reg_reg_shift);
12698 %}
12699 
12700 instruct SubExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
12701 %{
12702   match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
12703   ins_cost(1.9 * INSN_COST);
12704   format %{ "sub  $dst, $src1, $src2, uxtb #lshift" %}
12705 
12706    ins_encode %{
12707      __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
12708             as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
12709    %}
12710   ins_pipe(ialu_reg_reg_shift);
12711 %}
12712 
12713 instruct SubExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
12714 %{
12715   match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
12716   ins_cost(1.9 * INSN_COST);
12717   format %{ "sub  $dst, $src1, $src2, uxth #lshift" %}
12718 
12719    ins_encode %{
12720      __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
12721             as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
12722    %}
12723   ins_pipe(ialu_reg_reg_shift);
12724 %}
12725 
12726 instruct SubExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
12727 %{
12728   match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
12729   ins_cost(1.9 * INSN_COST);
12730   format %{ "sub  $dst, $src1, $src2, uxtw #lshift" %}
12731 
12732    ins_encode %{
12733      __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
12734             as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
12735    %}
12736   ins_pipe(ialu_reg_reg_shift);
12737 %}
12738 
12739 instruct AddExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
12740 %{
12741   match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
12742   ins_cost(1.9 * INSN_COST);
12743   format %{ "addw  $dst, $src1, $src2, uxtb #lshift" %}
12744 
12745    ins_encode %{
12746      __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
12747             as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
12748    %}
12749   ins_pipe(ialu_reg_reg_shift);
12750 %}
12751 
12752 instruct AddExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
12753 %{
12754   match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
12755   ins_cost(1.9 * INSN_COST);
12756   format %{ "addw  $dst, $src1, $src2, uxth #lshift" %}
12757 
12758    ins_encode %{
12759      __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
12760             as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
12761    %}
12762   ins_pipe(ialu_reg_reg_shift);
12763 %}
12764 
12765 instruct SubExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
12766 %{
12767   match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
12768   ins_cost(1.9 * INSN_COST);
12769   format %{ "subw  $dst, $src1, $src2, uxtb #lshift" %}
12770 
12771    ins_encode %{
12772      __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
12773             as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
12774    %}
12775   ins_pipe(ialu_reg_reg_shift);
12776 %}
12777 
12778 instruct SubExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
12779 %{
12780   match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
12781   ins_cost(1.9 * INSN_COST);
12782   format %{ "subw  $dst, $src1, $src2, uxth #lshift" %}
12783 
12784    ins_encode %{
12785      __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
12786             as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
12787    %}
12788   ins_pipe(ialu_reg_reg_shift);
12789 %}
12790 // END This section of the file is automatically generated. Do not edit --------------
12791 
12792 // ============================================================================
12793 // Floating Point Arithmetic Instructions
12794 
// Float add: fadds Sd, Sn, Sm (scalar single-precision).
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double add: faddd Dd, Dn, Dm (scalar double-precision).
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Float subtract: fsubs Sd, Sn, Sm.
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double subtract: fsubd Dd, Dn, Dm.
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Float multiply: fmuls Sd, Sn, Sm (slightly higher cost than add/sub).
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double multiply: fmuld Dd, Dn, Dm.
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
12884 
// Fused multiply-add, only matched when -XX:+UseFMA (exact fused semantics).
// src1 * src2 + src3
instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary src1 src2)));

  format %{ "fmadds   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 + src3
instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary src1 src2)));

  format %{ "fmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmaddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 + src3
// Two match rules: the negation may appear on either multiplicand.
instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));

  format %{ "fmsubs   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubs(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 + src3
instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));

  format %{ "fmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12954 
// Negated fused multiply-add (fnmadd): -src1 * src2 - src3.
// -src1 * src2 - src3
instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));

  format %{ "fnmadds  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmadds(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 - src3
instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));

  format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmaddd(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12990 
// fnmsub: src1 * src2 - src3.
// NOTE(review): the 'zero' operand is referenced by neither the match rule
// nor the encoding — presumably a leftover from an earlier match pattern;
// confirm against adlc before removing.
// src1 * src2 - src3
instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));

  format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmsubs(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 - src3
// NOTE(review): same unused 'zero' operand as mnsubF_reg_reg above.
instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));

  format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
  // n.b. insn name should be fnmsubd
    __ fnmsub(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13025 
13026 
// Math.max(FF)F
instruct maxF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MaxF src1 src2));

  format %{ "fmaxs   $dst, $src1, $src2" %}
  ins_encode %{
    __ fmaxs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Math.min(FF)F
instruct minF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MinF src1 src2));

  format %{ "fmins   $dst, $src1, $src2" %}
  ins_encode %{
    __ fmins(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Math.max(DD)D
instruct maxD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MaxD src1 src2));

  format %{ "fmaxd   $dst, $src1, $src2" %}
  ins_encode %{
    __ fmaxd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Math.min(DD)D
instruct minD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MinD src1 src2));

  format %{ "fmind   $dst, $src1, $src2" %}
  ins_encode %{
    __ fmind(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
13082 
13083 
// Float divide: fdivs. Much higher cost than add/mul — division is slow.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}

// Double divide: fdivd. Costlier still than the single-precision form.
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
13113 
// Float negate: fnegs Sd, Sn.
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  // Fixed: format previously said "fneg"; the encoding emits fnegs
  // (matches sibling negD_reg_reg, which says "fnegd").
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
13127 
// Double negate: fnegd Dd, Dn.
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
13141 
// Float absolute value: fabss Sd, Sn.
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}

// Double absolute value: fabsd Dd, Dn.
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
13167 
// Double square root: fsqrtd Dd, Dn.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src$$reg));
  %}

  // Fixed: was fp_div_s — the pipeline classes were swapped with sqrtF_reg.
  // A double-precision sqrt belongs on the double divide/sqrt pipe.
  ins_pipe(fp_div_d);
%}
13180 
// Float square root: fsqrts Sd, Sn.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (SqrtF src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
              as_FloatRegister($src$$reg));
  %}

  // Fixed: was fp_div_d — the pipeline classes were swapped with sqrtD_reg.
  // A single-precision sqrt belongs on the single divide/sqrt pipe.
  ins_pipe(fp_div_s);
%}
13193 
// Math.rint, floor, ceil
// Selects the FRINT variant from the compile-time constant rounding mode:
// rint -> frintnd (ties-to-even), floor -> frintmd (toward -inf),
// ceil -> frintpd (toward +inf).
instruct roundD_reg(vRegD dst, vRegD src, immI rmode) %{
  match(Set dst (RoundDoubleMode src rmode));
  format %{ "frint  $dst, $src, $rmode" %}
  ins_encode %{
    switch ($rmode$$constant) {
      case RoundDoubleModeNode::rmode_rint:
        __ frintnd(as_FloatRegister($dst$$reg),
                   as_FloatRegister($src$$reg));
        break;
      case RoundDoubleModeNode::rmode_floor:
        __ frintmd(as_FloatRegister($dst$$reg),
                   as_FloatRegister($src$$reg));
        break;
      case RoundDoubleModeNode::rmode_ceil:
        __ frintpd(as_FloatRegister($dst$$reg),
                   as_FloatRegister($src$$reg));
        break;
      default:
        // Fixed: previously an unknown mode silently emitted no instruction.
        // An unexpected rounding mode is a matcher bug, not a runtime case.
        ShouldNotReachHere();
    }
  %}
  ins_pipe(fp_uop_d);
%}
13216 
13217 // ============================================================================
13218 // Logical Instructions
13219 
13220 // Integer Logical Instructions
13221 
13222 // And Instructions
13223 
13224 
// Int bitwise AND (register form): andw Wd, Wn, Wm.
// NOTE(review): 'cr' is declared but has no effect() clause and is unused
// by the encoding — presumably vestigial; confirm before removing.
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13239 
// Int bitwise AND with a logical-immediate: andw Wd, Wn, #imm.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  // Fixed: format said "andsw" (the flag-setting form) but the encoding
  // emits the plain, non-flag-setting andw.
  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13254 
// Or Instructions

// Int bitwise OR (register form): orrw Wd, Wn, Wm.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Int bitwise OR with a logical-immediate: orrw Wd, Wn, #imm.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// Int bitwise XOR (register form): eorw Wd, Wn, Wm.
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Int bitwise XOR with a logical-immediate: eorw Wd, Wn, #imm.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13318 
13319 // Long Logical Instructions
13320 // TODO
13321 
// Long bitwise AND (register form): and Xd, Xn, Xm.
// Fixed throughout this group: the format tags said "# int" on all six
// long logical forms; these operate on 64-bit longs.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long bitwise AND with a logical-immediate: and Xd, Xn, #imm.
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Or Instructions

// Long bitwise OR (register form): orr Xd, Xn, Xm.
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long bitwise OR with a logical-immediate: orr Xd, Xn, #imm.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// Long bitwise XOR (register form): eor Xd, Xn, Xm.
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long bitwise XOR with a logical-immediate: eor Xd, Xn, #imm.
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13415 
// Sign-extending int -> long: sbfm Xd, Xn, #0, #31 (the sxtw alias).
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// Zero-extending int -> long: matches (AndL (ConvI2L src) 0xFFFFFFFF) and
// emits ubfm Xd, Xn, #0, #31 (keeps the low 32 bits, clears the rest).
// this pattern occurs in bigmath arithmetic
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
13441 
// Long -> int narrowing: a 32-bit register move keeps the low word.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
13454 
// Int -> boolean (0/1): compare against zero then cset on NE.
// Clobbers the condition flags, hence KILL cr.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Pointer -> boolean (0/1): same cmp/cset idiom with 64-bit compare.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
13490 
13491 instruct convD2F_reg(vRegF dst, vRegD src) %{
13492   match(Set dst (ConvD2F src));
13493 
13494   ins_cost(INSN_COST * 5);
13495   format %{ "fcvtd  $dst, $src \t// d2f" %}
13496 
13497   ins_encode %{
13498     __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
13499   %}
13500 
13501   ins_pipe(fp_d2f);
13502 %}
13503 
// ConvF2D: widen a float to a double (fcvt with a single source register).
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}
13516 
// ConvF2I: float to signed int, rounding toward zero (fcvtzs, word form),
// which matches Java semantics (NaN -> 0, out-of-range saturates).
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}
13529 
// ConvF2L: float to signed long, rounding toward zero (fcvtzs, 64-bit form).
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}
13542 
// ConvI2F: signed 32-bit int to float (scvtf, word source form).
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}
13555 
// ConvL2F: signed 64-bit long to float (scvtf, 64-bit source form).
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}
13568 
// ConvD2I: double to signed int, rounding toward zero (fcvtzs, double
// source / word destination form).
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}
13581 
// ConvD2L: double to signed long, rounding toward zero (fcvtzs, 64-bit form).
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}
13594 
// ConvI2D: signed 32-bit int to double (scvtf, word source form).
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}
13607 
// ConvL2D: signed 64-bit long to double (scvtf, 64-bit source form).
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
13620 
13621 // stack <-> reg and reg <-> reg shuffles with no conversion
13622 
// MoveF2I via the stack: reinterpret the float bits in stack slot $src as an
// int by loading 32 bits into a GPR.  No value conversion is performed.
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}
13640 
// MoveI2F via the stack: reinterpret the int bits in stack slot $src as a
// float by loading 32 bits into an FP register.  No value conversion.
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
13658 
// MoveD2L via the stack: reinterpret the double bits in stack slot $src as a
// long by loading 64 bits into a GPR.  No value conversion.
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}
13676 
// MoveL2D via the stack: reinterpret the long bits in stack slot $src as a
// double by loading 64 bits into an FP register.  No value conversion.
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
13694 
// MoveF2I to the stack: store the raw 32 float bits of $src into int stack
// slot $dst.  No value conversion.
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
13712 
// MoveI2F to the stack: store the raw 32 int bits of $src into float stack
// slot $dst.  No value conversion.
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
13730 
// MoveD2L to the stack: store the raw 64 double bits of $src into long stack
// slot $dst.  No value conversion.
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  // Operand order fixed: a store names the source register first, matching
  // the encoding below and the sibling MoveF2I_reg_stack/MoveL2D_reg_stack
  // rules (previously printed as "strd $dst, $src").
  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
13748 
// MoveL2D to the stack: store the raw 64 long bits of $src into double stack
// slot $dst.  No value conversion.
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
13766 
// MoveF2I register-to-register: fmov copies the 32 raw bits from the FP
// register directly into a GPR; no memory round-trip, no conversion.
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}
13784 
// MoveI2F register-to-register: fmov copies the 32 raw bits from a GPR into
// an FP register; no memory round-trip, no conversion.
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}
13802 
// MoveD2L register-to-register: fmov copies the 64 raw bits from the FP
// register directly into a GPR; no memory round-trip, no conversion.
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}
13820 
// MoveL2D register-to-register: fmov copies the 64 raw bits from a GPR into
// an FP register; no memory round-trip, no conversion.
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
13838 
13839 // ============================================================================
13840 // clearing of an array
13841 
// ClearArray with a variable word count: zero $cnt words starting at $base
// via MacroAssembler::zero_words.  cnt/base are fixed registers (r11/r10)
// that the stub consumes, hence USE_KILL.
// NOTE(review): rFlagsReg cr appears in the operand list but has no effect()
// entry; if zero_words can clobber the flags this should be KILL cr — confirm
// against MacroAssembler::zero_words.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, $cnt$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}
13856 
// ClearArray with a constant word count, restricted by the predicate to
// lengths below the block-zeroing threshold so the constant-count form of
// zero_words can emit a short inline sequence.
// NOTE(review): as with clearArray_reg_reg, cr is an operand without a KILL
// effect — verify zero_words leaves the flags intact.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  predicate((u_int64_t)n->in(2)->get_long()
            < (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
  %}

  ins_pipe(pipe_class_memory);
%}
13873 
13874 // ============================================================================
13875 // Overflow Math Instructions
13876 
// OverflowAddI (reg + reg): cmnw sets the flags from $op1 + $op2 without
// writing a result; the V flag then indicates signed int overflow.
instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}
13889 
// OverflowAddI (reg + add/sub-encodable immediate): same as the reg-reg form
// but with the constant folded into the cmnw.
instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
13902 
// OverflowAddL (reg + reg): 64-bit cmn sets the flags from $op1 + $op2;
// the V flag indicates signed long overflow.
instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}
13915 
// OverflowAddL (reg + add/sub-encodable immediate): constant variant of the
// 64-bit overflow-add check.
instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
13928 
// OverflowSubI (reg - reg): cmpw sets the flags from $op1 - $op2; the V flag
// indicates signed int overflow of the subtraction.
instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}
13941 
// OverflowSubI (reg - add/sub-encodable immediate): constant variant of the
// int overflow-sub check.
instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
13954 
// OverflowSubL (reg - reg): 64-bit cmp sets the flags from $op1 - $op2;
// the V flag indicates signed long overflow of the subtraction.
instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}
13967 
// OverflowSubL (reg - add/sub-encodable immediate).  "subs zr, ..." is the
// underlying encoding of cmp-with-immediate: flags are set, result discarded.
instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ subs(zr, $op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
13980 
// Overflow check for int negation (0 - op1): cmpw zr, $op1 sets the flags;
// overflow occurs only when $op1 == Integer.MIN_VALUE.
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  format %{ "cmpw  zr, $op1\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
13993 
// Overflow check for long negation (0 - op1): 64-bit cmp zr, $op1; overflow
// occurs only when $op1 == Long.MIN_VALUE.
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
14006 
// OverflowMulI producing flags: compute the full 64-bit product, compare it
// with its own sign-extended low word (equal iff no 32-bit overflow), then
// translate NE/EQ into the V flag via the 0x80000000 - 1 trick so downstream
// code can test VS/VC as with the other overflow rules.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}
14027 
// Fused OverflowMulI + branch: when the If tests only overflow/no_overflow
// (see predicate) we can skip the flag-translation sequence of
// overflowMulI_reg and branch directly on NE (overflow) / EQ (no overflow).
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    // Map the requested VS/VC test onto the NE/EQ outcome of the compare.
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
14049 
// OverflowMulL producing flags: mul gives the low 64 bits, smulh the high 64
// bits; no overflow iff the high half is a pure sign-extension of the low
// half (compare against low >> 63).  The NE/EQ outcome is then folded into
// the V flag via the same 0x80000000 - 1 trick as overflowMulI_reg.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}
14072 
// Fused OverflowMulL + branch: as with the int variant, when the If tests
// only overflow/no_overflow we branch directly on the NE/EQ outcome of the
// high-half comparison instead of materializing the V flag.
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    // Map the requested VS/VC test onto the NE/EQ outcome of the compare.
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
14096 
14097 // ============================================================================
14098 // Compare Instructions
14099 
// Signed int compare, reg vs reg: one cmpw sets the flags.
instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}
14113 
// Signed int compare against the constant zero.
instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, 0" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}
14127 
// Signed int compare against an immediate encodable in the add/sub form:
// a single cmpw instruction.
instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14141 
// Signed int compare against an arbitrary immediate; doubled cost because
// the constant may need to be materialized first.
instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14155 
14156 // Unsigned compare Instructions; really, same as signed compare
14157 // except it should only be used to feed an If or a CMovI which takes a
14158 // cmpOpU.
14159 
// Unsigned int compare, reg vs reg: same cmpw as the signed form; only the
// flag register type (rFlagsRegU, consumed by cmpOpU) differs.
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}
14173 
// Unsigned int compare against the constant zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}
14187 
// Unsigned int compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14201 
// Unsigned int compare against an arbitrary immediate; doubled cost because
// the constant may need to be materialized first.
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14215 
// Signed long compare, reg vs reg: one 64-bit cmp sets the flags.
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}
14229 
// Signed long compare against the constant zero (encoded as cmp #0).
instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}
14243 
// Signed long compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14257 
// Signed long compare against an arbitrary immediate; doubled cost because
// the constant may need to be materialized first.
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14271 
// Unsigned long compare, reg vs reg: same 64-bit cmp as the signed form;
// the rFlagsRegU result feeds unsigned condition codes (cmpOpU).
instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}
14285 
// Unsigned long compare against the constant zero.
instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpUL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}
14299 
// Unsigned long compare against an add/sub-encodable immediate.
instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14313 
// Unsigned long compare against an arbitrary immediate; doubled cost because
// the constant may need to be materialized first.
instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14327 
// Pointer compare, reg vs reg (64-bit, unsigned flag semantics).
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}
14341 
// Compressed-pointer (narrow oop) compare, reg vs reg.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}
14355 
// Pointer null test: compare $op1 against zero (used for null checks).
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}
14369 
// Compressed-pointer null test: compare the narrow oop $op1 against zero.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
14383 
14384 // FP comparisons
14385 //
14386 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
14387 // using normal cmpOp. See declaration of rFlagsReg for details.
14388 
// Float compare, reg vs reg: fcmps sets the ordinary flags register (see the
// FP comparisons note above for how unordered results are handled).
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
14402 
// Float compare against the constant 0.0 using the immediate-zero form of
// fcmp, avoiding materialization of the constant.
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
14416 // FROM HERE
14417 
// Double compare, reg vs reg: fcmpd sets the ordinary flags register.
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
14431 
// Double compare against the constant 0.0 using the immediate-zero form of
// fcmp, avoiding materialization of the constant.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
14445 
// Three-way float compare (CmpF3): $dst = -1 / 0 / +1 for less / equal /
// greater, with unordered (NaN) yielding -1, built from fcmps + two
// conditional selects.  Clobbers the flags.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // NOTE(review): 'done' is bound but never branched to — dead label.
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}

  ins_pipe(pipe_class_default);

%}
14473 
// Three-way double compare (CmpD3): same -1/0/+1 scheme as compF3_reg_reg,
// with unordered yielding -1.  Clobbers the flags.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // NOTE(review): 'done' is bound but never branched to — dead label.
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);

%}
14500 
// Three-way float compare against 0.0: same -1/0/+1 scheme, using the
// immediate-zero form of fcmps.  Clobbers the flags.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // NOTE(review): 'done' is bound but never branched to — dead label.
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}

  ins_pipe(pipe_class_default);

%}
14527 
// Three-way double compare against 0.0: same -1/0/+1 scheme, using the
// immediate-zero form of fcmpd.  Clobbers the flags.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // NOTE(review): 'done' is bound but never branched to — dead label.
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);

%}
14553 
// CmpLTMask: $dst = ($p < $q) ? -1 : 0.  cset produces 0/1, then negation
// turns the 1 into an all-ones mask.  Clobbers the flags.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
14574 
// CmpLTMask against zero: an arithmetic shift right by 31 replicates the
// sign bit, giving -1 for negative $src and 0 otherwise in one instruction.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
14590 
14591 // ============================================================================
14592 // Max and Min
14593 
// Conditional select on LT: $dst = (flags say lt) ? $src1 : $src2.  No match
// rule — this instruct exists only as a building block for the minI_rReg
// expand below.
instruct cmovI_reg_reg_lt(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  effect( DEF dst, USE src1, USE src2, USE cr );

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src1, $src2 lt\t"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(icond_reg_reg);
%}
14610 
// MinI: expanded into a signed compare followed by a conditional select on
// LT, i.e. dst = (src1 < src2) ? src1 : src2.
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2)
%{
  match(Set dst (MinI src1 src2));
  ins_cost(INSN_COST * 3);

  expand %{
    rFlagsReg cr;
    compI_reg_reg(cr, src1, src2);
    cmovI_reg_reg_lt(dst, src1, src2, cr);
  %}

%}
14623 // FROM HERE
14624 
// Conditional select on GT: $dst = (flags say gt) ? $src1 : $src2.  No match
// rule — this instruct exists only as a building block for the maxI_rReg
// expand below.
instruct cmovI_reg_reg_gt(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  effect( DEF dst, USE src1, USE src2, USE cr );

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src1, $src2 gt\t"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(icond_reg_reg);
%}
14641 
// MaxI: expands to a compare followed by a conditional select on GT.
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2)
%{
  match(Set dst (MaxI src1 src2));
  ins_cost(INSN_COST * 3);
  expand %{
    rFlagsReg cr;
    compI_reg_reg(cr, src1, src2);
    cmovI_reg_reg_gt(dst, src1, src2, cr);
  %}
%}
14652 
14653 // ============================================================================
14654 // Branch Instructions
14655 
14656 // Direct Branch.
// Unconditional direct branch (Goto).
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}
14670 
14671 // Conditional Near Branch
// Conditional near branch on signed condition codes.
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
14691 
14692 // Conditional Near Branch Unsigned
// Conditional near branch on unsigned condition codes.
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
14712 
14713 // Make use of CBZ and CBNZ.  These instructions, as well as being
14714 // shorter than (cmp; branch), have the additional benefit of not
14715 // killing the flags.
14716 
// Compare-int-with-zero and branch: fuses (CmpI op1 0) + If into a
// single CBZW/CBNZW, which does not modify the flags.
instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);   // branch if op1 == 0
    else
      __ cbnzw($op1$$Register, *L);  // branch if op1 != 0
  %}
  ins_pipe(pipe_cmp_branch);
%}
14733 
// Compare-long-with-zero and branch: fused into CBZ/CBNZ (64-bit form).
instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14750 
// Compare-pointer-with-null and branch: fused into CBZ/CBNZ (64-bit form).
instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14767 
// Compare-narrow-oop-with-zero and branch: narrow oops are 32 bits, so
// the word forms CBZW/CBNZW are used.
instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14784 
// Null-check of a decoded narrow oop: (DecodeN oop) == null iff the
// 32-bit compressed value is zero, so test the narrow register directly
// with CBZW/CBNZW and skip the decode.
instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14801 
// Unsigned compare-int-with-zero and branch.  Against zero, the
// unsigned conditions collapse: EQ and LS (u<=) hold exactly when
// op1 == 0; the remaining conditions hold exactly when op1 != 0.
instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14818 
// Unsigned compare-long-with-zero and branch; same condition collapse
// as cmpUI_imm0_branch above, using the 64-bit CBZ/CBNZ forms.
instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpUL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14835 
14836 // Test bit and Branch
14837 
14838 // Patterns for short (< 32KiB) variants
// Signed long < 0 / >= 0 branch via a sign-bit test: LT maps to
// "bit 63 set" (TBNZ), GE to "bit 63 clear" (TBZ).  Short variant.
instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
14854 
// Signed int < 0 / >= 0 branch via a test of sign bit 31.  Short variant.
instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
14870 
// Branch on a single bit of a long: (op1 & (1<<k)) ==/!= 0 becomes
// TBZ/TBNZ.  The predicate requires the AND mask to be a power of two.
instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2((julong)n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2_long($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
14887 
// Branch on a single bit of an int (power-of-two AND mask) via TBZ/TBNZ.
instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2((juint)n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2((juint)$op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
14904 
14905 // And far variants
// Far variant of cmpL_branch_sign: same sign-bit test, but the tbr
// helper is told the target may be out of TBZ range (/*far*/true).
instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14920 
// Far variant of cmpI_branch_sign (sign bit 31, out-of-range target).
instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14935 
// Far variant of cmpL_branch_bit (single-bit test, out-of-range target).
instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2((julong)n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2_long($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14951 
// Far variant of cmpI_branch_bit (single-bit test, out-of-range target).
instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2((juint)n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2((juint)$op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14967 
14968 // Test bits
14969 
// Set flags from (op1 & imm) without a result register: TST with a
// 64-bit logical immediate.  The predicate guarantees the constant is
// encodable as a logical immediate.
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
14982 
// Set flags from (op1 & imm) for ints: TSTW with a 32-bit logical
// immediate.  The predicate guarantees the constant is encodable.
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  // Format says "tstw" to match the instruction actually emitted below
  // (and the companion cmpI_and_reg rule); it previously read "tst".
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
14995 
// Set flags from (op1 & op2) for longs, register-register form (TST).
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
15006 
// Set flags from (op1 & op2) for ints, register-register form (TSTW).
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
15017 
15018 
15019 // Conditional Far Branch
15020 // Conditional Far Branch Unsigned
15021 // TODO: fixme
15022 
15023 // counted loop end branch near
// Counted-loop back-branch, signed condition; same encoding as
// branchCon but matched against CountedLoopEnd.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}
15039 
15040 // counted loop end branch near Unsigned
// Counted-loop back-branch, unsigned condition.
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
15056 
15057 // counted loop end branch far
15058 // counted loop end branch far unsigned
15059 // TODO: fixme
15060 
15061 // ============================================================================
15062 // inlined locking and unlocking
15063 
// Inlined monitor enter: flags are set by the fast-lock encoding; tmp
// and tmp2 are scratch registers clobbered by it.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
15078 
// Inlined monitor exit; mirror of cmpFastLock above.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
15091 
15092 
15093 // ============================================================================
15094 // Safepoint Instructions
15095 
15096 // TODO
15097 // provide a near and far version of this code
15098 
// Safepoint poll: a load from the polling page; the page is protected
// when a safepoint is requested so the load traps into the VM.
instruct safePoint(rFlagsReg cr, iRegP poll)
%{
  match(SafePoint poll);
  effect(KILL cr);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
15112 
15113 
15114 // ============================================================================
15115 // Procedure Call/Return Instructions
15116 
15117 // Call Java Static Instruction
15118 
// Direct call to a statically-bound Java method, followed by the
// standard call epilog encoding.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
15134 
15135 // TO HERE
15136 
15137 // Call Java Dynamic Instruction
// Dynamically-dispatched Java call (inline-cache based), followed by
// the standard call epilog encoding.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
15153 
15154 // Call Runtime Instruction
15155 
// Call from compiled Java code into the runtime.
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15170 
15171 // Call Runtime Instruction
15172 
// Leaf runtime call (no Java frame walking needed); shares the
// java-to-runtime encoding with CallRuntimeDirect.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15187 
15188 // Call Runtime Instruction
15189 
// Leaf runtime call that does not use floating point; same encoding
// as the other runtime-call rules.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15204 
15205 // Tail Call; Jump from runtime stub to Java code.
15206 // Also known as an 'interprocedural jump'.
15207 // Target of jump will eventually return to caller.
15208 // TailJump below removes the return address.
// Indirect tail call from a runtime stub into Java code; the method
// oop rides along in the inline-cache register.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}
15221 
// Indirect tail jump carrying an exception oop in r0 (forward-exception
// path); unlike TailCall it discards the return address.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
15234 
15235 // Create exception oop: created by stack-crawling runtime code.
15236 // Created exception is now available to this handler, and is setup
15237 // just prior to jumping to this handler. No code emitted.
15238 // TODO check
15239 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
// Placeholder for an exception oop materialized by the runtime in r0;
// emits no code (size 0), it only informs the register allocator.
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
15252 
15253 // Rethrow exception: The exception oop will come in the first
15254 // argument position. Then JUMP (not call) to the rethrow stub code.
// Rethrow: jump (not call) to the rethrow stub; the exception oop is
// already in the first argument register.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
15265 
15266 
15267 // Return Instruction
15268 // epilog node loads ret address into lr as part of frame pop
// Method return; the epilog has already restored lr from the frame.
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}
15279 
15280 // Die now.
// Halt: code that should never execute.  When the node is reachable a
// trapping instruction is emitted; when provably unreachable nothing is.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    if (is_reachable()) {
      // +1 so as not to match the hlt 0xdead used elsewhere as a marker
      // -- NOTE(review): intent inferred from the constant; confirm.
      __ dpcs1(0xdead + 1);
    }
  %}

  ins_pipe(pipe_class_default);
%}
15295 
15296 // ============================================================================
15297 // Partial Subtype Check
15298 //
15299 // superklass array for an instance of the superklass.  Set a hidden
15300 // internal cache on a hit (cache is checked with exposed code in
15301 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
15302 // encoding ALSO sets flags.
15303 
// Partial subtype check returning a result register: nonzero on miss,
// zero on hit (opcode 0x1 forces the zeroing).  Also sets flags.
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}
15318 
// Partial subtype check used only for its flags result (compared vs
// zero); the result register is clobbered, not produced.
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
15333 
// String compare, both strings UTF-16 (UU encoding).  No vector temps
// are needed for the same-width case, hence the fnoreg arguments.
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  // Format now lists both killed temps, matching the effect clause and
  // the UL/LU variants below.
  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
15351 
// String compare, both strings Latin-1 (LL encoding).
instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  // Format now lists both killed temps, matching the effect clause and
  // the UL/LU variants below.
  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
15368 
// String compare, str1 UTF-16 vs str2 Latin-1 (UL encoding).  The
// mixed-width comparison needs three vector temporaries.
instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
15388 
// String compare, str1 Latin-1 vs str2 UTF-16 (LU encoding); mirror of
// the UL rule above.
instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
15408 
// StrIndexOf with a variable-length needle, both strings UTF-16 (UU).
// The -1 constant-count argument selects the general (non-constant) path.
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
15429 
// StrIndexOf with a variable-length needle, both strings Latin-1 (LL).
instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
15450 
// StrIndexOf with a variable-length needle, UTF-16 haystack and
// Latin-1 needle (UL).
instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
15471 
// StrIndexOf with a small constant needle length (<= 4), UU encoding.
// The known count is passed as an immediate; cnt2 and two temps become
// zr since they are unused on the constant path.
instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
15492 
// StrIndexOf with a small constant needle length (<= 4), LL encoding.
instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
15513 
// StrIndexOf with a constant needle length of exactly 1 (immI_1),
// UL encoding.
instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
15534 
// indexOf of a single char value $ch within a UTF-16 char array.
// All inputs are consumed (USE_KILL); result lands in r0.
instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
                              iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                              iRegINoSp tmp3, rFlagsReg cr)
%{
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}

  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
                           $result$$Register, $tmp1$$Register, $tmp2$$Register,
                           $tmp3$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
15552 
// String equality for two Latin1 (LL) strings; last argument 1 selects
// 1-byte element size in the stub.
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}
15568 
// String equality for two UTF-16 (UU) strings; last argument 2 selects
// 2-byte element size in the stub.
instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
15584 
// byte[] equality (LL encoding); element size 1 selects byte compares.
// r10 ($tmp) is clobbered as scratch in addition to the TEMP registers.
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  // Fixed: format string said "ary2" without the '$' operand prefix, so the
  // disassembly listing printed the literal text instead of the operand.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}
15601 
// char[] equality (UU encoding); element size 2 selects 16-bit compares.
// r10 ($tmp) is clobbered as scratch in addition to the TEMP registers.
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  // Fixed: format string said "ary2" without the '$' operand prefix, so the
  // disassembly listing printed the literal text instead of the operand.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
15618 
// Tests whether any byte of $ary1[0..$len) has its sign bit set
// (used by the String coder intrinsics); both inputs are consumed.
instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(USE_KILL ary1, USE_KILL len, KILL cr);
  format %{ "has negatives byte[] $ary1,$len -> $result" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15629 
// fast char[] to byte[] compression
// Uses SIMD temps v0-v3; $result reports success/length per the
// char_array_compress stub contract (see macroAssembler_aarch64).
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                         vRegD_V0 tmp1, vRegD_V1 tmp2,
                         vRegD_V2 tmp3, vRegD_V3 tmp4,
                         iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
                           $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15648 
// fast byte[] to char[] inflation
// Produces no value (Universe dummy); pointers, length, SIMD temps
// v0-v2 and GP temp r3 are all clobbered.
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
                        vRegD_V0 tmp1, vRegD_V1 tmp2, vRegD_V2 tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
15663 
// encode char[] to byte[] in ISO_8859_1
// $result receives the number of characters encoded (stub contract);
// SIMD temps v0-v3 are killed.
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
15682 
15683 // ============================================================================
15684 // This name is KNOWN by the ADLC and cannot be changed.
15685 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
15686 // for this guy.
15687 instruct tlsLoadP(thread_RegP dst)
15688 %{
15689   match(Set dst (ThreadLocal));
15690 
15691   ins_cost(0);
15692 
15693   format %{ " -- \t// $dst=Thread::current(), empty" %}
15694 
15695   size(0);
15696 
15697   ins_encode( /*empty*/ );
15698 
15699   ins_pipe(pipe_class_empty);
15700 %}
15701 
15702 // ====================VECTOR INSTRUCTIONS=====================================
15703 
// Load vector (32 bits)
// Single-precision ldr into the low 32 bits of a vecD register.
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}
15714 
// Load vector (64 bits)
// Double-width ldr filling a full vecD register.
instruct loadV8(vecD dst, vmem8 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}
15725 
// Load Vector (128 bits)
// Quad-word ldr filling a full vecX register.
instruct loadV16(vecX dst, vmem16 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}
15736 
// Store Vector (32 bits)
// Stores the low 32 bits of a vecD register.
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}
15747 
// Store Vector (64 bits)
// Stores a full vecD register.
instruct storeV8(vecD src, vmem8 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}
15758 
// Store Vector (128 bits)
// Stores a full vecX register.
instruct storeV16(vecX src, vmem16 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(vstore_reg_mem128);
%}
15769 
// Broadcast a GP register byte into every 8-bit lane of a 64-bit vector.
// Also handles 4-lane vectors (upper lanes are simply ignored).
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
15782 
// Broadcast a GP register byte into all 16 byte lanes of a 128-bit vector.
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
15794 
// Broadcast an immediate byte constant into a 64-bit vector via movi.
// The constant is masked to 8 bits before encoding.
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
15807 
// Broadcast an immediate byte constant into a 128-bit vector via movi.
instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15819 
// Broadcast a GP register halfword into the 16-bit lanes of a 64-bit vector.
// Covers both 2- and 4-lane short vectors.
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
15832 
// Broadcast a GP register halfword into all 8 16-bit lanes of a 128-bit vector.
instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
15844 
// Broadcast an immediate halfword constant into a 64-bit vector via movi.
// The constant is masked to 16 bits before encoding.
instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
15857 
// Broadcast an immediate halfword constant into a 128-bit vector via movi.
instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15869 
// Broadcast a GP register word into both 32-bit lanes of a 64-bit vector.
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
15881 
// Broadcast a GP register word into all four 32-bit lanes of a 128-bit vector.
instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
15893 
// Broadcast an immediate int constant into a 64-bit vector via movi.
instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
15905 
// Broadcast an immediate int constant into a 128-bit vector via movi.
instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15917 
// Broadcast a GP long register into both 64-bit lanes of a 128-bit vector.
instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
15929 
// Zero a 128-bit vector.  Matches ReplicateI of the constant 0 (the ideal
// graph presents the 2L zero this way); the $zero immediate is not encoded —
// eor of $dst with itself clears the whole register regardless of its
// previous contents.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15943 
// Broadcast a float register into both 32-bit lanes of a 64-bit vector.
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}
15956 
// Broadcast a float register into all four 32-bit lanes of a 128-bit vector.
instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}
15969 
// Broadcast a double register into both 64-bit lanes of a 128-bit vector.
instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
15982 
15983 // ====================REDUCTION ARITHMETIC====================================
15984 
// Add-reduce a 2-lane int vector plus scalar $isrc: extract both lanes
// with umov and sum them into $dst with 32-bit adds.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $vsrc, S, 0\n\t"
            "umov  $tmp2, $vsrc, S, 1\n\t"
            "addw  $tmp, $isrc, $tmp\n\t"
            "addw  $dst, $tmp, $tmp2\t# add reduction2I"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
    __ addw($tmp$$Register, $isrc$$Register, $tmp$$Register);
    __ addw($dst$$Register, $tmp$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16003 
// Add-reduce a 4-lane int vector plus scalar $isrc: addv folds the four
// lanes in SIMD, then the scalar is added after a single umov extract.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp)
%{
  match(Set dst (AddReductionVI isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP vtmp, TEMP itmp);
  format %{ "addv  $vtmp, T4S, $vsrc\n\t"
            "umov  $itmp, $vtmp, S, 0\n\t"
            "addw  $dst, $itmp, $isrc\t# add reduction4I"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($vtmp$$reg), __ T4S,
            as_FloatRegister($vsrc$$reg));
    __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 0);
    __ addw($dst$$Register, $itmp$$Register, $isrc$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16021 
// Multiply-reduce a 2-lane int vector with scalar $isrc.  $dst is also a
// TEMP because it is written before the last input ($tmp) is fully consumed.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
%{
  match(Set dst (MulReductionVI isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "umov  $tmp, $vsrc, S, 0\n\t"
            "mul   $dst, $tmp, $isrc\n\t"
            "umov  $tmp, $vsrc, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t# mul reduction2I"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $isrc$$Register);
    __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16040 
// Multiply-reduce a 4-lane int vector with scalar $isrc.  The upper half is
// first copied down (ins D) and multiplied lane-wise against the lower half,
// reducing the problem to two lanes which are then folded in GP registers.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp)
%{
  match(Set dst (MulReductionVI isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP vtmp, TEMP itmp, TEMP dst);
  format %{ "ins   $vtmp, D, $vsrc, 0, 1\n\t"
            "mulv  $vtmp, T2S, $vtmp, $vsrc\n\t"
            "umov  $itmp, $vtmp, S, 0\n\t"
            "mul   $dst, $itmp, $isrc\n\t"
            "umov  $itmp, $vtmp, S, 1\n\t"
            "mul   $dst, $itmp, $dst\t# mul reduction4I"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($vtmp$$reg), __ D,
           as_FloatRegister($vsrc$$reg), 0, 1);
    __ mulv(as_FloatRegister($vtmp$$reg), __ T2S,
            as_FloatRegister($vtmp$$reg), as_FloatRegister($vsrc$$reg));
    __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 0);
    __ mul($dst$$Register, $itmp$$Register, $isrc$$Register);
    __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 1);
    __ mul($dst$$Register, $itmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16065 
// Add-reduce a 2-lane float vector with scalar $fsrc.  Lanes are accumulated
// strictly in order (scalar, lane 0, lane 1) to keep FP results bit-exact.
instruct reduce_add2F(vRegF dst, vRegF fsrc, vecD vsrc, vecD tmp)
%{
  match(Set dst (AddReductionVF fsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $fsrc, $vsrc\n\t"
            "ins   $tmp, S, $vsrc, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t# add reduction2F"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($fsrc$$reg), as_FloatRegister($vsrc$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($vsrc$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16085 
// Add-reduce a 4-lane float vector with scalar $fsrc.  Each lane is moved to
// element 0 of $tmp and added sequentially, preserving strict left-to-right
// FP ordering rather than using a tree reduction.
instruct reduce_add4F(vRegF dst, vRegF fsrc, vecX vsrc, vecX tmp)
%{
  match(Set dst (AddReductionVF fsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $fsrc, $vsrc\n\t"
            "ins   $tmp, S, $vsrc, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $vsrc, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $vsrc, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t# add reduction4F"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($fsrc$$reg), as_FloatRegister($vsrc$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($vsrc$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($vsrc$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($vsrc$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16117 
// Multiply-reduce a 2-lane float vector with scalar $fsrc, in strict
// lane order for bit-exact FP results.
instruct reduce_mul2F(vRegF dst, vRegF fsrc, vecD vsrc, vecD tmp)
%{
  match(Set dst (MulReductionVF fsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $fsrc, $vsrc\n\t"
            "ins   $tmp, S, $vsrc, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t# mul reduction2F"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($fsrc$$reg), as_FloatRegister($vsrc$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($vsrc$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16137 
// Multiply-reduce a 4-lane float vector with scalar $fsrc, multiplying
// lane by lane in order for bit-exact FP results.
instruct reduce_mul4F(vRegF dst, vRegF fsrc, vecX vsrc, vecX tmp)
%{
  match(Set dst (MulReductionVF fsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $fsrc, $vsrc\n\t"
            "ins   $tmp, S, $vsrc, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $vsrc, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $vsrc, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t# mul reduction4F"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($fsrc$$reg), as_FloatRegister($vsrc$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($vsrc$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($vsrc$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($vsrc$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16169 
// Add-reduce a 2-lane double vector with scalar $dsrc, in strict lane order.
instruct reduce_add2D(vRegD dst, vRegD dsrc, vecX vsrc, vecX tmp)
%{
  match(Set dst (AddReductionVD dsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $dsrc, $vsrc\n\t"
            "ins   $tmp, D, $vsrc, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t# add reduction2D"
  %}
  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dsrc$$reg), as_FloatRegister($vsrc$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($vsrc$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16189 
// Multiply-reduce a 2-lane double vector with scalar $dsrc, in strict lane order.
instruct reduce_mul2D(vRegD dst, vRegD dsrc, vecX vsrc, vecX tmp)
%{
  match(Set dst (MulReductionVD dsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuld $dst, $dsrc, $vsrc\n\t"
            "ins   $tmp, D, $vsrc, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t# mul reduction2D"
  %}
  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dsrc$$reg), as_FloatRegister($vsrc$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($vsrc$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16209 
// Max-reduce a 2-lane float vector with scalar $fsrc using pairwise fmaxs.
instruct reduce_max2F(vRegF dst, vRegF fsrc, vecD vsrc, vecD tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MaxReductionV fsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "fmaxs $dst, $fsrc, $vsrc\n\t"
            "ins   $tmp, S, $vsrc, 0, 1\n\t"
            "fmaxs $dst, $dst, $tmp\t# max reduction2F" %}
  ins_encode %{
    __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($fsrc$$reg), as_FloatRegister($vsrc$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S, as_FloatRegister($vsrc$$reg), 0, 1);
    __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16225 
// Max-reduce a 4-lane float vector with scalar $fsrc: fmaxv folds the
// vector lanes in one instruction, then the scalar is merged with fmaxs.
instruct reduce_max4F(vRegF dst, vRegF fsrc, vecX vsrc) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MaxReductionV fsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "fmaxv $dst, T4S, $vsrc\n\t"
            "fmaxs $dst, $dst, $fsrc\t# max reduction4F" %}
  ins_encode %{
    __ fmaxv(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($vsrc$$reg));
    __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($fsrc$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16239 
// Max-reduce a 2-lane double vector with scalar $dsrc using pairwise fmaxd.
instruct reduce_max2D(vRegD dst, vRegD dsrc, vecX vsrc, vecX tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MaxReductionV dsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "fmaxd $dst, $dsrc, $vsrc\n\t"
            "ins   $tmp, D, $vsrc, 0, 1\n\t"
            "fmaxd $dst, $dst, $tmp\t# max reduction2D" %}
  ins_encode %{
    __ fmaxd(as_FloatRegister($dst$$reg), as_FloatRegister($dsrc$$reg), as_FloatRegister($vsrc$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D, as_FloatRegister($vsrc$$reg), 0, 1);
    __ fmaxd(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16255 
// Min-reduce a 2-lane float vector with scalar $fsrc using pairwise fmins.
instruct reduce_min2F(vRegF dst, vRegF fsrc, vecD vsrc, vecD tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinReductionV fsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "fmins $dst, $fsrc, $vsrc\n\t"
            "ins   $tmp, S, $vsrc, 0, 1\n\t"
            "fmins $dst, $dst, $tmp\t# min reduction2F" %}
  ins_encode %{
    __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($fsrc$$reg), as_FloatRegister($vsrc$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S, as_FloatRegister($vsrc$$reg), 0, 1);
    __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16271 
// Min-reduce a 4-lane float vector with scalar $fsrc: fminv folds the
// vector lanes in one instruction, then the scalar is merged with fmins.
instruct reduce_min4F(vRegF dst, vRegF fsrc, vecX vsrc) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinReductionV fsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "fminv $dst, T4S, $vsrc\n\t"
            "fmins $dst, $dst, $fsrc\t# min reduction4F" %}
  ins_encode %{
    __ fminv(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($vsrc$$reg));
    __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($fsrc$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16285 
// Min-reduce a 2-lane double vector with scalar $dsrc using pairwise fmind.
instruct reduce_min2D(vRegD dst, vRegD dsrc, vecX vsrc, vecX tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MinReductionV dsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "fmind $dst, $dsrc, $vsrc\n\t"
            "ins   $tmp, D, $vsrc, 0, 1\n\t"
            "fmind $dst, $dst, $tmp\t# min reduction2D" %}
  ins_encode %{
    __ fmind(as_FloatRegister($dst$$reg), as_FloatRegister($dsrc$$reg), as_FloatRegister($vsrc$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D, as_FloatRegister($vsrc$$reg), 0, 1);
    __ fmind(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16301 
16302 // ====================VECTOR ARITHMETIC=======================================
16303 
16304 // --------------------------------- ADD --------------------------------------
16305 
// Byte vector add, 64-bit register.  Also matches the 4-byte case;
// the unused upper lanes are simply ignored.
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// 16 x byte add, full 128-bit register.
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Short vector add, 64-bit register.  Also covers the 2-short case.
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// 8 x short add, 128-bit register.
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// 2 x int add, 64-bit register.
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// 4 x int add, 128-bit register.
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// 2 x long add, 128-bit register (arrangement T2D).
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// 2 x float add, 64-bit register.
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// 4 x float add, 128-bit register.
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16433 
// 2 x double add, 128-bit register.
// Fix: add the length()==2 predicate that every other 2D rule in this
// file carries (vsub2D, vmul2D, vdiv2D, ...).  A vecX of doubles can
// only hold two lanes, so this is a consistency/defensive guard rather
// than a behavior change for well-formed IR.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16446 
16447 // --------------------------------- SUB --------------------------------------
16448 
// Byte vector subtract, 64-bit register.  Also matches the 4-byte case.
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// 16 x byte subtract, 128-bit register.
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Short vector subtract, 64-bit register.  Also covers the 2-short case.
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// 8 x short subtract, 128-bit register.
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// 2 x int subtract, 64-bit register.
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// 4 x int subtract, 128-bit register.
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// 2 x long subtract, 128-bit register (arrangement T2D).
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// 2 x float subtract, 64-bit register.
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// 4 x float subtract, 128-bit register.
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}

// 2 x double subtract, 128-bit register.
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16590 
16591 // --------------------------------- MUL --------------------------------------
16592 
// Byte vector multiply, 64-bit register.  Also matches the 4-byte case.
instruct vmul8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (MulVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// 16 x byte multiply, 128-bit register.
instruct vmul16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (MulVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// Short vector multiply, 64-bit register.  Also covers the 2-short case.
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// 8 x short multiply, 128-bit register.
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// 2 x int multiply, 64-bit register.
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// 4 x int multiply, 128-bit register.  Note: no MulVL rule exists here;
// 2 x long multiply is not matched in this section.
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// 2 x float multiply, 64-bit register.
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// 4 x float multiply, 128-bit register.
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// 2 x double multiply, 128-bit register.
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16720 
16721 // --------------------------------- MLA --------------------------------------
16722 
// Integer multiply-accumulate: dst += src1 * src2, matched from the
// AddVS(dst, MulVS(src1, src2)) shape.  Also covers the 2-short case.
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// 8 x short multiply-accumulate, 128-bit register.
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// 2 x int multiply-accumulate, 64-bit register.
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// 4 x int multiply-accumulate, 128-bit register.
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// dst + src1 * src2 (fused multiply-add; requires UseFMA since fmla
// performs no intermediate rounding)
instruct vmla2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// dst + src1 * src2 (fused, 4 x float)
instruct vmla4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// dst + src1 * src2 (fused, 2 x double)
instruct vmla2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16821 
16822 // --------------------------------- MLS --------------------------------------
16823 
// Integer multiply-subtract: dst -= src1 * src2, matched from the
// SubVS(dst, MulVS(src1, src2)) shape.  Also covers the 2-short case.
instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// 8 x short multiply-subtract, 128-bit register.
instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// 2 x int multiply-subtract, 64-bit register.
instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// 4 x int multiply-subtract, 128-bit register.
instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// dst - src1 * src2 (fused; both Neg placements of the FmaVF shape
// reduce to the same fmls since (-a)*b == a*(-b))
instruct vmls2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// dst - src1 * src2 (fused, 4 x float)
instruct vmls4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// dst - src1 * src2 (fused, 2 x double)
instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
  match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16925 
16926 // --------------- Vector Multiply-Add Shorts into Integer --------------------
16927 
// MulAddVS2VI: widening multiply of short lanes followed by a pairwise
// add into int lanes.  The two smullv forms produce the 32-bit products
// of the low (T4H) and high (T8H/smull2) halves; addpv then folds
// adjacent pairs into the four T4S result lanes.
// Fix: dropped the stray trailing "\n\t" after the last format line,
// which printed a spurious blank continuation in PrintOptoAssembly.
instruct vmuladdS2I(vecX dst, vecX src1, vecX src2, vecX tmp) %{
  predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MulAddVS2VI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "smullv  $tmp, $src1, $src2\t# vector (4H)\n\t"
            "smullv  $dst, $src1, $src2\t# vector (8H)\n\t"
            "addpv   $dst, $tmp, $dst\t# vector (4S)" %}
  ins_encode %{
    __ smullv(as_FloatRegister($tmp$$reg), __ T4H,
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg));
    __ smullv(as_FloatRegister($dst$$reg), __ T8H,
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg));
    __ addpv(as_FloatRegister($dst$$reg), __ T4S,
             as_FloatRegister($tmp$$reg),
             as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16949 
16950 // --------------------------------- DIV --------------------------------------
16951 
// 2 x float divide, 64-bit register.  Only FP divides exist here:
// AArch64 SIMD has no integer vector divide.
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// 4 x float divide, 128-bit register.
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// 2 x double divide, 128-bit register.
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16993 
16994 // --------------------------------- SQRT -------------------------------------
16995 
// 2 x float square root, 64-bit register.
instruct vsqrt2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SqrtVF src));
  format %{ "fsqrt  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ fsqrt(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}

// 4 x float square root, 128-bit register.
instruct vsqrt4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF src));
  format %{ "fsqrt  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ fsqrt(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
  %}
  ins_pipe(vsqrt_fp128);
%}

// 2 x double square root, 128-bit register.
instruct vsqrt2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
             as_FloatRegister($src$$reg));
  %}
  ins_pipe(vsqrt_fp128);
%}
17029 
17030 // --------------------------------- ABS --------------------------------------
17031 
// 2 x float absolute value, 64-bit register.
instruct vabs2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}

// 4 x float absolute value, 128-bit register.
instruct vabs4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}

// 2 x double absolute value, 128-bit register.
instruct vabs2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
17070 
17071 // --------------------------------- NEG --------------------------------------
17072 
// 2 x float negate, 64-bit register.
instruct vneg2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}

// 4 x float negate, 128-bit register.
instruct vneg4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}

// 2 x double negate, 128-bit register.
instruct vneg2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
17111 
17112 // --------------------------------- AND --------------------------------------
17113 
// Bitwise AND, 64-bit register.  Logical ops are element-type agnostic,
// so the predicate keys off length_in_bytes rather than element count.
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}

// Bitwise AND, 128-bit register.
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17142 
17143 // --------------------------------- OR ---------------------------------------
17144 
// Bitwise OR, 64-bit register (also matches 4-byte vectors).
// Fix: the format string previously printed "and", mislabeling the
// emitted orr instruction in debug/PrintOptoAssembly output; it now
// matches the encoding and the sibling vor16B rule.
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
17159 
// Bitwise OR, 128-bit register.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17173 
17174 // --------------------------------- XOR --------------------------------------
17175 
// Bitwise XOR, 64-bit register (emitted as AArch64 eor; the format
// uses the IR-level name "xor").  Also matches 4-byte vectors.
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}

// Bitwise XOR, 128-bit register.
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17204 
17205 // ------------------------------ Shift ---------------------------------------
// vshiftcnt8B: materialize a vector shift count by replicating the scalar
// count from a GP register into every byte lane of a 64-bit vector.  The
// variable-shift rules below read the count per lane from this vector.
instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// vshiftcnt16B: as above, replicating into all 16 byte lanes of a 128-bit vector.
instruct vshiftcnt16B(vecX dst, iRegIorL2I cnt) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
17227 
// vsll8B: variable left shift of byte lanes; sshl shifts each lane left by
// the count held in the corresponding lane of $shift.
instruct vsll8B(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// vsll16B: variable left shift of 16 byte lanes (128-bit form).
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17254 
17255 // Right shifts with vector shift count on aarch64 SIMD are implemented
17256 // as left shift by negative shift count.
17257 // There are two cases for vector shift count.
17258 //
17259 // Case 1: The vector shift count is from replication.
17260 //        |            |
17261 //    LoadVector  RShiftCntV
17262 //        |       /
17263 //     RShiftVI
// Note: In the inner loop, multiple neg instructions are used; these can be
// hoisted to the outer loop and merged into one neg instruction.
17266 //
17267 // Case 2: The vector shift count is from loading.
// This case isn't supported by the middle-end yet, but it is supported by
// panama/vectorIntrinsics (JEP 338: Vector API).
17270 //        |            |
17271 //    LoadVector  LoadVector
17272 //        |       /
17273 //     RShiftVI
17274 //
17275 
// vsra8B: variable arithmetic right shift of byte lanes.  Per the note above,
// AArch64 SIMD has no right shift by vector count, so the counts are negated
// into $tmp and sshl performs a left shift by a negative amount.
instruct vsra8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// vsra16B: 128-bit form of the above.
instruct vsra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
17310 
// vsrl8B: variable logical (unsigned) right shift of byte lanes, done as
// ushl by the negated counts (see the right-shift note above).
instruct vsrl8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// vsrl16B: 128-bit form of the above.
instruct vsrl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
17345 
// vsll8B_imm: left shift of byte lanes by an immediate.  The shl encoding
// only accepts counts < element width, so a count >= 8 (which must yield
// zero) is emitted as eor of $src with itself to clear $dst.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      // shift >= element width: result is all zeros
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// vsll16B_imm: 128-bit form of the above.
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      // shift >= element width: result is all zeros
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17384 
// vsra8B_imm: arithmetic right shift of byte lanes by an immediate.
// Counts >= 8 are clamped to 7: shifting in sign bits saturates after
// element_width - 1, which preserves Java's >> semantics for large counts.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;  // clamp: all-sign-bits result
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

// vsra16B_imm: 128-bit form of the above.
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;  // clamp: all-sign-bits result
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
17413 
// vsrl8B_imm: logical right shift of byte lanes by an immediate.  A count
// >= 8 must yield zero, which is emitted as eor of $src with itself.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      // shift >= element width: result is all zeros
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// vsrl16B_imm: 128-bit form of the above.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      // shift >= element width: result is all zeros
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17452 
// vsll4S: variable left shift of 16-bit (short) lanes, 64-bit form.
instruct vsll4S(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// vsll8S: variable left shift of 16-bit lanes, 128-bit form.
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17479 
// vsra4S: variable arithmetic right shift of 16-bit lanes, via sshl by the
// negated counts.  The negation is done byte-wise (T8B): sshl/ushl consume
// only the least significant byte of each lane as the shift amount, so
// negating every byte gives the correct per-lane negative count.
instruct vsra4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// vsra8S: 128-bit form of the above.
instruct vsra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
17514 
// vsrl4S: variable logical right shift of 16-bit lanes, via ushl by the
// byte-wise negated counts (see vsra4S for why T8B negation is sufficient).
instruct vsrl4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// vsrl8S: 128-bit form of the above.
instruct vsrl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
17549 
// vsll4S_imm: left shift of 16-bit lanes by an immediate; counts >= 16
// (element width) zero the result via eor of $src with itself.
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // shift >= element width: result is all zeros
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// vsll8S_imm: 128-bit form of the above.
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // shift >= element width: result is all zeros
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17588 
// vsra4S_imm: arithmetic right shift of 16-bit lanes by an immediate;
// counts >= 16 clamp to 15 (all-sign-bits result).
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) sh = 15;  // clamp: all-sign-bits result
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

// vsra8S_imm: 128-bit form of the above.
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) sh = 15;  // clamp: all-sign-bits result
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
17617 
// vsrl4S_imm: logical right shift of 16-bit lanes by an immediate; counts
// >= 16 zero the result via eor of $src with itself.
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // shift >= element width: result is all zeros
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// vsrl8S_imm: 128-bit form of the above.
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // shift >= element width: result is all zeros
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17656 
// vsll2I: variable left shift of 32-bit (int) lanes, 64-bit form.
instruct vsll2I(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// vsll4I: variable left shift of 32-bit lanes, 128-bit form.
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17682 
// vsra2I: variable arithmetic right shift of 32-bit lanes, via sshl by the
// byte-wise negated counts (see vsra4S for why T8B negation is sufficient).
instruct vsra2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// vsra4I: 128-bit form of the above.
instruct vsra4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
17716 
// vsrl2I: variable logical right shift of 32-bit lanes, via ushl by the
// byte-wise negated counts.
instruct vsrl2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// vsrl4I: 128-bit form of the above.
instruct vsrl4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
17750 
// vsll2I_imm: left shift of 32-bit lanes by an immediate.
// NOTE(review): unlike the B/H immediate rules above there is no >= width
// guard here; presumably int shift constants reaching the matcher are
// already masked to 0..31 by the middle end -- confirm.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}

// vsll4I_imm: 128-bit form of the above.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
17776 
// vsra2I_imm: arithmetic right shift of 32-bit lanes by an immediate
// (no clamp needed -- see note on vsll2I_imm regarding int shift constants).
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}

// vsra4I_imm: 128-bit form of the above.
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
17802 
// vsrl2I_imm: logical right shift of 32-bit lanes by an immediate.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}

// vsrl4I_imm: 128-bit form of the above.
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
17828 
// vsll2L: variable left shift of two 64-bit (long) lanes.
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17841 
// vsra2L: variable arithmetic right shift of two 64-bit lanes, via sshl by
// the byte-wise negated counts (see vsra4S for why T16B negation works).
instruct vsra2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
17858 
// vsrl2L: variable logical right shift of two 64-bit lanes, via ushl by the
// byte-wise negated counts.
instruct vsrl2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
17875 
// vsll2L_imm: left shift of two 64-bit lanes by an immediate
// (see note on vsll2I_imm regarding the absence of a >= width guard).
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
17888 
// vsra2L_imm: arithmetic right shift of two 64-bit lanes by an immediate.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
17901 
// vsrl2L_imm: logical right shift of two 64-bit lanes by an immediate.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
17914 
// vmax2F: element-wise maximum of two float lanes (64-bit vector) using the
// SIMD fmax instruction; predicate checks element type since MaxV is not
// typed by element in the ideal graph.
instruct vmax2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MaxV src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmax  $dst,$src1,$src2\t# vector (2F)" %}
  ins_encode %{
    __ fmax(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// vmax4F: element-wise maximum of four float lanes (128-bit vector).
instruct vmax4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MaxV src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmax  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmax(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}

// vmax2D: element-wise maximum of two double lanes (128-bit vector).
instruct vmax2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MaxV src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmax  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmax(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
17956 
// vmin2F: element-wise minimum of two float lanes (64-bit vector) using the
// SIMD fmin instruction; mirrors the vmax rules above.
instruct vmin2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinV src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmin  $dst,$src1,$src2\t# vector (2F)" %}
  ins_encode %{
    __ fmin(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// vmin4F: element-wise minimum of four float lanes (128-bit vector).
instruct vmin4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinV src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmin  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmin(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}

// vmin2D: element-wise minimum of two double lanes (128-bit vector).
instruct vmin2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MinV src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmin  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmin(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
17998 
// vround2D_reg: round two double lanes according to the constant rounding
// mode: rint -> frintn (round to nearest, ties to even), floor -> frintm
// (toward -inf), ceil -> frintp (toward +inf).
// Fix: the switch previously had no default, so an unexpected rmode constant
// would silently emit no instruction and leave $dst undefined; guard with
// ShouldNotReachHere() per HotSpot convention.
instruct vround2D_reg(vecX dst, vecX src, immI rmode) %{
  predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "frint  $dst, $src, $rmode" %}
  ins_encode %{
    switch ($rmode$$constant) {
      case RoundDoubleModeNode::rmode_rint:
        __ frintn(as_FloatRegister($dst$$reg), __ T2D,
                  as_FloatRegister($src$$reg));
        break;
      case RoundDoubleModeNode::rmode_floor:
        __ frintm(as_FloatRegister($dst$$reg), __ T2D,
                  as_FloatRegister($src$$reg));
        break;
      case RoundDoubleModeNode::rmode_ceil:
        __ frintp(as_FloatRegister($dst$$reg), __ T2D,
                  as_FloatRegister($src$$reg));
        break;
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe(vdop_fp128);
%}
18021 
// vpopcount4I: per-int-lane population count.  cnt counts set bits within
// each byte; the two uaddlp (unsigned pairwise add-long) steps then widen
// and accumulate the byte counts: 16B -> 8H, then 8H -> 4S, leaving one
// bit count per 32-bit lane.
instruct vpopcount4I(vecX dst, vecX src) %{
  predicate(UsePopCountInstruction && n->as_Vector()->length() == 4);
  match(Set dst (PopCountVI src));
  format %{
    "cnt     $dst, $src\t# vector (16B)\n\t"
    "uaddlp  $dst, $dst\t# vector (16B)\n\t"
    "uaddlp  $dst, $dst\t# vector (8H)"
  %}
  ins_encode %{
     __ cnt(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg));
     __ uaddlp(as_FloatRegister($dst$$reg), __ T16B,
               as_FloatRegister($dst$$reg));
     __ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
               as_FloatRegister($dst$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// vpopcount2I: 64-bit form of the above (8B -> 4H -> 2S).
instruct vpopcount2I(vecD dst, vecD src) %{
  predicate(UsePopCountInstruction && n->as_Vector()->length() == 2);
  match(Set dst (PopCountVI src));
  format %{
    "cnt     $dst, $src\t# vector (8B)\n\t"
    "uaddlp  $dst, $dst\t# vector (8B)\n\t"
    "uaddlp  $dst, $dst\t# vector (4H)"
  %}
  ins_encode %{
     __ cnt(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg));
     __ uaddlp(as_FloatRegister($dst$$reg), __ T8B,
               as_FloatRegister($dst$$reg));
     __ uaddlp(as_FloatRegister($dst$$reg), __ T4H,
               as_FloatRegister($dst$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
18059 
18060 //----------PEEPHOLE RULES-----------------------------------------------------
18061 // These must follow all instruction definitions as they use the names
18062 // defined in the instructions definitions.
18063 //
18064 // peepmatch ( root_instr_name [preceding_instruction]* );
18065 //
18066 // peepconstraint %{
18067 // (instruction_number.operand_name relational_op instruction_number.operand_name
18068 //  [, ...] );
18069 // // instruction numbers are zero-based using left to right order in peepmatch
18070 //
18071 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
18072 // // provide an instruction_number.operand_name for each operand that appears
18073 // // in the replacement instruction's match rule
18074 //
18075 // ---------VM FLAGS---------------------------------------------------------
18076 //
18077 // All peephole optimizations can be turned off using -XX:-OptoPeephole
18078 //
18079 // Each peephole rule is given an identifying number starting with zero and
18080 // increasing by one in the order seen by the parser.  An individual peephole
18081 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
18082 // on the command-line.
18083 //
18084 // ---------CURRENT LIMITATIONS----------------------------------------------
18085 //
18086 // Only match adjacent instructions in same basic block
18087 // Only equality constraints
18088 // Only constraints between operands, not (0.dest_reg == RAX_enc)
18089 // Only one replacement instruction
18090 //
18091 // ---------EXAMPLE----------------------------------------------------------
18092 //
18093 // // pertinent parts of existing instructions in architecture description
18094 // instruct movI(iRegINoSp dst, iRegI src)
18095 // %{
18096 //   match(Set dst (CopyI src));
18097 // %}
18098 //
18099 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
18100 // %{
18101 //   match(Set dst (AddI dst src));
18102 //   effect(KILL cr);
18103 // %}
18104 //
18105 // // Change (inc mov) to lea
18106 // peephole %{
//   // increment preceded by register-register move
18108 //   peepmatch ( incI_iReg movI );
18109 //   // require that the destination register of the increment
18110 //   // match the destination register of the move
18111 //   peepconstraint ( 0.dst == 1.dst );
18112 //   // construct a replacement instruction that sets
18113 //   // the destination to ( move's source register + one )
18114 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
18115 // %}
18116 //
18117 
18118 // Implementation no longer uses movX instructions since
18119 // machine-independent system no longer uses CopyX nodes.
18120 //
18121 // peephole
18122 // %{
18123 //   peepmatch (incI_iReg movI);
18124 //   peepconstraint (0.dst == 1.dst);
18125 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18126 // %}
18127 
18128 // peephole
18129 // %{
18130 //   peepmatch (decI_iReg movI);
18131 //   peepconstraint (0.dst == 1.dst);
18132 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18133 // %}
18134 
18135 // peephole
18136 // %{
18137 //   peepmatch (addI_iReg_imm movI);
18138 //   peepconstraint (0.dst == 1.dst);
18139 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18140 // %}
18141 
18142 // peephole
18143 // %{
18144 //   peepmatch (incL_iReg movL);
18145 //   peepconstraint (0.dst == 1.dst);
18146 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18147 // %}
18148 
18149 // peephole
18150 // %{
18151 //   peepmatch (decL_iReg movL);
18152 //   peepconstraint (0.dst == 1.dst);
18153 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18154 // %}
18155 
18156 // peephole
18157 // %{
18158 //   peepmatch (addL_iReg_imm movL);
18159 //   peepconstraint (0.dst == 1.dst);
18160 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18161 // %}
18162 
18163 // peephole
18164 // %{
18165 //   peepmatch (addP_iReg_imm movP);
18166 //   peepconstraint (0.dst == 1.dst);
18167 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
18168 // %}
18169 
18170 // // Change load of spilled value to only a spill
18171 // instruct storeI(memory mem, iRegI src)
18172 // %{
18173 //   match(Set mem (StoreI mem src));
18174 // %}
18175 //
18176 // instruct loadI(iRegINoSp dst, memory mem)
18177 // %{
18178 //   match(Set dst (LoadI mem));
18179 // %}
18180 //
18181 
18182 //----------SMARTSPILL RULES---------------------------------------------------
18183 // These must follow all instruction definitions as they use the names
18184 // defined in the instructions definitions.
18185 
18186 // Local Variables:
18187 // mode: c++
18188 // End: