1 //
   2 // Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, Red Hat Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
  31 // architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
  71 //   r27-r31 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
  74 // as regards Java usage. we don't use any callee save registers
  75 // because this makes it difficult to de-optimise a frame (see comment
  76 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers
  80 // (a real low half Rn plus a virtual high half Rn_H for each; r8 and r9 are deliberately left undefined)
  81 reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
  82 reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
  83 reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
  84 reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
  85 reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
  86 reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
  87 reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
  88 reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
  89 reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
  90 reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
  91 reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
  92 reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
  93 reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
  94 reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
  95 reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
  96 reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
  97 reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
  98 reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  99 reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
 100 reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 101 reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
 102 reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
 103 reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
 104 reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
 105 reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
 106 reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
 107 reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
 108 reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
 109 reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
 110 reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
 111 reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
 112 reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
 113 reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
 114 reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
 115 reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        ); // r19-r26: SOC for Java, SOE for the C convention
 116 reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
 117 reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
 118 reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
 119 reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
 120 reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
 121 reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
 122 reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
 123 reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
 124 reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
 125 reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
 126 reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
 127 reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
 128 reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
 129 reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
 130 reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
 131 reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
 132 reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
 133 reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
 134 reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
 135 reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
 136 reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
 137 reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
 138 reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
 139 reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
 140 reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values: up to 4 * 32 bit
 157 // floats or 2 * 64 bit doubles in its 128 bits.  We currently only
 158 // use the first float or double element of the vector.
 159 
 160 // for Java use float registers v0-v15 are always save on call (whereas
 161 // the platform ABI treats v8-v15 as callee save). float registers
 162 // v16-v31 are SOC as per the platform spec
 163 // Each register Vn is described as four slots: Vn, Vn_H, Vn_J and Vn_K (VMReg, next(), next(2), next(3)).
 164   reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
 165   reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
 166   reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
 167   reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );
 168 
 169   reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
 170   reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
 171   reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
 172   reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );
 173 
 174   reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
 175   reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
 176   reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
 177   reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );
 178 
 179   reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
 180   reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
 181   reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
 182   reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );
 183 
 184   reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
 185   reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
 186   reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
 187   reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );
 188 
 189   reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
 190   reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
 191   reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
 192   reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );
 193 
 194   reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
 195   reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
 196   reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
 197   reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );
 198 
 199   reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
 200   reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
 201   reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
 202   reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );
 203 
 204   reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
 205   reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
 206   reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
 207   reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );
 208 
 209   reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
 210   reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
 211   reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
 212   reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );
 213 
 214   reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
 215   reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
 216   reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
 217   reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
 218 
 219   reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
 220   reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
 221   reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
 222   reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
 223 
 224   reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
 225   reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
 226   reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
 227   reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
 228 
 229   reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
 230   reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
 231   reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
 232   reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
 233 
 234   reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
 235   reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
 236   reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
 237   reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
 238 
 239   reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
 240   reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
 241   reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
 242   reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
 243 
 244   reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
 245   reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
 246   reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
 247   reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
 248 
 249   reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
 250   reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
 251   reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
 252   reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
 253 
 254   reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
 255   reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
 256   reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
 257   reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
 258 
 259   reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
 260   reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
 261   reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
 262   reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
 263 
 264   reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
 265   reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
 266   reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
 267   reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
 268 
 269   reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
 270   reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
 271   reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
 272   reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
 273 
 274   reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
 275   reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
 276   reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
 277   reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
 278 
 279   reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
 280   reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
 281   reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
 282   reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
 283 
 284   reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
 285   reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
 286   reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
 287   reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
 288 
 289   reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
 290   reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
 291   reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
 292   reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
 293 
 294   reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
 295   reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
 296   reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
 297   reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
 298 
 299   reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
 300   reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
 301   reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
 302   reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
 303 
 304   reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
 305   reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
 306   reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
 307   reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
 308 
 309   reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
 310   reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
 311   reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
 312   reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
 313 
 314   reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
 315   reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
 316   reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
 317   reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
 318 
 319   reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
 320   reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
 321   reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
 322   reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
 328 // the AArch64 CPSR status flag register is not directly accessible as
 329 // instruction operand. the FPSR status flag register is a system
 330 // register which can be written/read using MSR/MRS but again does not
 331 // appear as an operand (a code identifying the FPSR occurs as an
 332 // immediate value in the instruction).
 333 
 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad()); // pseudo register for the flags: ideal type 0, encoding 32, VMReg Bad()
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
 345 alloc_class chunk0(
 346     // volatiles: listed first, so they get the highest selection priority
 347     R10, R10_H,
 348     R11, R11_H,
 349     R12, R12_H,
 350     R13, R13_H,
 351     R14, R14_H,
 352     R15, R15_H,
 353     R16, R16_H,
 354     R17, R17_H,
 355     R18, R18_H,
 356 
 357     // arg registers
 358     R0, R0_H,
 359     R1, R1_H,
 360     R2, R2_H,
 361     R3, R3_H,
 362     R4, R4_H,
 363     R5, R5_H,
 364     R6, R6_H,
 365     R7, R7_H,
 366 
 367     // non-volatiles (SOE in the C convention, but SOC for Java per their reg_defs)
 368     R19, R19_H,
 369     R20, R20_H,
 370     R21, R21_H,
 371     R22, R22_H,
 372     R23, R23_H,
 373     R24, R24_H,
 374     R25, R25_H,
 375     R26, R26_H,
 376 
 377     // non-allocatable registers (all declared NS for Java in their reg_defs)
 378 
 379     R27, R27_H, // heapbase
 380     R28, R28_H, // thread
 381     R29, R29_H, // fp
 382     R30, R30_H, // lr
 383     R31, R31_H, // sp
 384 );
 385 
 386 alloc_class chunk1(
 387 
 388     // v16-v31: declared SOC for both conventions in their reg_defs; listed first
 389     V16, V16_H, V16_J, V16_K,
 390     V17, V17_H, V17_J, V17_K,
 391     V18, V18_H, V18_J, V18_K,
 392     V19, V19_H, V19_J, V19_K,
 393     V20, V20_H, V20_J, V20_K,
 394     V21, V21_H, V21_J, V21_K,
 395     V22, V22_H, V22_J, V22_K,
 396     V23, V23_H, V23_J, V23_K,
 397     V24, V24_H, V24_J, V24_K,
 398     V25, V25_H, V25_J, V25_K,
 399     V26, V26_H, V26_J, V26_K,
 400     V27, V27_H, V27_J, V27_K,
 401     V28, V28_H, V28_J, V28_K,
 402     V29, V29_H, V29_J, V29_K,
 403     V30, V30_H, V30_J, V30_K,
 404     V31, V31_H, V31_J, V31_K,
 405 
 406     // arg registers
 407     V0, V0_H, V0_J, V0_K,
 408     V1, V1_H, V1_J, V1_K,
 409     V2, V2_H, V2_J, V2_K,
 410     V3, V3_H, V3_J, V3_K,
 411     V4, V4_H, V4_J, V4_K,
 412     V5, V5_H, V5_J, V5_K,
 413     V6, V6_H, V6_J, V6_K,
 414     V7, V7_H, V7_J, V7_K,
 415 
 416     // v8-v15: callee save in the platform ABI, but declared SOC in their reg_defs
 417     V8, V8_H, V8_J, V8_K,
 418     V9, V9_H, V9_J, V9_K,
 419     V10, V10_H, V10_J, V10_K,
 420     V11, V11_H, V11_J, V11_K,
 421     V12, V12_H, V12_J, V12_K,
 422     V13, V13_H, V13_J, V13_K,
 423     V14, V14_H, V14_J, V14_K,
 424     V15, V15_H, V15_J, V15_K,
 425 );
 426 
 427 alloc_class chunk2(RFLAGS); // the flags pseudo register gets a chunk of its own
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
 432 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 433 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 434 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 435 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
 438 // Class for all 32 bit integer registers (low halves only) -- excludes
 439 // SP (R31), which will never be used as an integer register
 440 reg_class any_reg32(
 441     R0,
 442     R1,
 443     R2,
 444     R3,
 445     R4,
 446     R5,
 447     R6,
 448     R7,
 449     R10,
 450     R11,
 451     R12,
 452     R13,
 453     R14,
 454     R15,
 455     R16,
 456     R17,
 457     R18,
 458     R19,
 459     R20,
 460     R21,
 461     R22,
 462     R23,
 463     R24,
 464     R25,
 465     R26,
 466     R27,
 467     R28,
 468     R29,
 469     R30
 470 );
 471 
 472 // Singleton class for R0 int register (low 32 bit half only, no R0_H)
 473 reg_class int_r0_reg(R0);
 474 
 475 // Singleton class for R2 int register (low 32 bit half only, no R2_H)
 476 reg_class int_r2_reg(R2);
 477 
 478 // Singleton class for R3 int register (low 32 bit half only, no R3_H)
 479 reg_class int_r3_reg(R3);
 480 
 481 // Singleton class for R4 int register (low 32 bit half only, no R4_H)
 482 reg_class int_r4_reg(R4);
 483 
 484 // Class for all long integer registers (including SP, i.e. R31)
 485 reg_class any_reg(
 486     R0, R0_H,
 487     R1, R1_H,
 488     R2, R2_H,
 489     R3, R3_H,
 490     R4, R4_H,
 491     R5, R5_H,
 492     R6, R6_H,
 493     R7, R7_H,
 494     R10, R10_H,
 495     R11, R11_H,
 496     R12, R12_H,
 497     R13, R13_H,
 498     R14, R14_H,
 499     R15, R15_H,
 500     R16, R16_H,
 501     R17, R17_H,
 502     R18, R18_H,
 503     R19, R19_H,
 504     R20, R20_H,
 505     R21, R21_H,
 506     R22, R22_H,
 507     R23, R23_H,
 508     R24, R24_H,
 509     R25, R25_H,
 510     R26, R26_H,
 511     R27, R27_H,
 512     R28, R28_H,
 513     R29, R29_H,
 514     R30, R30_H,
 515     R31, R31_H
 516 );
 517 
 518 // Class for all non-special 32 bit integer registers, with fp (R29) excluded as well
 519 reg_class no_special_reg32_no_fp(
 520     R0,
 521     R1,
 522     R2,
 523     R3,
 524     R4,
 525     R5,
 526     R6,
 527     R7,
 528     R10,
 529     R11,
 530     R12,                        // rmethod
 531     R13,
 532     R14,
 533     R15,
 534     R16,
 535     R17,
 536     R18,
 537     R19,
 538     R20,
 539     R21,
 540     R22,
 541     R23,
 542     R24,
 543     R25,
 544     R26
 545  /* R27, */                     // heapbase
 546  /* R28, */                     // thread
 547  /* R29, */                     // fp
 548  /* R30, */                     // lr
 549  /* R31 */                      // sp
 550 );
 551 // Class for all non-special 32 bit integer registers, with fp (R29) available to the allocator
 552 reg_class no_special_reg32_with_fp(
 553     R0,
 554     R1,
 555     R2,
 556     R3,
 557     R4,
 558     R5,
 559     R6,
 560     R7,
 561     R10,
 562     R11,
 563     R12,                        // rmethod
 564     R13,
 565     R14,
 566     R15,
 567     R16,
 568     R17,
 569     R18,
 570     R19,
 571     R20,
 572     R21,
 573     R22,
 574     R23,
 575     R24,
 576     R25,
 577     R26,                        // separator needed: R29 below is a live entry
 578  /* R27, */                     // heapbase
 579  /* R28, */                     // thread
 580     R29                         // fp
 581  /* R30, */                     // lr
 582  /* R31 */                      // sp
 583 );
 584 // Chooses between the two classes above via PreserveFramePointer. NOTE(review): presumably the _no_fp class when the flag is set -- confirm against ADLC.
 585 reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
 587 // Class for all non-special long integer registers, with fp (R29) excluded as well
 588 reg_class no_special_reg_no_fp(
 589     R0, R0_H,
 590     R1, R1_H,
 591     R2, R2_H,
 592     R3, R3_H,
 593     R4, R4_H,
 594     R5, R5_H,
 595     R6, R6_H,
 596     R7, R7_H,
 597     R10, R10_H,
 598     R11, R11_H,
 599     R12, R12_H,                 // rmethod
 600     R13, R13_H,
 601     R14, R14_H,
 602     R15, R15_H,
 603     R16, R16_H,
 604     R17, R17_H,
 605     R18, R18_H,
 606     R19, R19_H,
 607     R20, R20_H,
 608     R21, R21_H,
 609     R22, R22_H,
 610     R23, R23_H,
 611     R24, R24_H,
 612     R25, R25_H,
 613     R26, R26_H,
 614  /* R27, R27_H, */              // heapbase
 615  /* R28, R28_H, */              // thread
 616  /* R29, R29_H, */              // fp
 617  /* R30, R30_H, */              // lr
 618  /* R31, R31_H */               // sp
 619 );
 620 // Class for all non-special long integer registers, with fp (R29, R29_H) available to the allocator
 621 reg_class no_special_reg_with_fp(
 622     R0, R0_H,
 623     R1, R1_H,
 624     R2, R2_H,
 625     R3, R3_H,
 626     R4, R4_H,
 627     R5, R5_H,
 628     R6, R6_H,
 629     R7, R7_H,
 630     R10, R10_H,
 631     R11, R11_H,
 632     R12, R12_H,                 // rmethod
 633     R13, R13_H,
 634     R14, R14_H,
 635     R15, R15_H,
 636     R16, R16_H,
 637     R17, R17_H,
 638     R18, R18_H,
 639     R19, R19_H,
 640     R20, R20_H,
 641     R21, R21_H,
 642     R22, R22_H,
 643     R23, R23_H,
 644     R24, R24_H,
 645     R25, R25_H,
 646     R26, R26_H,
 647  /* R27, R27_H, */              // heapbase
 648  /* R28, R28_H, */              // thread
 649     R29, R29_H,                 // fp
 650  /* R30, R30_H, */              // lr
 651  /* R31, R31_H */               // sp
 652 );
 653 // Chooses between the two classes above via PreserveFramePointer. NOTE(review): presumably the _no_fp class when the flag is set -- confirm against ADLC.
 654 reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
 656 // Class for 64 bit register r0 (this and the classes below each name one register: low and high half)
 657 reg_class r0_reg(
 658     R0, R0_H
 659 );
 660 
 661 // Class for 64 bit register r1
 662 reg_class r1_reg(
 663     R1, R1_H
 664 );
 665 
 666 // Class for 64 bit register r2
 667 reg_class r2_reg(
 668     R2, R2_H
 669 );
 670 
 671 // Class for 64 bit register r3
 672 reg_class r3_reg(
 673     R3, R3_H
 674 );
 675 
 676 // Class for 64 bit register r4
 677 reg_class r4_reg(
 678     R4, R4_H
 679 );
 680 
 681 // Class for 64 bit register r5
 682 reg_class r5_reg(
 683     R5, R5_H
 684 );
 685 
 686 // Class for 64 bit register r10
 687 reg_class r10_reg(
 688     R10, R10_H
 689 );
 690 
 691 // Class for 64 bit register r11
 692 reg_class r11_reg(
 693     R11, R11_H
 694 );
 695 
 696 // Class for method register (r12)
 697 reg_class method_reg(
 698     R12, R12_H
 699 );
 700 
 701 // Class for heapbase register (r27)
 702 reg_class heapbase_reg(
 703     R27, R27_H
 704 );
 705 
 706 // Class for thread register (r28)
 707 reg_class thread_reg(
 708     R28, R28_H
 709 );
 710 
 711 // Class for frame pointer register (r29)
 712 reg_class fp_reg(
 713     R29, R29_H
 714 );
 715 
 716 // Class for link register (r30)
 717 reg_class lr_reg(
 718     R30, R30_H
 719 );
 720 
 721 // Class for long sp register (r31)
 722 reg_class sp_reg(
 723   R31, R31_H
 724 );
 725 
 726 // Class for all pointer registers (heapbase, thread, fp, lr and sp included)
 727 reg_class ptr_reg(
 728     R0, R0_H,
 729     R1, R1_H,
 730     R2, R2_H,
 731     R3, R3_H,
 732     R4, R4_H,
 733     R5, R5_H,
 734     R6, R6_H,
 735     R7, R7_H,
 736     R10, R10_H,
 737     R11, R11_H,
 738     R12, R12_H,
 739     R13, R13_H,
 740     R14, R14_H,
 741     R15, R15_H,
 742     R16, R16_H,
 743     R17, R17_H,
 744     R18, R18_H,
 745     R19, R19_H,
 746     R20, R20_H,
 747     R21, R21_H,
 748     R22, R22_H,
 749     R23, R23_H,
 750     R24, R24_H,
 751     R25, R25_H,
 752     R26, R26_H,
 753     R27, R27_H,
 754     R28, R28_H,
 755     R29, R29_H,
 756     R30, R30_H,
 757     R31, R31_H
 758 );
 759 
 760 // Class for all non_special pointer registers; a plain (non-dynamic) class, so fp is always excluded here
 761 reg_class no_special_ptr_reg(
 762     R0, R0_H,
 763     R1, R1_H,
 764     R2, R2_H,
 765     R3, R3_H,
 766     R4, R4_H,
 767     R5, R5_H,
 768     R6, R6_H,
 769     R7, R7_H,
 770     R10, R10_H,
 771     R11, R11_H,
 772     R12, R12_H,
 773     R13, R13_H,
 774     R14, R14_H,
 775     R15, R15_H,
 776     R16, R16_H,
 777     R17, R17_H,
 778     R18, R18_H,
 779     R19, R19_H,
 780     R20, R20_H,
 781     R21, R21_H,
 782     R22, R22_H,
 783     R23, R23_H,
 784     R24, R24_H,
 785     R25, R25_H,
 786     R26, R26_H,
 787  /* R27, R27_H, */              // heapbase
 788  /* R28, R28_H, */              // thread
 789  /* R29, R29_H, */              // fp
 790  /* R30, R30_H, */              // lr
 791  /* R31, R31_H */               // sp
 792 );
 793 
 794 // Class for all float registers (first slot Vn of each register only)
 795 reg_class float_reg(
 796     V0,
 797     V1,
 798     V2,
 799     V3,
 800     V4,
 801     V5,
 802     V6,
 803     V7,
 804     V8,
 805     V9,
 806     V10,
 807     V11,
 808     V12,
 809     V13,
 810     V14,
 811     V15,
 812     V16,
 813     V17,
 814     V18,
 815     V19,
 816     V20,
 817     V21,
 818     V22,
 819     V23,
 820     V24,
 821     V25,
 822     V26,
 823     V27,
 824     V28,
 825     V29,
 826     V30,
 827     V31
 828 );
 829 
 830 // Double precision float registers have virtual `high halves' that
 831 // are needed by the allocator.
 832 // Class for all double registers (slots Vn and Vn_H of each register)
 833 reg_class double_reg(
 834     V0, V0_H,
 835     V1, V1_H,
 836     V2, V2_H,
 837     V3, V3_H,
 838     V4, V4_H,
 839     V5, V5_H,
 840     V6, V6_H,
 841     V7, V7_H,
 842     V8, V8_H,
 843     V9, V9_H,
 844     V10, V10_H,
 845     V11, V11_H,
 846     V12, V12_H,
 847     V13, V13_H,
 848     V14, V14_H,
 849     V15, V15_H,
 850     V16, V16_H,
 851     V17, V17_H,
 852     V18, V18_H,
 853     V19, V19_H,
 854     V20, V20_H,
 855     V21, V21_H,
 856     V22, V22_H,
 857     V23, V23_H,
 858     V24, V24_H,
 859     V25, V25_H,
 860     V26, V26_H,
 861     V27, V27_H,
 862     V28, V28_H,
 863     V29, V29_H,
 864     V30, V30_H,
 865     V31, V31_H
 866 );
 867 
 868 // Class for all 64bit vector registers (slots Vn and Vn_H, the same membership as double_reg)
 869 reg_class vectord_reg(
 870     V0, V0_H,
 871     V1, V1_H,
 872     V2, V2_H,
 873     V3, V3_H,
 874     V4, V4_H,
 875     V5, V5_H,
 876     V6, V6_H,
 877     V7, V7_H,
 878     V8, V8_H,
 879     V9, V9_H,
 880     V10, V10_H,
 881     V11, V11_H,
 882     V12, V12_H,
 883     V13, V13_H,
 884     V14, V14_H,
 885     V15, V15_H,
 886     V16, V16_H,
 887     V17, V17_H,
 888     V18, V18_H,
 889     V19, V19_H,
 890     V20, V20_H,
 891     V21, V21_H,
 892     V22, V22_H,
 893     V23, V23_H,
 894     V24, V24_H,
 895     V25, V25_H,
 896     V26, V26_H,
 897     V27, V27_H,
 898     V28, V28_H,
 899     V29, V29_H,
 900     V30, V30_H,
 901     V31, V31_H
 902 );
 903 
 904 // Class for all 128bit vector registers
 905 reg_class vectorx_reg(
 906     V0, V0_H, V0_J, V0_K,
 907     V1, V1_H, V1_J, V1_K,
 908     V2, V2_H, V2_J, V2_K,
 909     V3, V3_H, V3_J, V3_K,
 910     V4, V4_H, V4_J, V4_K,
 911     V5, V5_H, V5_J, V5_K,
 912     V6, V6_H, V6_J, V6_K,
 913     V7, V7_H, V7_J, V7_K,
 914     V8, V8_H, V8_J, V8_K,
 915     V9, V9_H, V9_J, V9_K,
 916     V10, V10_H, V10_J, V10_K,
 917     V11, V11_H, V11_J, V11_K,
 918     V12, V12_H, V12_J, V12_K,
 919     V13, V13_H, V13_J, V13_K,
 920     V14, V14_H, V14_J, V14_K,
 921     V15, V15_H, V15_J, V15_K,
 922     V16, V16_H, V16_J, V16_K,
 923     V17, V17_H, V17_J, V17_K,
 924     V18, V18_H, V18_J, V18_K,
 925     V19, V19_H, V19_J, V19_K,
 926     V20, V20_H, V20_J, V20_K,
 927     V21, V21_H, V21_J, V21_K,
 928     V22, V22_H, V22_J, V22_K,
 929     V23, V23_H, V23_J, V23_K,
 930     V24, V24_H, V24_J, V24_K,
 931     V25, V25_H, V25_J, V25_K,
 932     V26, V26_H, V26_J, V26_K,
 933     V27, V27_H, V27_J, V27_K,
 934     V28, V28_H, V28_J, V28_K,
 935     V29, V29_H, V29_J, V29_K,
 936     V30, V30_H, V30_J, V30_K,
 937     V31, V31_H, V31_J, V31_K
 938 );
 939 
// Class pinning an operand to register v0 (V0 plus its V0_H high half).
// NOTE(review): previously described as a "128 bit" class, but only two
// names are listed -- the per-register layout of double_reg/vectord_reg,
// not the four-name layout of vectorx_reg. Confirm the intended width.
reg_class v0_reg(
    V0, V0_H
);
 944 
// Class pinning an operand to register v1 (V1 plus its V1_H high half).
// NOTE(review): previously described as a "128 bit" class, but only two
// names are listed -- the per-register layout of double_reg/vectord_reg,
// not the four-name layout of vectorx_reg. Confirm the intended width.
reg_class v1_reg(
    V1, V1_H
);
 949 
// Class pinning an operand to register v2 (V2 plus its V2_H high half).
// NOTE(review): previously described as a "128 bit" class, but only two
// names are listed -- the per-register layout of double_reg/vectord_reg,
// not the four-name layout of vectorx_reg. Confirm the intended width.
reg_class v2_reg(
    V2, V2_H
);
 954 
// Class pinning an operand to register v3 (V3 plus its V3_H high half).
// NOTE(review): previously described as a "128 bit" class, but only two
// names are listed -- the per-register layout of double_reg/vectord_reg,
// not the four-name layout of vectorx_reg. Confirm the intended width.
reg_class v3_reg(
    V3, V3_H
);
 959 
// Singleton class for condition codes: its only member is RFLAGS.
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // Each int_def gives the literal value first and the defining
  // expression second; per the format notes above, the generated
  // adlc_verification() asserts that the two agree.

  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // A branch is costed at twice the default instruction cost.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  // A call is costed the same as a branch.
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // A volatile memory reference is costed at ten times the default.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "gc/shared/cardTableModRefBS.hpp"
 999 #include "opto/addnode.hpp"
1000 
// Call trampoline stub queries, used for optimization in
// Compile::shorten_branches. There are no call trampolines on this
// platform, so both queries report zero.
class CallStubImpl {
 public:
  // Size of call trampoline stub (none on this platform).
  static uint size_call_trampoline() { return 0; }

  // Number of relocations needed by a call trampoline stub (none on
  // this platform).
  static uint reloc_call_trampoline() { return 0; }
};
1018 
// Emit and size queries for the exception handler and deopt handler.
class HandlerImpl {

 public:

  // Declared here only; the definitions are not part of this class body.
  // NOTE(review): presumably each returns the handler's offset within
  // cbuf -- confirm at the definitions.
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // Size of the exception handler: whatever
  // MacroAssembler::far_branch_size() reports.
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  // Size of the deopt handler: a fixed four instructions.
  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // NOTE(review): presumably 1 (adr) + 3 (worst-case far branch);
    // confirm against MacroAssembler::far_branch_size().
    return 4 * NativeInstruction::instruction_size;
  }
};
1035 
  // Prototypes for the free functions used by the volatile put/get and
  // CAS matching predicates. These are declarations only.

  // graph traversal helpers

  // membar (if any) feeding n / fed by n via Ctl and Mem projections
  MemBarNode *parent_membar(const Node *n);
  MemBarNode *child_membar(const MemBarNode *n);
  // true for a MemBarRelease, or a MemBarCPUOrder fed by one
  bool leading_membar(const MemBarNode *barrier);

  bool is_card_mark_membar(const MemBarNode *barrier);
  bool is_CAS(int opcode);

  // NOTE(review): presumably these walk between the leading, card mark
  // and trailing membars of a volatile put or CAS subgraph, as their
  // names suggest -- confirm at the definitions.
  MemBarNode *leading_to_normal(MemBarNode *leading);
  MemBarNode *normal_to_leading(const MemBarNode *barrier);
  MemBarNode *card_mark_to_trailing(const MemBarNode *barrier);
  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
  MemBarNode *trailing_to_leading(const MemBarNode *trailing);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);

  // predicate controlling addressing modes
  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1070 %}
1071 
1072 source %{
1073 
  // Optimization of volatile gets and puts
  // --------------------------------------
1076   //
1077   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1078   // use to implement volatile reads and writes. For a volatile read
1079   // we simply need
1080   //
1081   //   ldar<x>
1082   //
1083   // and for a volatile write we need
1084   //
1085   //   stlr<x>
1086   //
1087   // Alternatively, we can implement them by pairing a normal
1088   // load/store with a memory barrier. For a volatile read we need
1089   //
1090   //   ldr<x>
1091   //   dmb ishld
1092   //
1093   // for a volatile write
1094   //
1095   //   dmb ish
1096   //   str<x>
1097   //   dmb ish
1098   //
1099   // We can also use ldaxr and stlxr to implement compare and swap CAS
1100   // sequences. These are normally translated to an instruction
1101   // sequence like the following
1102   //
1103   //   dmb      ish
1104   // retry:
1105   //   ldxr<x>   rval raddr
1106   //   cmp       rval rold
1107   //   b.ne done
  //   stlxr<x>  rval, rnew, raddr
1109   //   cbnz      rval retry
1110   // done:
1111   //   cset      r0, eq
1112   //   dmb ishld
1113   //
1114   // Note that the exclusive store is already using an stlxr
1115   // instruction. That is required to ensure visibility to other
1116   // threads of the exclusive write (assuming it succeeds) before that
1117   // of any subsequent writes.
1118   //
1119   // The following instruction sequence is an improvement on the above
1120   //
1121   // retry:
1122   //   ldaxr<x>  rval raddr
1123   //   cmp       rval rold
1124   //   b.ne done
  //   stlxr<x>  rval, rnew, raddr
1126   //   cbnz      rval retry
1127   // done:
1128   //   cset      r0, eq
1129   //
1130   // We don't need the leading dmb ish since the stlxr guarantees
1131   // visibility of prior writes in the case that the swap is
1132   // successful. Crucially we don't have to worry about the case where
1133   // the swap is not successful since no valid program should be
1134   // relying on visibility of prior changes by the attempting thread
1135   // in the case where the CAS fails.
1136   //
1137   // Similarly, we don't need the trailing dmb ishld if we substitute
1138   // an ldaxr instruction since that will provide all the guarantees we
1139   // require regarding observation of changes made by other threads
1140   // before any change to the CAS address observed by the load.
1141   //
1142   // In order to generate the desired instruction sequence we need to
1143   // be able to identify specific 'signature' ideal graph node
  // sequences which i) occur as a translation of volatile reads or
  // writes or CAS operations and ii) do not occur through any other
  // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1148   // sequences to the desired machine code sequences. Selection of the
1149   // alternative rules can be implemented by predicates which identify
1150   // the relevant node sequences.
1151   //
1152   // The ideal graph generator translates a volatile read to the node
1153   // sequence
1154   //
1155   //   LoadX[mo_acquire]
1156   //   MemBarAcquire
1157   //
1158   // As a special case when using the compressed oops optimization we
1159   // may also see this variant
1160   //
1161   //   LoadN[mo_acquire]
1162   //   DecodeN
1163   //   MemBarAcquire
1164   //
1165   // A volatile write is translated to the node sequence
1166   //
1167   //   MemBarRelease
1168   //   StoreX[mo_release] {CardMark}-optional
1169   //   MemBarVolatile
1170   //
1171   // n.b. the above node patterns are generated with a strict
1172   // 'signature' configuration of input and output dependencies (see
1173   // the predicates below for exact details). The card mark may be as
1174   // simple as a few extra nodes or, in a few GC configurations, may
1175   // include more complex control flow between the leading and
1176   // trailing memory barriers. However, whatever the card mark
1177   // configuration these signatures are unique to translated volatile
1178   // reads/stores -- they will not appear as a result of any other
1179   // bytecode translation or inlining nor as a consequence of
1180   // optimizing transforms.
1181   //
1182   // We also want to catch inlined unsafe volatile gets and puts and
1183   // be able to implement them using either ldar<x>/stlr<x> or some
  // combination of ldr<x>/str<x> and dmb instructions.
1185   //
  // Inlined unsafe volatile puts manifest as a minor variant of the
1187   // normal volatile put node sequence containing an extra cpuorder
1188   // membar
1189   //
1190   //   MemBarRelease
1191   //   MemBarCPUOrder
1192   //   StoreX[mo_release] {CardMark}-optional
1193   //   MemBarVolatile
1194   //
1195   // n.b. as an aside, the cpuorder membar is not itself subject to
1196   // matching and translation by adlc rules.  However, the rule
1197   // predicates need to detect its presence in order to correctly
1198   // select the desired adlc rules.
1199   //
1200   // Inlined unsafe volatile gets manifest as a somewhat different
1201   // node sequence to a normal volatile get
1202   //
1203   //   MemBarCPUOrder
1204   //        ||       \\
1205   //   MemBarAcquire LoadX[mo_acquire]
1206   //        ||
1207   //   MemBarCPUOrder
1208   //
1209   // In this case the acquire membar does not directly depend on the
1210   // load. However, we can be sure that the load is generated from an
1211   // inlined unsafe volatile get if we see it dependent on this unique
1212   // sequence of membar nodes. Similarly, given an acquire membar we
1213   // can know that it was added because of an inlined unsafe volatile
1214   // get if it is fed and feeds a cpuorder membar and if its feed
1215   // membar also feeds an acquiring load.
1216   //
1217   // Finally an inlined (Unsafe) CAS operation is translated to the
1218   // following ideal graph
1219   //
1220   //   MemBarRelease
1221   //   MemBarCPUOrder
1222   //   CompareAndSwapX {CardMark}-optional
1223   //   MemBarCPUOrder
1224   //   MemBarAcquire
1225   //
1226   // So, where we can identify these volatile read and write
1227   // signatures we can choose to plant either of the above two code
1228   // sequences. For a volatile read we can simply plant a normal
1229   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1230   // also choose to inhibit translation of the MemBarAcquire and
1231   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1232   //
1233   // When we recognise a volatile store signature we can choose to
  // plant a dmb ish as a translation for the MemBarRelease, a
1235   // normal str<x> and then a dmb ish for the MemBarVolatile.
1236   // Alternatively, we can inhibit translation of the MemBarRelease
1237   // and MemBarVolatile and instead plant a simple stlr<x>
1238   // instruction.
1239   //
1240   // when we recognise a CAS signature we can choose to plant a dmb
1241   // ish as a translation for the MemBarRelease, the conventional
1242   // macro-instruction sequence for the CompareAndSwap node (which
1243   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1244   // Alternatively, we can elide generation of the dmb instructions
1245   // and plant the alternative CompareAndSwap macro-instruction
1246   // sequence (which uses ldaxr<x>).
1247   //
1248   // Of course, the above only applies when we see these signature
1249   // configurations. We still want to plant dmb instructions in any
1250   // other cases where we may see a MemBarAcquire, MemBarRelease or
1251   // MemBarVolatile. For example, at the end of a constructor which
1252   // writes final/volatile fields we will see a MemBarRelease
1253   // instruction and this needs a 'dmb ish' lest we risk the
1254   // constructed object being visible without making the
1255   // final/volatile field writes visible.
1256   //
1257   // n.b. the translation rules below which rely on detection of the
1258   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1259   // If we see anything other than the signature configurations we
1260   // always just translate the loads and stores to ldr<x> and str<x>
1261   // and translate acquire, release and volatile membars to the
1262   // relevant dmb instructions.
1263   //
1264 
1265   // graph traversal helpers used for volatile put/get and CAS
1266   // optimization
1267 
1268   // 1) general purpose helpers
1269 
1270   // if node n is linked to a parent MemBarNode by an intervening
1271   // Control and Memory ProjNode return the MemBarNode otherwise return
1272   // NULL.
1273   //
1274   // n may only be a Load or a MemBar.
1275 
1276   MemBarNode *parent_membar(const Node *n)
1277   {
1278     Node *ctl = NULL;
1279     Node *mem = NULL;
1280     Node *membar = NULL;
1281 
1282     if (n->is_Load()) {
1283       ctl = n->lookup(LoadNode::Control);
1284       mem = n->lookup(LoadNode::Memory);
1285     } else if (n->is_MemBar()) {
1286       ctl = n->lookup(TypeFunc::Control);
1287       mem = n->lookup(TypeFunc::Memory);
1288     } else {
1289         return NULL;
1290     }
1291 
1292     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
1293       return NULL;
1294     }
1295 
1296     membar = ctl->lookup(0);
1297 
1298     if (!membar || !membar->is_MemBar()) {
1299       return NULL;
1300     }
1301 
1302     if (mem->lookup(0) != membar) {
1303       return NULL;
1304     }
1305 
1306     return membar->as_MemBar();
1307   }
1308 
1309   // if n is linked to a child MemBarNode by intervening Control and
1310   // Memory ProjNodes return the MemBarNode otherwise return NULL.
1311 
1312   MemBarNode *child_membar(const MemBarNode *n)
1313   {
1314     ProjNode *ctl = n->proj_out(TypeFunc::Control);
1315     ProjNode *mem = n->proj_out(TypeFunc::Memory);
1316 
1317     // MemBar needs to have both a Ctl and Mem projection
1318     if (! ctl || ! mem)
1319       return NULL;
1320 
1321     MemBarNode *child = NULL;
1322     Node *x;
1323 
1324     for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1325       x = ctl->fast_out(i);
1326       // if we see a membar we keep hold of it. we may also see a new
1327       // arena copy of the original but it will appear later
1328       if (x->is_MemBar()) {
1329           child = x->as_MemBar();
1330           break;
1331       }
1332     }
1333 
1334     if (child == NULL) {
1335       return NULL;
1336     }
1337 
1338     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1339       x = mem->fast_out(i);
1340       // if we see a membar we keep hold of it. we may also see a new
1341       // arena copy of the original but it will appear later
1342       if (x == child) {
1343         return child;
1344       }
1345     }
1346     return NULL;
1347   }
1348 
1349   // helper predicate use to filter candidates for a leading memory
1350   // barrier
1351   //
1352   // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1353   // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1354 
1355   bool leading_membar(const MemBarNode *barrier)
1356   {
1357     int opcode = barrier->Opcode();
1358     // if this is a release membar we are ok
1359     if (opcode == Op_MemBarRelease) {
1360       return true;
1361     }
1362     // if its a cpuorder membar . . .
1363     if (opcode != Op_MemBarCPUOrder) {
1364       return false;
1365     }
1366     // then the parent has to be a release membar
1367     MemBarNode *parent = parent_membar(barrier);
1368     if (!parent) {
1369       return false;
1370     }
1371     opcode = parent->Opcode();
1372     return opcode == Op_MemBarRelease;
1373   }
1374 
1375   // 2) card mark detection helper
1376 
1377   // helper predicate which can be used to detect a volatile membar
1378   // introduced as part of a conditional card mark sequence either by
1379   // G1 or by CMS when UseCondCardMark is true.
1380   //
1381   // membar can be definitively determined to be part of a card mark
1382   // sequence if and only if all the following hold
1383   //
1384   // i) it is a MemBarVolatile
1385   //
1386   // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1387   // true
1388   //
1389   // iii) the node's Mem projection feeds a StoreCM node.
1390 
1391   bool is_card_mark_membar(const MemBarNode *barrier)
1392   {
1393     if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
1394       return false;
1395     }
1396 
1397     if (barrier->Opcode() != Op_MemBarVolatile) {
1398       return false;
1399     }
1400 
1401     ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1402 
1403     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1404       Node *y = mem->fast_out(i);
1405       if (y->Opcode() == Op_StoreCM) {
1406         return true;
1407       }
1408     }
1409 
1410     return false;
1411   }
1412 
1413 
1414   // 3) helper predicates to traverse volatile put or CAS graphs which
1415   // may contain GC barrier subgraphs
1416 
1417   // Preamble
1418   // --------
1419   //
1420   // for volatile writes we can omit generating barriers and employ a
  // releasing store when we see a node sequence with a
1422   // leading MemBarRelease and a trailing MemBarVolatile as follows
1423   //
1424   //   MemBarRelease
1425   //  {      ||      } -- optional
1426   //  {MemBarCPUOrder}
1427   //         ||     \\
1428   //         ||     StoreX[mo_release]
1429   //         | \     /
1430   //         | MergeMem
1431   //         | /
1432   //   MemBarVolatile
1433   //
1434   // where
1435   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1436   //  | \ and / indicate further routing of the Ctl and Mem feeds
1437   //
1438   // this is the graph we see for non-object stores. however, for a
1439   // volatile Object store (StoreN/P) we may see other nodes below the
1440   // leading membar because of the need for a GC pre- or post-write
1441   // barrier.
1442   //
  // with most GC configurations we will see this simple variant which
1444   // includes a post-write barrier card mark.
1445   //
1446   //   MemBarRelease______________________________
1447   //         ||    \\               Ctl \        \\
1448   //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
1449   //         | \     /                       . . .  /
1450   //         | MergeMem
1451   //         | /
1452   //         ||      /
1453   //   MemBarVolatile
1454   //
1455   // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1456   // the object address to an int used to compute the card offset) and
1457   // Ctl+Mem to a StoreB node (which does the actual card mark).
1458   //
1459   // n.b. a StoreCM node will only appear in this configuration when
1460   // using CMS. StoreCM differs from a normal card mark write (StoreB)
1461   // because it implies a requirement to order visibility of the card
1462   // mark (StoreCM) relative to the object put (StoreP/N) using a
1463   // StoreStore memory barrier (arguably this ought to be represented
1464   // explicitly in the ideal graph but that is not how it works). This
1465   // ordering is required for both non-volatile and volatile
1466   // puts. Normally that means we need to translate a StoreCM using
1467   // the sequence
1468   //
1469   //   dmb ishst
  //   strb
1471   //
1472   // However, in the case of a volatile put if we can recognise this
1473   // configuration and plant an stlr for the object write then we can
1474   // omit the dmb and just plant an strb since visibility of the stlr
1475   // is ordered before visibility of subsequent stores. StoreCM nodes
1476   // also arise when using G1 or using CMS with conditional card
1477   // marking. In these cases (as we shall see) we don't need to insert
1478   // the dmb when translating StoreCM because there is already an
1479   // intervening StoreLoad barrier between it and the StoreP/N.
1480   //
1481   // It is also possible to perform the card mark conditionally on it
1482   // currently being unmarked in which case the volatile put graph
1483   // will look slightly different
1484   //
1485   //   MemBarRelease____________________________________________
1486   //         ||    \\               Ctl \     Ctl \     \\  Mem \
1487   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
1488   //         | \     /                              \            |
1489   //         | MergeMem                            . . .      StoreB
1490   //         | /                                                /
1491   //         ||     /
1492   //   MemBarVolatile
1493   //
1494   // It is worth noting at this stage that both the above
1495   // configurations can be uniquely identified by checking that the
1496   // memory flow includes the following subgraph:
1497   //
1498   //   MemBarRelease
1499   //  {MemBarCPUOrder}
1500   //          |  \      . . .
1501   //          |  StoreX[mo_release]  . . .
1502   //          |   /
1503   //         MergeMem
1504   //          |
1505   //   MemBarVolatile
1506   //
1507   // This is referred to as a *normal* subgraph. It can easily be
1508   // detected starting from any candidate MemBarRelease,
1509   // StoreX[mo_release] or MemBarVolatile.
1510   //
1511   // A simple variation on this normal case occurs for an unsafe CAS
1512   // operation. The basic graph for a non-object CAS is
1513   //
1514   //   MemBarRelease
1515   //         ||
1516   //   MemBarCPUOrder
1517   //         ||     \\   . . .
1518   //         ||     CompareAndSwapX
1519   //         ||       |
1520   //         ||     SCMemProj
1521   //         | \     /
1522   //         | MergeMem
1523   //         | /
1524   //   MemBarCPUOrder
1525   //         ||
1526   //   MemBarAcquire
1527   //
1528   // The same basic variations on this arrangement (mutatis mutandis)
  // occur when a card mark is introduced. i.e. we see the same basic
  // shape but the StoreP/N is replaced with CompareAndSwapP/N and the
1531   // tail of the graph is a pair comprising a MemBarCPUOrder +
1532   // MemBarAcquire.
1533   //
1534   // So, in the case of a CAS the normal graph has the variant form
1535   //
1536   //   MemBarRelease
1537   //   MemBarCPUOrder
1538   //          |   \      . . .
1539   //          |  CompareAndSwapX  . . .
1540   //          |    |
1541   //          |   SCMemProj
1542   //          |   /  . . .
1543   //         MergeMem
1544   //          |
1545   //   MemBarCPUOrder
1546   //   MemBarAcquire
1547   //
1548   // This graph can also easily be detected starting from any
1549   // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire.
1550   //
1551   // the code below uses two helper predicates, leading_to_normal and
1552   // normal_to_leading to identify these normal graphs, one validating
1553   // the layout starting from the top membar and searching down and
1554   // the other validating the layout starting from the lower membar
1555   // and searching up.
1556   //
1557   // There are two special case GC configurations when a normal graph
1558   // may not be generated: when using G1 (which always employs a
1559   // conditional card mark); and when using CMS with conditional card
1560   // marking configured. These GCs are both concurrent rather than
  // stop-the-world GCs. So they introduce extra Ctl+Mem flow into the
  // graph between the leading and trailing membar nodes, in
  // particular enforcing stronger memory serialisation between the
1564   // object put and the corresponding conditional card mark. CMS
1565   // employs a post-write GC barrier while G1 employs both a pre- and
1566   // post-write GC barrier. Of course the extra nodes may be absent --
1567   // they are only inserted for object puts. This significantly
1568   // complicates the task of identifying whether a MemBarRelease,
1569   // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
1570   // when using these GC configurations (see below). It adds similar
1571   // complexity to the task of identifying whether a MemBarRelease,
1572   // CompareAndSwapX or MemBarAcquire forms part of a CAS.
1573   //
1574   // In both cases the post-write subtree includes an auxiliary
1575   // MemBarVolatile (StoreLoad barrier) separating the object put and
1576   // the read of the corresponding card. This poses two additional
1577   // problems.
1578   //
1579   // Firstly, a card mark MemBarVolatile needs to be distinguished
1580   // from a normal trailing MemBarVolatile. Resolving this first
1581   // problem is straightforward: a card mark MemBarVolatile always
1582   // projects a Mem feed to a StoreCM node and that is a unique marker
1583   //
1584   //      MemBarVolatile (card mark)
1585   //       C |    \     . . .
1586   //         |   StoreCM   . . .
1587   //       . . .
1588   //
1589   // The second problem is how the code generator is to translate the
1590   // card mark barrier? It always needs to be translated to a "dmb
1591   // ish" instruction whether or not it occurs as part of a volatile
1592   // put. A StoreLoad barrier is needed after the object put to ensure
1593   // i) visibility to GC threads of the object put and ii) visibility
1594   // to the mutator thread of any card clearing write by a GC
1595   // thread. Clearly a normal store (str) will not guarantee this
1596   // ordering but neither will a releasing store (stlr). The latter
1597   // guarantees that the object put is visible but does not guarantee
1598   // that writes by other threads have also been observed.
1599   //
1600   // So, returning to the task of translating the object put and the
  // leading/trailing membar nodes: what do the non-normal node graphs
1602   // look like for these 2 special cases? and how can we determine the
1603   // status of a MemBarRelease, StoreX[mo_release] or MemBarVolatile
1604   // in both normal and non-normal cases?
1605   //
1606   // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
  // which selects conditional execution based on the value loaded
1608   // (LoadB) from the card. Ctl and Mem are fed to the If via an
1609   // intervening StoreLoad barrier (MemBarVolatile).
1610   //
1611   // So, with CMS we may see a node graph for a volatile object store
1612   // which looks like this
1613   //
1614   //   MemBarRelease
1615   //   MemBarCPUOrder_(leading)__________________
1616   //     C |    M \       \\                   C \
1617   //       |       \    StoreN/P[mo_release]  CastP2X
1618   //       |    Bot \    /
1619   //       |       MergeMem
1620   //       |         /
1621   //      MemBarVolatile (card mark)
1622   //     C |  ||    M |
1623   //       | LoadB    |
1624   //       |   |      |
1625   //       | Cmp      |\
1626   //       | /        | \
1627   //       If         |  \
1628   //       | \        |   \
1629   // IfFalse  IfTrue  |    \
1630   //       \     / \  |     \
1631   //        \   / StoreCM    |
1632   //         \ /      |      |
1633   //        Region   . . .   |
1634   //          | \           /
1635   //          |  . . .  \  / Bot
1636   //          |       MergeMem
1637   //          |          |
1638   //        MemBarVolatile (trailing)
1639   //
1640   // The first MergeMem merges the AliasIdxBot Mem slice from the
1641   // leading membar and the oopptr Mem slice from the Store into the
1642   // card mark membar. The trailing MergeMem merges the AliasIdxBot
1643   // Mem slice from the card mark membar and the AliasIdxRaw slice
1644   // from the StoreCM into the trailing membar (n.b. the latter
1645   // proceeds via a Phi associated with the If region).
1646   //
1647   // The graph for a CAS varies slightly, the obvious difference being
1648   // that the StoreN/P node is replaced by a CompareAndSwapP/N node
1649   // and the trailing MemBarVolatile by a MemBarCPUOrder +
1650   // MemBarAcquire pair. The other important difference is that the
1651   // CompareAndSwap node's SCMemProj is not merged into the card mark
1652   // membar - it still feeds the trailing MergeMem. This also means
1653   // that the card mark membar receives its Mem feed directly from the
1654   // leading membar rather than via a MergeMem.
1655   //
1656   //   MemBarRelease
1657   //   MemBarCPUOrder__(leading)_________________________
1658   //       ||                       \\                 C \
1659   //   MemBarVolatile (card mark)  CompareAndSwapN/P  CastP2X
1660   //     C |  ||    M |              |
1661   //       | LoadB    |       ______/|
1662   //       |   |      |      /       |
1663   //       | Cmp      |     /      SCMemProj
1664   //       | /        |    /         |
1665   //       If         |   /         /
1666   //       | \        |  /         /
1667   // IfFalse  IfTrue  | /         /
1668   //       \     / \  |/ prec    /
1669   //        \   / StoreCM       /
1670   //         \ /      |        /
1671   //        Region   . . .    /
1672   //          | \            /
1673   //          |  . . .  \   / Bot
1674   //          |       MergeMem
1675   //          |          |
1676   //        MemBarCPUOrder
1677   //        MemBarAcquire (trailing)
1678   //
1679   // This has a slightly different memory subgraph to the one seen
1680   // previously but the core of it is the same as for the CAS normal
  // subgraph
1682   //
1683   //   MemBarRelease
1684   //   MemBarCPUOrder____
1685   //      ||             \      . . .
1686   //   MemBarVolatile  CompareAndSwapX  . . .
1687   //      |  \            |
1688   //        . . .   SCMemProj
1689   //          |     /  . . .
1690   //         MergeMem
1691   //          |
1692   //   MemBarCPUOrder
1693   //   MemBarAcquire
1694   //
1695   //
1696   // G1 is quite a lot more complicated. The nodes inserted on behalf
1697   // of G1 may comprise: a pre-write graph which adds the old value to
1698   // the SATB queue; the releasing store itself; and, finally, a
1699   // post-write graph which performs a card mark.
1700   //
1701   // The pre-write graph may be omitted, but only when the put is
1702   // writing to a newly allocated (young gen) object and then only if
1703   // there is a direct memory chain to the Initialize node for the
1704   // object allocation. This will not happen for a volatile put since
1705   // any memory chain passes through the leading membar.
1706   //
1707   // The pre-write graph includes a series of 3 If tests. The outermost
1708   // If tests whether SATB is enabled (no else case). The next If tests
1709   // whether the old value is non-NULL (no else case). The third tests
1710   // whether the SATB queue index is > 0, if so updating the queue. The
1711   // else case for this third If calls out to the runtime to allocate a
1712   // new queue buffer.
1713   //
1714   // So with G1 the pre-write and releasing store subgraph looks like
1715   // this (the nested Ifs are omitted).
1716   //
1717   //  MemBarRelease (leading)____________
1718   //     C |  ||  M \   M \    M \  M \ . . .
1719   //       | LoadB   \  LoadL  LoadN   \
1720   //       | /        \                 \
1721   //       If         |\                 \
1722   //       | \        | \                 \
1723   //  IfFalse  IfTrue |  \                 \
1724   //       |     |    |   \                 |
1725   //       |     If   |   /\                |
1726   //       |     |          \               |
1727   //       |                 \              |
1728   //       |    . . .         \             |
1729   //       | /       | /       |            |
1730   //      Region  Phi[M]       |            |
1731   //       | \       |         |            |
1732   //       |  \_____ | ___     |            |
1733   //     C | C \     |   C \ M |            |
1734   //       | CastP2X | StoreN/P[mo_release] |
1735   //       |         |         |            |
1736   //     C |       M |       M |          M |
1737   //        \        |         |           /
1738   //                  . . .
1739   //          (post write subtree elided)
1740   //                    . . .
1741   //             C \         M /
1742   //         MemBarVolatile (trailing)
1743   //
1744   // n.b. the LoadB in this subgraph is not the card read -- it's a
1745   // read of the SATB queue active flag.
1746   //
  // Once again the CAS graph is a minor variant on the above with the
  // expected substitutions of CompareAndSwapX for StoreN/P and
  // MemBarCPUOrder + MemBarAcquire for trailing MemBarVolatile.
1750   //
1751   // The G1 post-write subtree is also optional, this time when the
1752   // new value being written is either null or can be identified as a
1753   // newly allocated (young gen) object with no intervening control
1754   // flow. The latter cannot happen but the former may, in which case
  // the card mark membar is omitted and the memory feeds from the
  // leading membar and the StoreN/P are merged direct into the
1757   // trailing membar as per the normal subgraph. So, the only special
1758   // case which arises is when the post-write subgraph is generated.
1759   //
1760   // The kernel of the post-write G1 subgraph is the card mark itself
1761   // which includes a card mark memory barrier (MemBarVolatile), a
1762   // card test (LoadB), and a conditional update (If feeding a
1763   // StoreCM). These nodes are surrounded by a series of nested Ifs
1764   // which try to avoid doing the card mark. The top level If skips if
1765   // the object reference does not cross regions (i.e. it tests if
1766   // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1767   // need not be recorded. The next If, which skips on a NULL value,
1768   // may be absent (it is not generated if the type of value is >=
1769   // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1770   // checking if card_val != young).  n.b. although this test requires
1771   // a pre-read of the card it can safely be done before the StoreLoad
1772   // barrier. However that does not bypass the need to reread the card
1773   // after the barrier.
1774   //
1775   //                (pre-write subtree elided)
1776   //        . . .                  . . .    . . .  . . .
1777   //        C |                    M |     M |    M |
1778   //       Region                  Phi[M] StoreN    |
1779   //          |                     / \      |      |
1780   //         / \_______            /   \     |      |
1781   //      C / C \      . . .            \    |      |
1782   //       If   CastP2X . . .            |   |      |
1783   //       / \                           |   |      |
1784   //      /   \                          |   |      |
1785   // IfFalse IfTrue                      |   |      |
1786   //   |       |                         |   |     /|
1787   //   |       If                        |   |    / |
1788   //   |      / \                        |   |   /  |
1789   //   |     /   \                        \  |  /   |
1790   //   | IfFalse IfTrue                   MergeMem  |
1791   //   |  . . .    / \                       /      |
1792   //   |          /   \                     /       |
1793   //   |     IfFalse IfTrue                /        |
1794   //   |      . . .    |                  /         |
1795   //   |               If                /          |
1796   //   |               / \              /           |
1797   //   |              /   \            /            |
1798   //   |         IfFalse IfTrue       /             |
1799   //   |           . . .   |         /              |
1800   //   |                    \       /               |
1801   //   |                     \     /                |
1802   //   |             MemBarVolatile__(card mark)    |
1803   //   |                ||   C |  M \  M \          |
1804   //   |               LoadB   If    |    |         |
1805   //   |                      / \    |    |         |
1806   //   |                     . . .   |    |         |
1807   //   |                          \  |    |        /
1808   //   |                        StoreCM   |       /
1809   //   |                          . . .   |      /
1810   //   |                        _________/      /
1811   //   |                       /  _____________/
1812   //   |   . . .       . . .  |  /            /
1813   //   |    |                 | /   _________/
1814   //   |    |               Phi[M] /        /
1815   //   |    |                 |   /        /
1816   //   |    |                 |  /        /
1817   //   |  Region  . . .     Phi[M]  _____/
1818   //   |    /                 |    /
1819   //   |                      |   /
1820   //   | . . .   . . .        |  /
1821   //   | /                    | /
1822   // Region           |  |  Phi[M]
1823   //   |              |  |  / Bot
1824   //    \            MergeMem
1825   //     \            /
1826   //     MemBarVolatile
1827   //
1828   // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
1829   // from the leading membar and the oopptr Mem slice from the Store
1830   // into the card mark membar i.e. the memory flow to the card mark
1831   // membar still looks like a normal graph.
1832   //
1833   // The trailing MergeMem merges an AliasIdxBot Mem slice with other
1834   // Mem slices (from the StoreCM and other card mark queue stores).
1835   // However in this case the AliasIdxBot Mem slice does not come
1836   // direct from the card mark membar. It is merged through a series
1837   // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
1838   // from the leading membar with the Mem feed from the card mark
1839   // membar. Each Phi corresponds to one of the Ifs which may skip
1840   // around the card mark membar. So when the If implementing the NULL
1841   // value check has been elided the total number of Phis is 2
1842   // otherwise it is 3.
1843   //
1844   // The CAS graph when using G1GC also includes a pre-write subgraph
  // and an optional post-write subgraph. The same variations are
  // introduced as for CMS with conditional card marking i.e. the
  // StoreP/N is swapped for a CompareAndSwapP/N, the trailing
1848   // MemBarVolatile for a MemBarCPUOrder + MemBarAcquire pair and the
1849   // Mem feed from the CompareAndSwapP/N includes a precedence
1850   // dependency feed to the StoreCM and a feed via an SCMemProj to the
1851   // trailing membar. So, as before the configuration includes the
1852   // normal CAS graph as a subgraph of the memory flow.
1853   //
1854   // So, the upshot is that in all cases the volatile put graph will
  // include a *normal* memory subgraph between the leading membar and
1856   // its child membar, either a volatile put graph (including a
1857   // releasing StoreX) or a CAS graph (including a CompareAndSwapX).
1858   // When that child is not a card mark membar then it marks the end
1859   // of the volatile put or CAS subgraph. If the child is a card mark
1860   // membar then the normal subgraph will form part of a volatile put
1861   // subgraph if and only if the child feeds an AliasIdxBot Mem feed
1862   // to a trailing barrier via a MergeMem. That feed is either direct
1863   // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier
1864   // memory flow (for G1).
1865   //
1866   // The predicates controlling generation of instructions for store
1867   // and barrier nodes employ a few simple helper functions (described
1868   // below) which identify the presence or absence of all these
1869   // subgraph configurations and provide a means of traversing from
1870   // one node in the subgraph to another.
1871 
1872   // is_CAS(int opcode)
1873   //
1874   // return true if opcode is one of the possible CompareAndSwapX
1875   // values otherwise false.
1876 
1877   bool is_CAS(int opcode)
1878   {
1879     switch(opcode) {
1880       // We handle these
1881     case Op_CompareAndSwapI:
1882     case Op_CompareAndSwapL:
1883     case Op_CompareAndSwapP:
1884     case Op_CompareAndSwapN:
1885  // case Op_CompareAndSwapB:
1886  // case Op_CompareAndSwapS:
1887       return true;
1888       // These are TBD
1889     case Op_WeakCompareAndSwapB:
1890     case Op_WeakCompareAndSwapS:
1891     case Op_WeakCompareAndSwapI:
1892     case Op_WeakCompareAndSwapL:
1893     case Op_WeakCompareAndSwapP:
1894     case Op_WeakCompareAndSwapN:
1895     case Op_CompareAndExchangeB:
1896     case Op_CompareAndExchangeS:
1897     case Op_CompareAndExchangeI:
1898     case Op_CompareAndExchangeL:
1899     case Op_CompareAndExchangeP:
1900     case Op_CompareAndExchangeN:
1901       return false;
1902     default:
1903       return false;
1904     }
1905   }
1906 
1907 
1908   // leading_to_normal
1909   //
  // graph traversal helper which detects the normal case Mem feed from
  // a release membar (or, optionally, its cpuorder child) to a
  // dependent volatile membar i.e. it ensures that one or other of
  // the following Mem flow subgraphs is present.
1914   //
1915   //   MemBarRelease
1916   //   MemBarCPUOrder {leading}
1917   //          |  \      . . .
1918   //          |  StoreN/P[mo_release]  . . .
1919   //          |   /
1920   //         MergeMem
1921   //          |
1922   //   MemBarVolatile {trailing or card mark}
1923   //
1924   //   MemBarRelease
1925   //   MemBarCPUOrder {leading}
1926   //      |       \      . . .
1927   //      |     CompareAndSwapX  . . .
1928   //               |
1929   //     . . .    SCMemProj
1930   //           \   |
1931   //      |    MergeMem
1932   //      |       /
1933   //    MemBarCPUOrder
1934   //    MemBarAcquire {trailing}
1935   //
1936   // if the correct configuration is present returns the trailing
1937   // membar otherwise NULL.
1938   //
1939   // the input membar is expected to be either a cpuorder membar or a
1940   // release membar. in the latter case it should not have a cpu membar
1941   // child.
1942   //
1943   // the returned value may be a card mark or trailing membar
1944   //
1945 
1946   MemBarNode *leading_to_normal(MemBarNode *leading)
1947   {
1948     assert((leading->Opcode() == Op_MemBarRelease ||
1949             leading->Opcode() == Op_MemBarCPUOrder),
1950            "expecting a volatile or cpuroder membar!");
1951 
1952     // check the mem flow
1953     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
1954 
1955     if (!mem) {
1956       return NULL;
1957     }
1958 
1959     Node *x = NULL;
1960     StoreNode * st = NULL;
1961     LoadStoreNode *cas = NULL;
1962     MergeMemNode *mm = NULL;
1963 
1964     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1965       x = mem->fast_out(i);
1966       if (x->is_MergeMem()) {
1967         if (mm != NULL) {
1968           return NULL;
1969         }
1970         // two merge mems is one too many
1971         mm = x->as_MergeMem();
1972       } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
1973         // two releasing stores/CAS nodes is one too many
1974         if (st != NULL || cas != NULL) {
1975           return NULL;
1976         }
1977         st = x->as_Store();
1978       } else if (is_CAS(x->Opcode())) {
1979         if (st != NULL || cas != NULL) {
1980           return NULL;
1981         }
1982         cas = x->as_LoadStore();
1983       }
1984     }
1985 
1986     // must have a store or a cas
1987     if (!st && !cas) {
1988       return NULL;
1989     }
1990 
1991     // must have a merge if we also have st
1992     if (st && !mm) {
1993       return NULL;
1994     }
1995 
1996     Node *y = NULL;
1997     if (cas) {
1998       // look for an SCMemProj
1999       for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
2000         x = cas->fast_out(i);
2001         if (x->is_Proj()) {
2002           y = x;
2003           break;
2004         }
2005       }
2006       if (y == NULL) {
2007         return NULL;
2008       }
2009       // the proj must feed a MergeMem
2010       for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
2011         x = y->fast_out(i);
2012         if (x->is_MergeMem()) {
2013           mm = x->as_MergeMem();
2014           break;
2015         }
2016       }
2017       if (mm == NULL)
2018         return NULL;
2019     } else {
2020       // ensure the store feeds the existing mergemem;
2021       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2022         if (st->fast_out(i) == mm) {
2023           y = st;
2024           break;
2025         }
2026       }
2027       if (y == NULL) {
2028         return NULL;
2029       }
2030     }
2031 
2032     MemBarNode *mbar = NULL;
2033     // ensure the merge feeds to the expected type of membar
2034     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2035       x = mm->fast_out(i);
2036       if (x->is_MemBar()) {
2037         int opcode = x->Opcode();
2038         if (opcode == Op_MemBarVolatile && st) {
2039           mbar = x->as_MemBar();
2040         } else if (cas && opcode == Op_MemBarCPUOrder) {
2041           MemBarNode *y =  x->as_MemBar();
2042           y = child_membar(y);
2043           if (y != NULL && y->Opcode() == Op_MemBarAcquire) {
2044             mbar = y;
2045           }
2046         }
2047         break;
2048       }
2049     }
2050 
2051     return mbar;
2052   }
2053 
2054   // normal_to_leading
2055   //
2056   // graph traversal helper which detects the normal case Mem feed
2057   // from either a card mark or a trailing membar to a preceding
2058   // release membar (optionally its cpuorder child) i.e. it ensures
2059   // that one or other of the following Mem flow subgraphs is present.
2060   //
2061   //   MemBarRelease
2062   //   MemBarCPUOrder {leading}
2063   //          |  \      . . .
2064   //          |  StoreN/P[mo_release]  . . .
2065   //          |   /
2066   //         MergeMem
2067   //          |
2068   //   MemBarVolatile {card mark or trailing}
2069   //
2070   //   MemBarRelease
2071   //   MemBarCPUOrder {leading}
2072   //      |       \      . . .
2073   //      |     CompareAndSwapX  . . .
2074   //               |
2075   //     . . .    SCMemProj
2076   //           \   |
2077   //      |    MergeMem
2078   //      |        /
2079   //    MemBarCPUOrder
2080   //    MemBarAcquire {trailing}
2081   //
2082   // this predicate checks for the same flow as the previous predicate
2083   // but starting from the bottom rather than the top.
2084   //
  // if the configuration is present returns the cpuorder membar for
  // preference or when absent the release membar otherwise NULL.
  //
  // n.b. the input membar is expected to be either a MemBarAcquire
  // or a MemBarVolatile; in the latter case it need not be a card
  // mark membar.
2090 
2091   MemBarNode *normal_to_leading(const MemBarNode *barrier)
2092   {
2093     // input must be a volatile membar
2094     assert((barrier->Opcode() == Op_MemBarVolatile ||
2095             barrier->Opcode() == Op_MemBarAcquire),
2096            "expecting a volatile or an acquire membar");
2097     Node *x;
2098     bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
2099 
2100     // if we have an acquire membar then it must be fed via a CPUOrder
2101     // membar
2102 
2103     if (is_cas) {
2104       // skip to parent barrier which must be a cpuorder
2105       x = parent_membar(barrier);
2106       if (x->Opcode() != Op_MemBarCPUOrder)
2107         return NULL;
2108     } else {
2109       // start from the supplied barrier
2110       x = (Node *)barrier;
2111     }
2112 
2113     // the Mem feed to the membar should be a merge
2114     x = x ->in(TypeFunc::Memory);
2115     if (!x->is_MergeMem())
2116       return NULL;
2117 
2118     MergeMemNode *mm = x->as_MergeMem();
2119 
2120     if (is_cas) {
2121       // the merge should be fed from the CAS via an SCMemProj node
2122       x = NULL;
2123       for (uint idx = 1; idx < mm->req(); idx++) {
2124         if (mm->in(idx)->Opcode() == Op_SCMemProj) {
2125           x = mm->in(idx);
2126           break;
2127         }
2128       }
2129       if (x == NULL) {
2130         return NULL;
2131       }
2132       // check for a CAS feeding this proj
2133       x = x->in(0);
2134       int opcode = x->Opcode();
2135       if (!is_CAS(opcode)) {
2136         return NULL;
2137       }
2138       // the CAS should get its mem feed from the leading membar
2139       x = x->in(MemNode::Memory);
2140     } else {
2141       // the merge should get its Bottom mem feed from the leading membar
2142       x = mm->in(Compile::AliasIdxBot);
2143     }
2144 
2145     // ensure this is a non control projection
2146     if (!x->is_Proj() || x->is_CFG()) {
2147       return NULL;
2148     }
2149     // if it is fed by a membar that's the one we want
2150     x = x->in(0);
2151 
2152     if (!x->is_MemBar()) {
2153       return NULL;
2154     }
2155 
2156     MemBarNode *leading = x->as_MemBar();
2157     // reject invalid candidates
2158     if (!leading_membar(leading)) {
2159       return NULL;
2160     }
2161 
2162     // ok, we have a leading membar, now for the sanity clauses
2163 
2164     // the leading membar must feed Mem to a releasing store or CAS
2165     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2166     StoreNode *st = NULL;
2167     LoadStoreNode *cas = NULL;
2168     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2169       x = mem->fast_out(i);
2170       if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2171         // two stores or CASes is one too many
2172         if (st != NULL || cas != NULL) {
2173           return NULL;
2174         }
2175         st = x->as_Store();
2176       } else if (is_CAS(x->Opcode())) {
2177         if (st != NULL || cas != NULL) {
2178           return NULL;
2179         }
2180         cas = x->as_LoadStore();
2181       }
2182     }
2183 
2184     // we should not have both a store and a cas
2185     if (st == NULL & cas == NULL) {
2186       return NULL;
2187     }
2188 
2189     if (st == NULL) {
2190       // nothing more to check
2191       return leading;
2192     } else {
2193       // we should not have a store if we started from an acquire
2194       if (is_cas) {
2195         return NULL;
2196       }
2197 
2198       // the store should feed the merge we used to get here
2199       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2200         if (st->fast_out(i) == mm) {
2201           return leading;
2202         }
2203       }
2204     }
2205 
2206     return NULL;
2207   }
2208 
2209   // card_mark_to_trailing
2210   //
2211   // graph traversal helper which detects extra, non-normal Mem feed
2212   // from a card mark volatile membar to a trailing membar i.e. it
2213   // ensures that one of the following three GC post-write Mem flow
2214   // subgraphs is present.
2215   //
2216   // 1)
2217   //     . . .
2218   //       |
2219   //   MemBarVolatile (card mark)
2220   //      |          |
2221   //      |        StoreCM
2222   //      |          |
2223   //      |        . . .
2224   //  Bot |  /
2225   //   MergeMem
2226   //      |
2227   //      |
2228   //    MemBarVolatile {trailing}
2229   //
2230   // 2)
2231   //   MemBarRelease/CPUOrder (leading)
2232   //    |
2233   //    |
2234   //    |\       . . .
2235   //    | \        |
2236   //    |  \  MemBarVolatile (card mark)
2237   //    |   \   |     |
2238   //     \   \  |   StoreCM    . . .
2239   //      \   \ |
2240   //       \  Phi
2241   //        \ /
2242   //        Phi  . . .
2243   //     Bot |   /
2244   //       MergeMem
2245   //         |
2246   //    MemBarVolatile {trailing}
2247   //
2248   //
2249   // 3)
2250   //   MemBarRelease/CPUOrder (leading)
2251   //    |
2252   //    |\
2253   //    | \
2254   //    |  \      . . .
2255   //    |   \       |
2256   //    |\   \  MemBarVolatile (card mark)
2257   //    | \   \   |     |
2258   //    |  \   \  |   StoreCM    . . .
2259   //    |   \   \ |
2260   //     \   \  Phi
2261   //      \   \ /
2262   //       \  Phi
2263   //        \ /
2264   //        Phi  . . .
2265   //     Bot |   /
2266   //       MergeMem
2267   //         |
2268   //         |
2269   //    MemBarVolatile {trailing}
2270   //
2271   // configuration 1 is only valid if UseConcMarkSweepGC &&
2272   // UseCondCardMark
2273   //
2274   // configurations 2 and 3 are only valid if UseG1GC.
2275   //
2276   // if a valid configuration is present returns the trailing membar
2277   // otherwise NULL.
2278   //
2279   // n.b. the supplied membar is expected to be a card mark
2280   // MemBarVolatile i.e. the caller must ensure the input node has the
2281   // correct operand and feeds Mem to a StoreCM node
2282 
2283   MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
2284   {
2285     // input must be a card mark volatile membar
2286     assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2287 
2288     Node *feed = barrier->proj_out(TypeFunc::Memory);
2289     Node *x;
2290     MergeMemNode *mm = NULL;
2291 
2292     const int MAX_PHIS = 3;     // max phis we will search through
2293     int phicount = 0;           // current search count
2294 
2295     bool retry_feed = true;
2296     while (retry_feed) {
2297       // see if we have a direct MergeMem feed
2298       for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2299         x = feed->fast_out(i);
2300         // the correct Phi will be merging a Bot memory slice
2301         if (x->is_MergeMem()) {
2302           mm = x->as_MergeMem();
2303           break;
2304         }
2305       }
2306       if (mm) {
2307         retry_feed = false;
2308       } else if (UseG1GC & phicount++ < MAX_PHIS) {
2309         // the barrier may feed indirectly via one or two Phi nodes
2310         PhiNode *phi = NULL;
2311         for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2312           x = feed->fast_out(i);
2313           // the correct Phi will be merging a Bot memory slice
2314           if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
2315             phi = x->as_Phi();
2316             break;
2317           }
2318         }
2319         if (!phi) {
2320           return NULL;
2321         }
2322         // look for another merge below this phi
2323         feed = phi;
2324       } else {
2325         // couldn't find a merge
2326         return NULL;
2327       }
2328     }
2329 
2330     // sanity check this feed turns up as the expected slice
2331     assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");
2332 
2333     MemBarNode *trailing = NULL;
2334     // be sure we have a trailing membar the merge
2335     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2336       x = mm->fast_out(i);
2337       if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
2338         trailing = x->as_MemBar();
2339         break;
2340       }
2341     }
2342 
2343     return trailing;
2344   }
2345 
2346   // trailing_to_card_mark
2347   //
2348   // graph traversal helper which detects extra, non-normal Mem feed
2349   // from a trailing volatile membar to a preceding card mark volatile
2350   // membar i.e. it identifies whether one of the three possible extra
2351   // GC post-write Mem flow subgraphs is present
2352   //
2353   // this predicate checks for the same flow as the previous predicate
2354   // but starting from the bottom rather than the top.
2355   //
2356   // if the configuration is present returns the card mark membar
2357   // otherwise NULL
2358   //
2359   // n.b. the supplied membar is expected to be a trailing
2360   // MemBarVolatile i.e. the caller must ensure the input node has the
2361   // correct opcode
2362 
2363   MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
2364   {
2365     assert(trailing->Opcode() == Op_MemBarVolatile,
2366            "expecting a volatile membar");
2367     assert(!is_card_mark_membar(trailing),
2368            "not expecting a card mark membar");
2369 
2370     // the Mem feed to the membar should be a merge
2371     Node *x = trailing->in(TypeFunc::Memory);
2372     if (!x->is_MergeMem()) {
2373       return NULL;
2374     }
2375 
2376     MergeMemNode *mm = x->as_MergeMem();
2377 
2378     x = mm->in(Compile::AliasIdxBot);
2379     // with G1 we may possibly see a Phi or two before we see a Memory
2380     // Proj from the card mark membar
2381 
2382     const int MAX_PHIS = 3;     // max phis we will search through
2383     int phicount = 0;           // current search count
2384 
2385     bool retry_feed = !x->is_Proj();
2386 
2387     while (retry_feed) {
2388       if (UseG1GC && x->is_Phi() && phicount++ < MAX_PHIS) {
2389         PhiNode *phi = x->as_Phi();
2390         ProjNode *proj = NULL;
2391         PhiNode *nextphi = NULL;
2392         bool found_leading = false;
2393         for (uint i = 1; i < phi->req(); i++) {
2394           x = phi->in(i);
2395           if (x->is_Phi()) {
2396             nextphi = x->as_Phi();
2397           } else if (x->is_Proj()) {
2398             int opcode = x->in(0)->Opcode();
2399             if (opcode == Op_MemBarVolatile) {
2400               proj = x->as_Proj();
2401             } else if (opcode == Op_MemBarRelease ||
2402                        opcode == Op_MemBarCPUOrder) {
2403               // probably a leading membar
2404               found_leading = true;
2405             }
2406           }
2407         }
2408         // if we found a correct looking proj then retry from there
2409         // otherwise we must see a leading and a phi or this the
2410         // wrong config
2411         if (proj != NULL) {
2412           x = proj;
2413           retry_feed = false;
2414         } else if (found_leading && nextphi != NULL) {
2415           // retry from this phi to check phi2
2416           x = nextphi;
2417         } else {
2418           // not what we were looking for
2419           return NULL;
2420         }
2421       } else {
2422         return NULL;
2423       }
2424     }
2425     // the proj has to come from the card mark membar
2426     x = x->in(0);
2427     if (!x->is_MemBar()) {
2428       return NULL;
2429     }
2430 
2431     MemBarNode *card_mark_membar = x->as_MemBar();
2432 
2433     if (!is_card_mark_membar(card_mark_membar)) {
2434       return NULL;
2435     }
2436 
2437     return card_mark_membar;
2438   }
2439 
2440   // trailing_to_leading
2441   //
2442   // graph traversal helper which checks the Mem flow up the graph
2443   // from a (non-card mark) trailing membar attempting to locate and
2444   // return an associated leading membar. it first looks for a
2445   // subgraph in the normal configuration (relying on helper
2446   // normal_to_leading). failing that it then looks for one of the
  // possible post-write card mark subgraphs linking the trailing node
  // to the card mark membar (relying on helper
  // trailing_to_card_mark), and then checks that the card mark membar
2450   // is fed by a leading membar (once again relying on auxiliary
2451   // predicate normal_to_leading).
2452   //
  // if the configuration is valid returns the cpuorder membar for
  // preference or when absent the release membar otherwise NULL.
2455   //
2456   // n.b. the input membar is expected to be either a volatile or
2457   // acquire membar but in the former case must *not* be a card mark
2458   // membar.
2459 
2460   MemBarNode *trailing_to_leading(const MemBarNode *trailing)
2461   {
2462     assert((trailing->Opcode() == Op_MemBarAcquire ||
2463             trailing->Opcode() == Op_MemBarVolatile),
2464            "expecting an acquire or volatile membar");
2465     assert((trailing->Opcode() != Op_MemBarVolatile ||
2466             !is_card_mark_membar(trailing)),
2467            "not expecting a card mark membar");
2468 
2469     MemBarNode *leading = normal_to_leading(trailing);
2470 
2471     if (leading) {
2472       return leading;
2473     }
2474 
2475     // nothing more to do if this is an acquire
2476     if (trailing->Opcode() == Op_MemBarAcquire) {
2477       return NULL;
2478     }
2479 
2480     MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
2481 
2482     if (!card_mark_membar) {
2483       return NULL;
2484     }
2485 
2486     return normal_to_leading(card_mark_membar);
2487   }
2488 
2489   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
2490 
2491 bool unnecessary_acquire(const Node *barrier)
2492 {
2493   assert(barrier->is_MemBar(), "expecting a membar");
2494 
2495   if (UseBarriersForVolatile) {
2496     // we need to plant a dmb
2497     return false;
2498   }
2499 
2500   // a volatile read derived from bytecode (or also from an inlined
2501   // SHA field read via LibraryCallKit::load_field_from_object)
2502   // manifests as a LoadX[mo_acquire] followed by an acquire membar
2503   // with a bogus read dependency on it's preceding load. so in those
2504   // cases we will find the load node at the PARMS offset of the
2505   // acquire membar.  n.b. there may be an intervening DecodeN node.
2506   //
2507   // a volatile load derived from an inlined unsafe field access
2508   // manifests as a cpuorder membar with Ctl and Mem projections
2509   // feeding both an acquire membar and a LoadX[mo_acquire]. The
2510   // acquire then feeds another cpuorder membar via Ctl and Mem
2511   // projections. The load has no output dependency on these trailing
2512   // membars because subsequent nodes inserted into the graph take
2513   // their control feed from the final membar cpuorder meaning they
2514   // are all ordered after the load.
2515 
2516   Node *x = barrier->lookup(TypeFunc::Parms);
2517   if (x) {
2518     // we are starting from an acquire and it has a fake dependency
2519     //
2520     // need to check for
2521     //
2522     //   LoadX[mo_acquire]
2523     //   {  |1   }
2524     //   {DecodeN}
2525     //      |Parms
2526     //   MemBarAcquire*
2527     //
2528     // where * tags node we were passed
2529     // and |k means input k
2530     if (x->is_DecodeNarrowPtr()) {
2531       x = x->in(1);
2532     }
2533 
2534     return (x->is_Load() && x->as_Load()->is_acquire());
2535   }
2536 
2537   // now check for an unsafe volatile get
2538 
2539   // need to check for
2540   //
2541   //   MemBarCPUOrder
2542   //        ||       \\
2543   //   MemBarAcquire* LoadX[mo_acquire]
2544   //        ||
2545   //   MemBarCPUOrder
2546   //
2547   // where * tags node we were passed
2548   // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes
2549 
2550   // check for a parent MemBarCPUOrder
2551   ProjNode *ctl;
2552   ProjNode *mem;
2553   MemBarNode *parent = parent_membar(barrier);
2554   if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
2555     return false;
2556   ctl = parent->proj_out(TypeFunc::Control);
2557   mem = parent->proj_out(TypeFunc::Memory);
2558   if (!ctl || !mem) {
2559     return false;
2560   }
2561   // ensure the proj nodes both feed a LoadX[mo_acquire]
2562   LoadNode *ld = NULL;
2563   for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
2564     x = ctl->fast_out(i);
2565     // if we see a load we keep hold of it and stop searching
2566     if (x->is_Load()) {
2567       ld = x->as_Load();
2568       break;
2569     }
2570   }
2571   // it must be an acquiring load
2572   if (ld && ld->is_acquire()) {
2573 
2574     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2575       x = mem->fast_out(i);
2576       // if we see the same load we drop it and stop searching
2577       if (x == ld) {
2578         ld = NULL;
2579         break;
2580       }
2581     }
2582     // we must have dropped the load
2583     if (ld == NULL) {
2584       // check for a child cpuorder membar
2585       MemBarNode *child  = child_membar(barrier->as_MemBar());
2586       if (child && child->Opcode() == Op_MemBarCPUOrder)
2587         return true;
2588     }
2589   }
2590 
2591   // final option for unnecessary mebar is that it is a trailing node
2592   // belonging to a CAS
2593 
2594   MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());
2595 
2596   return leading != NULL;
2597 }
2598 
2599 bool needs_acquiring_load(const Node *n)
2600 {
2601   assert(n->is_Load(), "expecting a load");
2602   if (UseBarriersForVolatile) {
2603     // we use a normal load and a dmb
2604     return false;
2605   }
2606 
2607   LoadNode *ld = n->as_Load();
2608 
2609   if (!ld->is_acquire()) {
2610     return false;
2611   }
2612 
2613   // check if this load is feeding an acquire membar
2614   //
2615   //   LoadX[mo_acquire]
2616   //   {  |1   }
2617   //   {DecodeN}
2618   //      |Parms
2619   //   MemBarAcquire*
2620   //
2621   // where * tags node we were passed
2622   // and |k means input k
2623 
2624   Node *start = ld;
2625   Node *mbacq = NULL;
2626 
2627   // if we hit a DecodeNarrowPtr we reset the start node and restart
2628   // the search through the outputs
2629  restart:
2630 
2631   for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
2632     Node *x = start->fast_out(i);
2633     if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
2634       mbacq = x;
2635     } else if (!mbacq &&
2636                (x->is_DecodeNarrowPtr() ||
2637                 (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
2638       start = x;
2639       goto restart;
2640     }
2641   }
2642 
2643   if (mbacq) {
2644     return true;
2645   }
2646 
2647   // now check for an unsafe volatile get
2648 
2649   // check if Ctl and Proj feed comes from a MemBarCPUOrder
2650   //
2651   //     MemBarCPUOrder
2652   //        ||       \\
2653   //   MemBarAcquire* LoadX[mo_acquire]
2654   //        ||
2655   //   MemBarCPUOrder
2656 
2657   MemBarNode *membar;
2658 
2659   membar = parent_membar(ld);
2660 
2661   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2662     return false;
2663   }
2664 
2665   // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
2666 
2667   membar = child_membar(membar);
2668 
2669   if (!membar || !membar->Opcode() == Op_MemBarAcquire) {
2670     return false;
2671   }
2672 
2673   membar = child_membar(membar);
2674 
2675   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2676     return false;
2677   }
2678 
2679   return true;
2680 }
2681 
2682 bool unnecessary_release(const Node *n)
2683 {
2684   assert((n->is_MemBar() &&
2685           n->Opcode() == Op_MemBarRelease),
2686          "expecting a release membar");
2687 
2688   if (UseBarriersForVolatile) {
2689     // we need to plant a dmb
2690     return false;
2691   }
2692 
2693   // if there is a dependent CPUOrder barrier then use that as the
2694   // leading
2695 
2696   MemBarNode *barrier = n->as_MemBar();
2697   // check for an intervening cpuorder membar
2698   MemBarNode *b = child_membar(barrier);
2699   if (b && b->Opcode() == Op_MemBarCPUOrder) {
2700     // ok, so start the check from the dependent cpuorder barrier
2701     barrier = b;
2702   }
2703 
2704   // must start with a normal feed
2705   MemBarNode *child_barrier = leading_to_normal(barrier);
2706 
2707   if (!child_barrier) {
2708     return false;
2709   }
2710 
2711   if (!is_card_mark_membar(child_barrier)) {
2712     // this is the trailing membar and we are done
2713     return true;
2714   }
2715 
2716   // must be sure this card mark feeds a trailing membar
2717   MemBarNode *trailing = card_mark_to_trailing(child_barrier);
2718   return (trailing != NULL);
2719 }
2720 
2721 bool unnecessary_volatile(const Node *n)
2722 {
2723   // assert n->is_MemBar();
2724   if (UseBarriersForVolatile) {
2725     // we need to plant a dmb
2726     return false;
2727   }
2728 
2729   MemBarNode *mbvol = n->as_MemBar();
2730 
2731   // first we check if this is part of a card mark. if so then we have
2732   // to generate a StoreLoad barrier
2733 
2734   if (is_card_mark_membar(mbvol)) {
2735       return false;
2736   }
2737 
2738   // ok, if it's not a card mark then we still need to check if it is
2739   // a trailing membar of a volatile put hgraph.
2740 
2741   return (trailing_to_leading(mbvol) != NULL);
2742 }
2743 
2744 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2745 
2746 bool needs_releasing_store(const Node *n)
2747 {
2748   // assert n->is_Store();
2749   if (UseBarriersForVolatile) {
2750     // we use a normal store and dmb combination
2751     return false;
2752   }
2753 
2754   StoreNode *st = n->as_Store();
2755 
2756   // the store must be marked as releasing
2757   if (!st->is_release()) {
2758     return false;
2759   }
2760 
2761   // the store must be fed by a membar
2762 
2763   Node *x = st->lookup(StoreNode::Memory);
2764 
2765   if (! x || !x->is_Proj()) {
2766     return false;
2767   }
2768 
2769   ProjNode *proj = x->as_Proj();
2770 
2771   x = proj->lookup(0);
2772 
2773   if (!x || !x->is_MemBar()) {
2774     return false;
2775   }
2776 
2777   MemBarNode *barrier = x->as_MemBar();
2778 
2779   // if the barrier is a release membar or a cpuorder mmebar fed by a
2780   // release membar then we need to check whether that forms part of a
2781   // volatile put graph.
2782 
2783   // reject invalid candidates
2784   if (!leading_membar(barrier)) {
2785     return false;
2786   }
2787 
2788   // does this lead a normal subgraph?
2789   MemBarNode *mbvol = leading_to_normal(barrier);
2790 
2791   if (!mbvol) {
2792     return false;
2793   }
2794 
2795   // all done unless this is a card mark
2796   if (!is_card_mark_membar(mbvol)) {
2797     return true;
2798   }
2799 
2800   // we found a card mark -- just make sure we have a trailing barrier
2801 
2802   return (card_mark_to_trailing(mbvol) != NULL);
2803 }
2804 
2805 // predicate controlling translation of CAS
2806 //
2807 // returns true if CAS needs to use an acquiring load otherwise false
2808 
2809 bool needs_acquiring_load_exclusive(const Node *n)
2810 {
2811   assert(is_CAS(n->Opcode()), "expecting a compare and swap");
2812   if (UseBarriersForVolatile) {
2813     return false;
2814   }
2815 
2816   // CAS nodes only ought to turn up in inlined unsafe CAS operations
2817 #ifdef ASSERT
2818   LoadStoreNode *st = n->as_LoadStore();
2819 
2820   // the store must be fed by a membar
2821 
2822   Node *x = st->lookup(StoreNode::Memory);
2823 
2824   assert (x && x->is_Proj(), "CAS not fed by memory proj!");
2825 
2826   ProjNode *proj = x->as_Proj();
2827 
2828   x = proj->lookup(0);
2829 
2830   assert (x && x->is_MemBar(), "CAS not fed by membar!");
2831 
2832   MemBarNode *barrier = x->as_MemBar();
2833 
2834   // the barrier must be a cpuorder mmebar fed by a release membar
2835 
2836   assert(barrier->Opcode() == Op_MemBarCPUOrder,
2837          "CAS not fed by cpuorder membar!");
2838 
2839   MemBarNode *b = parent_membar(barrier);
2840   assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
2841           "CAS not fed by cpuorder+release membar pair!");
2842 
2843   // does this lead a normal subgraph?
2844   MemBarNode *mbar = leading_to_normal(barrier);
2845 
2846   assert(mbar != NULL, "CAS not embedded in normal graph!");
2847 
2848   assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
2849 #endif // ASSERT
2850   // so we can just return true here
2851   return true;
2852 }
2853 
2854 // predicate controlling translation of StoreCM
2855 //
// returns true if the StoreStore (dmb ishst) preceding the card write
// can be omitted, otherwise false
2858 
2859 bool unnecessary_storestore(const Node *storecm)
2860 {
2861   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
2862 
2863   // we only ever need to generate a dmb ishst between an object put
2864   // and the associated card mark when we are using CMS without
2865   // conditional card marking
2866 
2867   if (!UseConcMarkSweepGC || UseCondCardMark) {
2868     return true;
2869   }
2870 
2871   // if we are implementing volatile puts using barriers then the
2872   // object put as an str so we must insert the dmb ishst
2873 
2874   if (UseBarriersForVolatile) {
2875     return false;
2876   }
2877 
2878   // we can omit the dmb ishst if this StoreCM is part of a volatile
2879   // put because in thta case the put will be implemented by stlr
2880   //
2881   // we need to check for a normal subgraph feeding this StoreCM.
2882   // that means the StoreCM must be fed Memory from a leading membar,
2883   // either a MemBarRelease or its dependent MemBarCPUOrder, and the
2884   // leading membar must be part of a normal subgraph
2885 
2886   Node *x = storecm->in(StoreNode::Memory);
2887 
2888   if (!x->is_Proj()) {
2889     return false;
2890   }
2891 
2892   x = x->in(0);
2893 
2894   if (!x->is_MemBar()) {
2895     return false;
2896   }
2897 
2898   MemBarNode *leading = x->as_MemBar();
2899 
2900   // reject invalid candidates
2901   if (!leading_membar(leading)) {
2902     return false;
2903   }
2904 
2905   // we can omit the StoreStore if it is the head of a normal subgraph
2906   return (leading_to_normal(leading) != NULL);
2907 }
2908 
2909 
// shorthand used by the emit routines below: "__ foo(...)" expands to
// "_masm.foo(...)", so a local named _masm must be in scope at each use
#define __ _masm.
2911 
2912 // advance declarations for helper functions to convert register
2913 // indices to register objects
2914 
2915 // the ad file has to provide implementations of certain methods
2916 // expected by the generic code
2917 //
2918 // REQUIRED FUNCTIONALITY
2919 
2920 //=============================================================================
2921 
2922 // !!!!! Special hack to get all types of calls to specify the byte offset
2923 //       from the start of the call to the point where the return address
2924 //       will point.
2925 
2926 int MachCallStaticJavaNode::ret_addr_offset()
2927 {
2928   // call should be a simple bl
2929   int off = 4;
2930   return off;
2931 }
2932 
2933 int MachCallDynamicJavaNode::ret_addr_offset()
2934 {
2935   return 16; // movz, movk, movk, bl
2936 }
2937 
2938 int MachCallRuntimeNode::ret_addr_offset() {
2939   // for generated stubs the call will be
2940   //   far_call(addr)
2941   // for real runtime callouts it will be six instructions
2942   // see aarch64_enc_java_to_runtime
2943   //   adr(rscratch2, retaddr)
2944   //   lea(rscratch1, RuntimeAddress(addr)
2945   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
2946   //   blrt rscratch1
2947   CodeBlob *cb = CodeCache::find_blob(_entry_point);
2948   if (cb) {
2949     return MacroAssembler::far_branch_size();
2950   } else {
2951     return 6 * NativeInstruction::instruction_size;
2952   }
2953 }
2954 
2955 // Indicate if the safepoint node needs the polling page as an input
2956 
2957 // the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
2959 // instruction itself. so we cannot plant a mov of the safepoint poll
2960 // address followed by a load. setting this to true means the mov is
2961 // scheduled as a prior instruction. that's better for scheduling
2962 // anyway.
2963 
2964 bool SafePointNode::needs_polling_address_input()
2965 {
2966   return true;
2967 }
2968 
2969 //=============================================================================
2970 
2971 #ifndef PRODUCT
2972 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2973   st->print("BREAKPOINT");
2974 }
2975 #endif
2976 
2977 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2978   MacroAssembler _masm(&cbuf);
2979   __ brk(0);
2980 }
2981 
2982 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
2983   return MachNode::size(ra_);
2984 }
2985 
2986 //=============================================================================
2987 
2988 #ifndef PRODUCT
2989   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
2990     st->print("nop \t# %d bytes pad for loops and calls", _count);
2991   }
2992 #endif
2993 
2994   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2995     MacroAssembler _masm(&cbuf);
2996     for (int i = 0; i < _count; i++) {
2997       __ nop();
2998     }
2999   }
3000 
3001   uint MachNopNode::size(PhaseRegAlloc*) const {
3002     return _count * NativeInstruction::instruction_size;
3003   }
3004 
3005 //=============================================================================
// the constant base node's output register mask is bound to the
// empty mask
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
3007 
3008 int Compile::ConstantTable::calculate_table_base_offset() const {
3009   return 0;  // absolute addressing, no offset
3010 }
3011 
3012 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
3013 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
3014   ShouldNotReachHere();
3015 }
3016 
3017 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
3018   // Empty encoding
3019 }
3020 
3021 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
3022   return 0;
3023 }
3024 
3025 #ifndef PRODUCT
3026 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
3027   st->print("-- \t// MachConstantBaseNode (empty encoding)");
3028 }
3029 #endif
3030 
3031 #ifndef PRODUCT
3032 void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3033   Compile* C = ra_->C;
3034 
3035   int framesize = C->frame_slots() << LogBytesPerInt;
3036 
3037   if (C->need_stack_bang(framesize))
3038     st->print("# stack bang size=%d\n\t", framesize);
3039 
3040   if (framesize < ((1 << 9) + 2 * wordSize)) {
3041     st->print("sub  sp, sp, #%d\n\t", framesize);
3042     st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
3043     if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
3044   } else {
3045     st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
3046     if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
3047     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
3048     st->print("sub  sp, sp, rscratch1");
3049   }
3050 }
3051 #endif
3052 
3053 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3054   Compile* C = ra_->C;
3055   MacroAssembler _masm(&cbuf);
3056 
3057   // n.b. frame size includes space for return pc and rfp
3058   const long framesize = C->frame_size_in_bytes();
3059   assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");
3060 
3061   // insert a nop at the start of the prolog so we can patch in a
3062   // branch if we need to invalidate the method later
3063   __ nop();
3064 
3065   int bangsize = C->bang_size_in_bytes();
3066   if (C->need_stack_bang(bangsize) && UseStackBanging)
3067     __ generate_stack_overflow_check(bangsize);
3068 
3069   __ build_frame(framesize);
3070 
3071   if (NotifySimulator) {
3072     __ notify(Assembler::method_entry);
3073   }
3074 
3075   if (VerifyStackAtCalls) {
3076     Unimplemented();
3077   }
3078 
3079   C->set_frame_complete(cbuf.insts_size());
3080 
3081   if (C->has_mach_constant_base_node()) {
3082     // NOTE: We set the table base offset here because users might be
3083     // emitted before MachConstantBaseNode.
3084     Compile::ConstantTable& constant_table = C->constant_table();
3085     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
3086   }
3087 }
3088 
3089 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
3090 {
3091   return MachNode::size(ra_); // too many variables; just compute it
3092                               // the hard way
3093 }
3094 
3095 int MachPrologNode::reloc() const
3096 {
3097   return 0;
3098 }
3099 
3100 //=============================================================================
3101 
3102 #ifndef PRODUCT
3103 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3104   Compile* C = ra_->C;
3105   int framesize = C->frame_slots() << LogBytesPerInt;
3106 
3107   st->print("# pop frame %d\n\t",framesize);
3108 
3109   if (framesize == 0) {
3110     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
3111   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
3112     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
3113     st->print("add  sp, sp, #%d\n\t", framesize);
3114   } else {
3115     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
3116     st->print("add  sp, sp, rscratch1\n\t");
3117     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
3118   }
3119 
3120   if (do_polling() && C->is_method_compilation()) {
3121     st->print("# touch polling page\n\t");
3122     st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
3123     st->print("ldr zr, [rscratch1]");
3124   }
3125 }
3126 #endif
3127 
3128 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3129   Compile* C = ra_->C;
3130   MacroAssembler _masm(&cbuf);
3131   int framesize = C->frame_slots() << LogBytesPerInt;
3132 
3133   __ remove_frame(framesize);
3134 
3135   if (NotifySimulator) {
3136     __ notify(Assembler::method_reentry);
3137   }
3138 
3139   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
3140     __ reserved_stack_check();
3141   }
3142 
3143   if (do_polling() && C->is_method_compilation()) {
3144     __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
3145   }
3146 }
3147 
3148 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
3149   // Variable size. Determine dynamically.
3150   return MachNode::size(ra_);
3151 }
3152 
3153 int MachEpilogNode::reloc() const {
3154   // Return number of relocatable values contained in this instruction.
3155   return 1; // 1 for polling page.
3156 }
3157 
3158 const Pipeline * MachEpilogNode::pipeline() const {
3159   return MachNode::pipeline_class();
3160 }
3161 
3162 // This method seems to be obsolete. It is declared in machnode.hpp
3163 // and defined in all *.ad files, but it is never called. Should we
3164 // get rid of it?
3165 int MachEpilogNode::safepoint_offset() const {
3166   assert(do_polling(), "no return for this epilog node");
3167   return 4;
3168 }
3169 
3170 //=============================================================================
3171 
3172 // Figure out which register class each belongs in: rc_int, rc_float or
3173 // rc_stack.
// Register classes distinguished by the spill copy code.
enum RC {
  rc_bad,    // no register (OptoReg::Bad)
  rc_int,    // general purpose register
  rc_float,  // float register
  rc_stack   // stack slot
};
3175 
3176 static enum RC rc_class(OptoReg::Name reg) {
3177 
3178   if (reg == OptoReg::Bad) {
3179     return rc_bad;
3180   }
3181 
3182   // we have 30 int registers * 2 halves
3183   // (rscratch1 and rscratch2 are omitted)
3184 
3185   if (reg < 60) {
3186     return rc_int;
3187   }
3188 
3189   // we have 32 float register * 2 halves
3190   if (reg < 60 + 128) {
3191     return rc_float;
3192   }
3193 
3194   // Between float regs & stack is the flags regs.
3195   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
3196 
3197   return rc_stack;
3198 }
3199 
3200 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
3201   Compile* C = ra_->C;
3202 
3203   // Get registers to move.
3204   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
3205   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
3206   OptoReg::Name dst_hi = ra_->get_reg_second(this);
3207   OptoReg::Name dst_lo = ra_->get_reg_first(this);
3208 
3209   enum RC src_hi_rc = rc_class(src_hi);
3210   enum RC src_lo_rc = rc_class(src_lo);
3211   enum RC dst_hi_rc = rc_class(dst_hi);
3212   enum RC dst_lo_rc = rc_class(dst_lo);
3213 
3214   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
3215 
3216   if (src_hi != OptoReg::Bad) {
3217     assert((src_lo&1)==0 && src_lo+1==src_hi &&
3218            (dst_lo&1)==0 && dst_lo+1==dst_hi,
3219            "expected aligned-adjacent pairs");
3220   }
3221 
3222   if (src_lo == dst_lo && src_hi == dst_hi) {
3223     return 0;            // Self copy, no move.
3224   }
3225 
3226   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
3227               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
3228   int src_offset = ra_->reg2offset(src_lo);
3229   int dst_offset = ra_->reg2offset(dst_lo);
3230 
3231   if (bottom_type()->isa_vect() != NULL) {
3232     uint ireg = ideal_reg();
3233     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
3234     if (cbuf) {
3235       MacroAssembler _masm(cbuf);
3236       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
3237       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
3238         // stack->stack
3239         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
3240         if (ireg == Op_VecD) {
3241           __ unspill(rscratch1, true, src_offset);
3242           __ spill(rscratch1, true, dst_offset);
3243         } else {
3244           __ spill_copy128(src_offset, dst_offset);
3245         }
3246       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
3247         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3248                ireg == Op_VecD ? __ T8B : __ T16B,
3249                as_FloatRegister(Matcher::_regEncode[src_lo]));
3250       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
3251         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3252                        ireg == Op_VecD ? __ D : __ Q,
3253                        ra_->reg2offset(dst_lo));
3254       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
3255         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3256                        ireg == Op_VecD ? __ D : __ Q,
3257                        ra_->reg2offset(src_lo));
3258       } else {
3259         ShouldNotReachHere();
3260       }
3261     }
3262   } else if (cbuf) {
3263     MacroAssembler _masm(cbuf);
3264     switch (src_lo_rc) {
3265     case rc_int:
3266       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
3267         if (is64) {
3268             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
3269                    as_Register(Matcher::_regEncode[src_lo]));
3270         } else {
3271             MacroAssembler _masm(cbuf);
3272             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
3273                     as_Register(Matcher::_regEncode[src_lo]));
3274         }
3275       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
3276         if (is64) {
3277             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3278                      as_Register(Matcher::_regEncode[src_lo]));
3279         } else {
3280             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3281                      as_Register(Matcher::_regEncode[src_lo]));
3282         }
3283       } else {                    // gpr --> stack spill
3284         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3285         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
3286       }
3287       break;
3288     case rc_float:
3289       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
3290         if (is64) {
3291             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
3292                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3293         } else {
3294             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
3295                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3296         }
3297       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
3298           if (cbuf) {
3299             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3300                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3301         } else {
3302             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3303                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3304         }
3305       } else {                    // fpr --> stack spill
3306         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3307         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3308                  is64 ? __ D : __ S, dst_offset);
3309       }
3310       break;
3311     case rc_stack:
3312       if (dst_lo_rc == rc_int) {  // stack --> gpr load
3313         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
3314       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
3315         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3316                    is64 ? __ D : __ S, src_offset);
3317       } else {                    // stack --> stack copy
3318         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3319         __ unspill(rscratch1, is64, src_offset);
3320         __ spill(rscratch1, is64, dst_offset);
3321       }
3322       break;
3323     default:
3324       assert(false, "bad rc_class for spill");
3325       ShouldNotReachHere();
3326     }
3327   }
3328 
3329   if (st) {
3330     st->print("spill ");
3331     if (src_lo_rc == rc_stack) {
3332       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
3333     } else {
3334       st->print("%s -> ", Matcher::regName[src_lo]);
3335     }
3336     if (dst_lo_rc == rc_stack) {
3337       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
3338     } else {
3339       st->print("%s", Matcher::regName[dst_lo]);
3340     }
3341     if (bottom_type()->isa_vect() != NULL) {
3342       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
3343     } else {
3344       st->print("\t# spill size = %d", is64 ? 64:32);
3345     }
3346   }
3347 
3348   return 0;
3349 
3350 }
3351 
3352 #ifndef PRODUCT
3353 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3354   if (!ra_)
3355     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
3356   else
3357     implementation(NULL, ra_, false, st);
3358 }
3359 #endif
3360 
3361 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3362   implementation(&cbuf, ra_, false, NULL);
3363 }
3364 
3365 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
3366   return MachNode::size(ra_);
3367 }
3368 
3369 //=============================================================================
3370 
3371 #ifndef PRODUCT
3372 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3373   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3374   int reg = ra_->get_reg_first(this);
3375   st->print("add %s, rsp, #%d]\t# box lock",
3376             Matcher::regName[reg], offset);
3377 }
3378 #endif
3379 
3380 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3381   MacroAssembler _masm(&cbuf);
3382 
3383   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3384   int reg    = ra_->get_encode(this);
3385 
3386   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
3387     __ add(as_Register(reg), sp, offset);
3388   } else {
3389     ShouldNotReachHere();
3390   }
3391 }
3392 
3393 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
3394   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
3395   return 4;
3396 }
3397 
3398 //=============================================================================
3399 
3400 #ifndef PRODUCT
3401 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
3402 {
3403   st->print_cr("# MachUEPNode");
3404   if (UseCompressedClassPointers) {
3405     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3406     if (Universe::narrow_klass_shift() != 0) {
3407       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
3408     }
3409   } else {
3410    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3411   }
3412   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
3413   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
3414 }
3415 #endif
3416 
3417 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
3418 {
3419   // This is the unverified entry point.
3420   MacroAssembler _masm(&cbuf);
3421 
3422   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
3423   Label skip;
3424   // TODO
3425   // can we avoid this skip and still use a reloc?
3426   __ br(Assembler::EQ, skip);
3427   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
3428   __ bind(skip);
3429 }
3430 
3431 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
3432 {
3433   return MachNode::size(ra_);
3434 }
3435 
3436 // REQUIRED EMIT CODE
3437 
3438 //=============================================================================
3439 
3440 // Emit exception handler code.
3441 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
3442 {
3443   // mov rscratch1 #exception_blob_entry_point
3444   // br rscratch1
3445   // Note that the code buffer's insts_mark is always relative to insts.
3446   // That's why we must use the macroassembler to generate a handler.
3447   MacroAssembler _masm(&cbuf);
3448   address base = __ start_a_stub(size_exception_handler());
3449   if (base == NULL) {
3450     ciEnv::current()->record_failure("CodeCache is full");
3451     return 0;  // CodeBuffer::expand failed
3452   }
3453   int offset = __ offset();
3454   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
3455   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
3456   __ end_a_stub();
3457   return offset;
3458 }
3459 
3460 // Emit deopt handler code.
3461 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
3462 {
3463   // Note that the code buffer's insts_mark is always relative to insts.
3464   // That's why we must use the macroassembler to generate a handler.
3465   MacroAssembler _masm(&cbuf);
3466   address base = __ start_a_stub(size_deopt_handler());
3467   if (base == NULL) {
3468     ciEnv::current()->record_failure("CodeCache is full");
3469     return 0;  // CodeBuffer::expand failed
3470   }
3471   int offset = __ offset();
3472 
3473   __ adr(lr, __ pc());
3474   __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
3475 
3476   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
3477   __ end_a_stub();
3478   return offset;
3479 }
3480 
3481 // REQUIRED MATCHER CODE
3482 
3483 //=============================================================================
3484 
3485 const bool Matcher::match_rule_supported(int opcode) {
3486 
3487   switch (opcode) {
3488   default:
3489     break;
3490   }
3491 
3492   if (!has_match_rule(opcode)) {
3493     return false;
3494   }
3495 
3496   return true;  // Per default match rules are supported.
3497 }
3498 
3499 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
3500 
3501   // TODO
3502   // identify extra cases that we might want to provide match rules for
3503   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
3504   bool ret_value = match_rule_supported(opcode);
3505   // Add rules here.
3506 
3507   return ret_value;  // Per default match rules are supported.
3508 }
3509 
3510 const bool Matcher::has_predicated_vectors(void) {
3511   return false;
3512 }
3513 
3514 const int Matcher::float_pressure(int default_pressure_threshold) {
3515   return default_pressure_threshold;
3516 }
3517 
3518 int Matcher::regnum_to_fpu_offset(int regnum)
3519 {
3520   Unimplemented();
3521   return 0;
3522 }
3523 
3524 // Is this branch offset short enough that a short branch can be used?
3525 //
3526 // NOTE: If the platform does not provide any short branch variants, then
3527 //       this method should return false for offset 0.
3528 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
3529   // The passed offset is relative to address of the branch.
3530 
3531   return (-32768 <= offset && offset < 32768);
3532 }
3533 
3534 const bool Matcher::isSimpleConstant64(jlong value) {
3535   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
3536   // Probably always true, even if a temp register is required.
3537   return true;
3538 }
3539 
3540 // true just means we have fast l2f conversion
3541 const bool Matcher::convL2FSupported(void) {
3542   return true;
3543 }
3544 
3545 // Vector width in bytes.
3546 const int Matcher::vector_width_in_bytes(BasicType bt) {
3547   int size = MIN2(16,(int)MaxVectorSize);
3548   // Minimum 2 values in vector
3549   if (size < 2*type2aelembytes(bt)) size = 0;
3550   // But never < 4
3551   if (size < 4) size = 0;
3552   return size;
3553 }
3554 
3555 // Limits on vector size (number of elements) loaded into vector.
3556 const int Matcher::max_vector_size(const BasicType bt) {
3557   return vector_width_in_bytes(bt)/type2aelembytes(bt);
3558 }
3559 const int Matcher::min_vector_size(const BasicType bt) {
3560 //  For the moment limit the vector size to 8 bytes
3561     int size = 8 / type2aelembytes(bt);
3562     if (size < 2) size = 2;
3563     return size;
3564 }
3565 
3566 // Vector ideal reg.
3567 const uint Matcher::vector_ideal_reg(int len) {
3568   switch(len) {
3569     case  8: return Op_VecD;
3570     case 16: return Op_VecX;
3571   }
3572   ShouldNotReachHere();
3573   return 0;
3574 }
3575 
3576 const uint Matcher::vector_shift_count_ideal_reg(int size) {
3577   return Op_VecX;
3578 }
3579 
3580 // AES support not yet implemented
3581 const bool Matcher::pass_original_key_for_aes() {
3582   return false;
3583 }
3584 
3585 // x86 supports misaligned vectors store/load.
3586 const bool Matcher::misaligned_vectors_ok() {
3587   return !AlignVector; // can be changed by flag
3588 }
3589 
3590 // false => size gets scaled to BytesPerLong, ok.
3591 const bool Matcher::init_array_count_is_in_bytes = false;
3592 
3593 // Use conditional move (CMOVL)
3594 const int Matcher::long_cmove_cost() {
3595   // long cmoves are no more expensive than int cmoves
3596   return 0;
3597 }
3598 
3599 const int Matcher::float_cmove_cost() {
3600   // float cmoves are no more expensive than int cmoves
3601   return 0;
3602 }
3603 
3604 // Does the CPU require late expand (see block.cpp for description of late expand)?
3605 const bool Matcher::require_postalloc_expand = false;
3606 
3607 // Do we need to mask the count passed to shift instructions or does
3608 // the cpu only look at the lower 5/6 bits anyway?
3609 const bool Matcher::need_masked_shift_count = false;
3610 
3611 // This affects two different things:
3612 //  - how Decode nodes are matched
3613 //  - how ImplicitNullCheck opportunities are recognized
3614 // If true, the matcher will try to remove all Decodes and match them
3615 // (as operands) into nodes. NullChecks are not prepared to deal with
3616 // Decodes by final_graph_reshaping().
3617 // If false, final_graph_reshaping() forces the decode behind the Cmp
3618 // for a NullCheck. The matcher matches the Decode node into a register.
3619 // Implicit_null_check optimization moves the Decode along with the
3620 // memory operation back up before the NullCheck.
3621 bool Matcher::narrow_oop_use_complex_address() {
3622   return Universe::narrow_oop_shift() == 0;
3623 }
3624 
3625 bool Matcher::narrow_klass_use_complex_address() {
3626 // TODO
3627 // decide whether we need to set this to true
3628   return false;
3629 }
3630 
3631 bool Matcher::const_oop_prefer_decode() {
3632   // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
3633   return Universe::narrow_oop_base() == NULL;
3634 }
3635 
3636 bool Matcher::const_klass_prefer_decode() {
3637   // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
3638   return Universe::narrow_klass_base() == NULL;
3639 }
3640 
3641 // Is it better to copy float constants, or load them directly from
3642 // memory?  Intel can load a float constant from a direct address,
3643 // requiring no extra registers.  Most RISCs will have to materialize
3644 // an address into a register first, so they would do better to copy
3645 // the constant from stack.
3646 const bool Matcher::rematerialize_float_constants = false;
3647 
3648 // If CPU can load and store mis-aligned doubles directly then no
3649 // fixup is needed.  Else we split the double into 2 integer pieces
3650 // and move it piece-by-piece.  Only happens when passing doubles into
3651 // C code as the Java calling convention forces doubles to be aligned.
3652 const bool Matcher::misaligned_doubles_ok = true;
3653 
3654 // No-op on amd64
3655 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
3656   Unimplemented();
3657 }
3658 
3659 // Advertise here if the CPU requires explicit rounding operations to
3660 // implement the UseStrictFP mode.
3661 const bool Matcher::strict_fp_requires_explicit_rounding = false;
3662 
3663 // Are floats converted to double when stored to stack during
3664 // deoptimization?
3665 bool Matcher::float_in_double() { return true; }
3666 
3667 // Do ints take an entire long register or just half?
3668 // The relevant question is how the int is callee-saved:
3669 // the whole long is written but de-opt'ing will have to extract
3670 // the relevant 32 bits.
3671 const bool Matcher::int_in_long = true;
3672 
3673 // Return whether or not this register is ever used as an argument.
3674 // This function is used on startup to build the trampoline stubs in
3675 // generateOptoStub.  Registers not mentioned will be killed by the VM
3676 // call in the trampoline, and arguments in those registers not be
3677 // available to the callee.
3678 bool Matcher::can_be_java_arg(int reg)
3679 {
3680   return
3681     reg ==  R0_num || reg == R0_H_num ||
3682     reg ==  R1_num || reg == R1_H_num ||
3683     reg ==  R2_num || reg == R2_H_num ||
3684     reg ==  R3_num || reg == R3_H_num ||
3685     reg ==  R4_num || reg == R4_H_num ||
3686     reg ==  R5_num || reg == R5_H_num ||
3687     reg ==  R6_num || reg == R6_H_num ||
3688     reg ==  R7_num || reg == R7_H_num ||
3689     reg ==  V0_num || reg == V0_H_num ||
3690     reg ==  V1_num || reg == V1_H_num ||
3691     reg ==  V2_num || reg == V2_H_num ||
3692     reg ==  V3_num || reg == V3_H_num ||
3693     reg ==  V4_num || reg == V4_H_num ||
3694     reg ==  V5_num || reg == V5_H_num ||
3695     reg ==  V6_num || reg == V6_H_num ||
3696     reg ==  V7_num || reg == V7_H_num;
3697 }
3698 
3699 bool Matcher::is_spillable_arg(int reg)
3700 {
3701   return can_be_java_arg(reg);
3702 }
3703 
3704 bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
3705   return false;
3706 }
3707 
3708 RegMask Matcher::divI_proj_mask() {
3709   ShouldNotReachHere();
3710   return RegMask();
3711 }
3712 
3713 // Register for MODI projection of divmodI.
3714 RegMask Matcher::modI_proj_mask() {
3715   ShouldNotReachHere();
3716   return RegMask();
3717 }
3718 
3719 // Register for DIVL projection of divmodL.
3720 RegMask Matcher::divL_proj_mask() {
3721   ShouldNotReachHere();
3722   return RegMask();
3723 }
3724 
3725 // Register for MODL projection of divmodL.
3726 RegMask Matcher::modL_proj_mask() {
3727   ShouldNotReachHere();
3728   return RegMask();
3729 }
3730 
3731 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
3732   return FP_REG_mask();
3733 }
3734 
3735 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
3736   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
3737     Node* u = addp->fast_out(i);
3738     if (u->is_Mem()) {
3739       int opsize = u->as_Mem()->memory_size();
3740       assert(opsize > 0, "unexpected memory operand size");
3741       if (u->as_Mem()->memory_size() != (1<<shift)) {
3742         return false;
3743       }
3744     }
3745   }
3746   return true;
3747 }
3748 
3749 const bool Matcher::convi2l_type_required = false;
3750 
3751 // Should the Matcher clone shifts on addressing modes, expecting them
3752 // to be subsumed into complex addressing expressions or compute them
3753 // into registers?
3754 bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
3755   if (clone_base_plus_offset_address(m, mstack, address_visited)) {
3756     return true;
3757   }
3758 
3759   Node *off = m->in(AddPNode::Offset);
3760   if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
3761       size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
3762       // Are there other uses besides address expressions?
3763       !is_visited(off)) {
3764     address_visited.set(off->_idx); // Flag as address_visited
3765     mstack.push(off->in(2), Visit);
3766     Node *conv = off->in(1);
3767     if (conv->Opcode() == Op_ConvI2L &&
3768         // Are there other uses besides address expressions?
3769         !is_visited(conv)) {
3770       address_visited.set(conv->_idx); // Flag as address_visited
3771       mstack.push(conv->in(1), Pre_Visit);
3772     } else {
3773       mstack.push(conv, Pre_Visit);
3774     }
3775     address_visited.test_set(m->_idx); // Flag as address_visited
3776     mstack.push(m->in(AddPNode::Address), Pre_Visit);
3777     mstack.push(m->in(AddPNode::Base), Pre_Visit);
3778     return true;
3779   } else if (off->Opcode() == Op_ConvI2L &&
3780              // Are there other uses besides address expressions?
3781              !is_visited(off)) {
3782     address_visited.test_set(m->_idx); // Flag as address_visited
3783     address_visited.set(off->_idx); // Flag as address_visited
3784     mstack.push(off->in(1), Pre_Visit);
3785     mstack.push(m->in(AddPNode::Address), Pre_Visit);
3786     mstack.push(m->in(AddPNode::Base), Pre_Visit);
3787     return true;
3788   }
3789   return false;
3790 }
3791 
3792 // Transform:
3793 // (AddP base (AddP base address (LShiftL index con)) offset)
3794 // into:
3795 // (AddP base (AddP base offset) (LShiftL index con))
3796 // to take full advantage of ARM's addressing modes
3797 void Compile::reshape_address(AddPNode* addp) {
3798   Node *addr = addp->in(AddPNode::Address);
3799   if (addr->is_AddP() && addr->in(AddPNode::Base) == addp->in(AddPNode::Base)) {
3800     const AddPNode *addp2 = addr->as_AddP();
3801     if ((addp2->in(AddPNode::Offset)->Opcode() == Op_LShiftL &&
3802          addp2->in(AddPNode::Offset)->in(2)->is_Con() &&
3803          size_fits_all_mem_uses(addp, addp2->in(AddPNode::Offset)->in(2)->get_int())) ||
3804         addp2->in(AddPNode::Offset)->Opcode() == Op_ConvI2L) {
3805 
3806       // Any use that can't embed the address computation?
3807       for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
3808         Node* u = addp->fast_out(i);
3809         if (!u->is_Mem()) {
3810           return;
3811         }
3812         if (u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) {
3813           return;
3814         }
3815         if (addp2->in(AddPNode::Offset)->Opcode() != Op_ConvI2L) {
3816           int scale = 1 << addp2->in(AddPNode::Offset)->in(2)->get_int();
3817           if (VM_Version::expensive_load(u->as_Mem()->memory_size(), scale)) {
3818             return;
3819           }
3820         }
3821       }
3822 
3823       Node* off = addp->in(AddPNode::Offset);
3824       Node* addr2 = addp2->in(AddPNode::Address);
3825       Node* base = addp->in(AddPNode::Base);
3826 
3827       Node* new_addr = NULL;
3828       // Check whether the graph already has the new AddP we need
3829       // before we create one (no GVN available here).
3830       for (DUIterator_Fast imax, i = addr2->fast_outs(imax); i < imax; i++) {
3831         Node* u = addr2->fast_out(i);
3832         if (u->is_AddP() &&
3833             u->in(AddPNode::Base) == base &&
3834             u->in(AddPNode::Address) == addr2 &&
3835             u->in(AddPNode::Offset) == off) {
3836           new_addr = u;
3837           break;
3838         }
3839       }
3840 
3841       if (new_addr == NULL) {
3842         new_addr = new AddPNode(base, addr2, off);
3843       }
3844       Node* new_off = addp2->in(AddPNode::Offset);
3845       addp->set_req(AddPNode::Address, new_addr);
3846       if (addr->outcnt() == 0) {
3847         addr->disconnect_inputs(NULL, this);
3848       }
3849       addp->set_req(AddPNode::Offset, new_off);
3850       if (off->outcnt() == 0) {
3851         off->disconnect_inputs(NULL, this);
3852       }
3853     }
3854   }
3855 }
3856 
3857 // helper for encoding java_to_runtime calls on sim
3858 //
3859 // this is needed to compute the extra arguments required when
3860 // planting a call to the simulator blrt instruction. the TypeFunc
3861 // can be queried to identify the counts for integral, and floating
3862 // arguments and the return type
3863 
3864 static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
3865 {
3866   int gps = 0;
3867   int fps = 0;
3868   const TypeTuple *domain = tf->domain();
3869   int max = domain->cnt();
3870   for (int i = TypeFunc::Parms; i < max; i++) {
3871     const Type *t = domain->field_at(i);
3872     switch(t->basic_type()) {
3873     case T_FLOAT:
3874     case T_DOUBLE:
3875       fps++;
3876     default:
3877       gps++;
3878     }
3879   }
3880   gpcnt = gps;
3881   fpcnt = fps;
3882   BasicType rt = tf->return_type();
3883   switch (rt) {
3884   case T_VOID:
3885     rtype = MacroAssembler::ret_type_void;
3886     break;
3887   default:
3888     rtype = MacroAssembler::ret_type_integral;
3889     break;
3890   case T_FLOAT:
3891     rtype = MacroAssembler::ret_type_float;
3892     break;
3893   case T_DOUBLE:
3894     rtype = MacroAssembler::ret_type_double;
3895     break;
3896   }
3897 }
3898 
// Emit INSN with operands (REG, as_Register(BASE)) after guaranteeing that
// the memory operand has no index, no displacement and no scale.
// _masm is declared outside the inner braces on purpose, so statements that
// follow the macro in the same scope can keep emitting through it.
// SCRATCH is accepted but not used by the expansion.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN) \
  MacroAssembler _masm(&cbuf);                                     \
  {                                                                \
    guarantee(INDEX == -1, "mode not permitted for volatile");     \
    guarantee(DISP == 0, "mode not permitted for volatile");       \
    guarantee(SCALE == 0, "mode not permitted for volatile");      \
    __ INSN(REG, as_Register(BASE));                               \
  }
3907 
3908 typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
3909 typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
3910 typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
3911                                   MacroAssembler::SIMD_RegVariant T, const Address &adr);
3912 
3913   // Used for all non-volatile memory accesses.  The use of
3914   // $mem->opcode() to discover whether this pattern uses sign-extended
3915   // offsets is something of a kludge.
3916   static void loadStore(MacroAssembler masm, mem_insn insn,
3917                          Register reg, int opcode,
3918                          Register base, int index, int size, int disp)
3919   {
3920     Address::extend scale;
3921 
3922     // Hooboy, this is fugly.  We need a way to communicate to the
3923     // encoder that the index needs to be sign extended, so we have to
3924     // enumerate all the cases.
3925     switch (opcode) {
3926     case INDINDEXSCALEDI2L:
3927     case INDINDEXSCALEDI2LN:
3928     case INDINDEXI2L:
3929     case INDINDEXI2LN:
3930       scale = Address::sxtw(size);
3931       break;
3932     default:
3933       scale = Address::lsl(size);
3934     }
3935 
3936     if (index == -1) {
3937       (masm.*insn)(reg, Address(base, disp));
3938     } else {
3939       assert(disp == 0, "unsupported address mode: disp = %d", disp);
3940       (masm.*insn)(reg, Address(base, as_Register(index), scale));
3941     }
3942   }
3943 
3944   static void loadStore(MacroAssembler masm, mem_float_insn insn,
3945                          FloatRegister reg, int opcode,
3946                          Register base, int index, int size, int disp)
3947   {
3948     Address::extend scale;
3949 
3950     switch (opcode) {
3951     case INDINDEXSCALEDI2L:
3952     case INDINDEXSCALEDI2LN:
3953       scale = Address::sxtw(size);
3954       break;
3955     default:
3956       scale = Address::lsl(size);
3957     }
3958 
3959      if (index == -1) {
3960       (masm.*insn)(reg, Address(base, disp));
3961     } else {
3962       assert(disp == 0, "unsupported address mode: disp = %d", disp);
3963       (masm.*insn)(reg, Address(base, as_Register(index), scale));
3964     }
3965   }
3966 
3967   static void loadStore(MacroAssembler masm, mem_vector_insn insn,
3968                          FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
3969                          int opcode, Register base, int index, int size, int disp)
3970   {
3971     if (index == -1) {
3972       (masm.*insn)(reg, T, Address(base, disp));
3973     } else {
3974       assert(disp == 0, "unsupported address mode");
3975       (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
3976     }
3977   }
3978 
3979 %}
3980 
3981 
3982 
3983 //----------ENCODING BLOCK-----------------------------------------------------
3984 // This block specifies the encoding classes used by the compiler to
3985 // output byte streams.  Encoding classes are parameterized macros
3986 // used by Machine Instruction Nodes in order to generate the bit
3987 // encoding of the instruction.  Operands specify their base encoding
3988 // interface with the interface keyword.  There are currently
3989 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
3990 // COND_INTER.  REG_INTER causes an operand to generate a function
3991 // which returns its register number when queried.  CONST_INTER causes
3992 // an operand to generate a function which returns the value of the
3993 // constant when queried.  MEMORY_INTER causes an operand to generate
3994 // four functions which return the Base Register, the Index Register,
3995 // the Scale Value, and the Offset Value of the operand when queried.
3996 // COND_INTER causes an operand to generate six functions which return
3997 // the encoding code (ie - encoding bits for the instruction)
3998 // associated with each basic boolean condition for a conditional
3999 // instruction.
4000 //
4001 // Instructions specify two basic values for encoding.  Again, a
4002 // function is available to check if the constant displacement is an
4003 // oop. They use the ins_encode keyword to specify their encoding
4004 // classes (which must be a sequence of enc_class names, and their
4005 // parameters, specified in the encoding block), and they use the
4006 // opcode keyword to specify, in order, their primary, secondary, and
4007 // tertiary opcode.  Only the opcode sections which a particular
4008 // instruction needs for encoding need to be specified.
4009 encode %{
4010   // Build emit functions for each basic byte or larger field in the
4011   // intel encoding scheme (opcode, rm, sib, immediate), and call them
4012   // from C++ code in the enc_class source block.  Emit functions will
4013   // live in the main source block for now.  In future, we can
4014   // generalize this by adding a syntax that specifies the sizes of
4015   // fields in an order, so that the adlc can build the emit functions
4016   // automagically
4017 
4018   // catch all for unimplemented encodings
4019   enc_class enc_unimplemented %{
4020     MacroAssembler _masm(&cbuf);
4021     __ unimplemented("C2 catch all");
4022   %}
4023 
4024   // BEGIN Non-volatile memory access
4025 
4026   enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
4027     Register dst_reg = as_Register($dst$$reg);
4028     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
4029                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4030   %}
4031 
4032   enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
4033     Register dst_reg = as_Register($dst$$reg);
4034     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
4035                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4036   %}
4037 
4038   enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
4039     Register dst_reg = as_Register($dst$$reg);
4040     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
4041                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4042   %}
4043 
4044   enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
4045     Register dst_reg = as_Register($dst$$reg);
4046     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
4047                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4048   %}
4049 
4050   enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
4051     Register dst_reg = as_Register($dst$$reg);
4052     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
4053                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4054   %}
4055 
4056   enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
4057     Register dst_reg = as_Register($dst$$reg);
4058     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
4059                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4060   %}
4061 
4062   enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
4063     Register dst_reg = as_Register($dst$$reg);
4064     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
4065                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4066   %}
4067 
4068   enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
4069     Register dst_reg = as_Register($dst$$reg);
4070     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
4071                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4072   %}
4073 
4074   enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
4075     Register dst_reg = as_Register($dst$$reg);
4076     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
4077                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4078   %}
4079 
4080   enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
4081     Register dst_reg = as_Register($dst$$reg);
4082     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
4083                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4084   %}
4085 
4086   enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
4087     Register dst_reg = as_Register($dst$$reg);
4088     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
4089                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4090   %}
4091 
4092   enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
4093     Register dst_reg = as_Register($dst$$reg);
4094     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
4095                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4096   %}
4097 
4098   enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
4099     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
4100     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
4101                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4102   %}
4103 
4104   enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
4105     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
4106     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
4107                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4108   %}
4109 
4110   enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
4111     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
4112     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
4113        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4114   %}
4115 
4116   enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
4117     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
4118     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
4119        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4120   %}
4121 
4122   enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
4123     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
4124     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
4125        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4126   %}
4127 
4128   enc_class aarch64_enc_strb(iRegI src, memory mem) %{
4129     Register src_reg = as_Register($src$$reg);
4130     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
4131                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4132   %}
4133 
4134   enc_class aarch64_enc_strb0(memory mem) %{
4135     MacroAssembler _masm(&cbuf);
4136     loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
4137                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4138   %}
4139 
4140   enc_class aarch64_enc_strb0_ordered(memory mem) %{
4141     MacroAssembler _masm(&cbuf);
4142     __ membar(Assembler::StoreStore);
4143     loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
4144                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4145   %}
4146 
4147   enc_class aarch64_enc_strh(iRegI src, memory mem) %{
4148     Register src_reg = as_Register($src$$reg);
4149     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
4150                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4151   %}
4152 
4153   enc_class aarch64_enc_strh0(memory mem) %{
4154     MacroAssembler _masm(&cbuf);
4155     loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
4156                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4157   %}
4158 
4159   enc_class aarch64_enc_strw(iRegI src, memory mem) %{
4160     Register src_reg = as_Register($src$$reg);
4161     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
4162                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4163   %}
4164 
4165   enc_class aarch64_enc_strw0(memory mem) %{
4166     MacroAssembler _masm(&cbuf);
4167     loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
4168                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4169   %}
4170 
4171   enc_class aarch64_enc_str(iRegL src, memory mem) %{
4172     Register src_reg = as_Register($src$$reg);
4173     // we sometimes get asked to store the stack pointer into the
4174     // current thread -- we cannot do that directly on AArch64
4175     if (src_reg == r31_sp) {
4176       MacroAssembler _masm(&cbuf);
4177       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
4178       __ mov(rscratch2, sp);
4179       src_reg = rscratch2;
4180     }
4181     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
4182                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4183   %}
4184 
4185   enc_class aarch64_enc_str0(memory mem) %{
4186     MacroAssembler _masm(&cbuf);
4187     loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
4188                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4189   %}
4190 
4191   enc_class aarch64_enc_strs(vRegF src, memory mem) %{
4192     FloatRegister src_reg = as_FloatRegister($src$$reg);
4193     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
4194                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4195   %}
4196 
4197   enc_class aarch64_enc_strd(vRegD src, memory mem) %{
4198     FloatRegister src_reg = as_FloatRegister($src$$reg);
4199     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
4200                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4201   %}
4202 
4203   enc_class aarch64_enc_strvS(vecD src, memory mem) %{
4204     FloatRegister src_reg = as_FloatRegister($src$$reg);
4205     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
4206        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4207   %}
4208 
4209   enc_class aarch64_enc_strvD(vecD src, memory mem) %{
4210     FloatRegister src_reg = as_FloatRegister($src$$reg);
4211     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
4212        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4213   %}
4214 
4215   enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
4216     FloatRegister src_reg = as_FloatRegister($src$$reg);
4217     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
4218        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4219   %}
4220 
4221   // END Non-volatile memory access
4222 
4223   // volatile loads and stores
4224 
4225   enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
         // Volatile byte store: passes $src, the components of $mem, rscratch1
         // (scratch) and the stlrb instruction to the MOV_VOLATILE macro.
4226     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4227                  rscratch1, stlrb);
4228   %}
4229 
4230   enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
         // Volatile halfword store: same as aarch64_enc_stlrb but with stlrh.
4231     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4232                  rscratch1, stlrh);
4233   %}
4234 
4235   enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
         // Volatile word store: same as aarch64_enc_stlrb but with stlrw.
4236     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4237                  rscratch1, stlrw);
4238   %}
4239 
4240 
4241   enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
         // Volatile signed-byte load into an int register: ldarb through
         // MOV_VOLATILE, then sign-extend $dst in place with sxtbw.
4242     Register dst_reg = as_Register($dst$$reg);
4243     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4244              rscratch1, ldarb);
         // NOTE(review): `__` needs a MacroAssembler named _masm in scope; none is
         // declared here, so presumably MOV_VOLATILE declares it -- confirm.
4245     __ sxtbw(dst_reg, dst_reg);
4246   %}
4247 
4248   enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
         // Volatile signed-byte load into a long register: ldarb through
         // MOV_VOLATILE, then sign-extend $dst in place with sxtb.
4249     Register dst_reg = as_Register($dst$$reg);
4250     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4251              rscratch1, ldarb);
         // NOTE(review): relies on _masm being in scope for `__`; presumably
         // provided by MOV_VOLATILE -- confirm.
4252     __ sxtb(dst_reg, dst_reg);
4253   %}
4254 
4255   enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
         // Volatile byte load into an int register: ldarb through MOV_VOLATILE,
         // with no sign-extension step afterwards.
4256     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4257              rscratch1, ldarb);
4258   %}
4259 
4260   enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
         // Volatile byte load into a long register: ldarb through MOV_VOLATILE,
         // with no sign-extension step afterwards.
4261     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4262              rscratch1, ldarb);
4263   %}
4264 
4265   enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
         // Volatile signed-halfword load into an int register: ldarh through
         // MOV_VOLATILE, then sign-extend $dst in place with sxthw.
4266     Register dst_reg = as_Register($dst$$reg);
4267     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4268              rscratch1, ldarh);
         // NOTE(review): relies on _masm being in scope for `__`; presumably
         // provided by MOV_VOLATILE -- confirm.
4269     __ sxthw(dst_reg, dst_reg);
4270   %}
4271 
4272   enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
         // Volatile signed-halfword load into a long register: ldarh through
         // MOV_VOLATILE, then sign-extend $dst in place with sxth.
4273     Register dst_reg = as_Register($dst$$reg);
4274     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4275              rscratch1, ldarh);
         // NOTE(review): relies on _masm being in scope for `__`; presumably
         // provided by MOV_VOLATILE -- confirm.
4276     __ sxth(dst_reg, dst_reg);
4277   %}
4278 
4279   enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
         // Volatile halfword load into an int register: ldarh through
         // MOV_VOLATILE, with no sign-extension step afterwards.
4280     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4281              rscratch1, ldarh);
4282   %}
4283 
4284   enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
         // Volatile halfword load into a long register: ldarh through
         // MOV_VOLATILE, with no sign-extension step afterwards.
4285     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4286              rscratch1, ldarh);
4287   %}
4288 
4289   enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
         // Volatile word load into an int register: ldarw through MOV_VOLATILE.
4290     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4291              rscratch1, ldarw);
4292   %}
4293 
4294   enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
         // Volatile word load into a long register: ldarw through MOV_VOLATILE.
         // NOTE(review): this enc_class has the same name as the iRegI variant
         // defined just before it and an identical body; confirm whether the
         // duplicate definition is intentional or should be removed/renamed.
4295     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4296              rscratch1, ldarw);
4297   %}
4298 
4299   enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
         // Volatile load into a long register: ldar through MOV_VOLATILE.
4300     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4301              rscratch1, ldar);
4302   %}
4303 
4304   enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
         // Volatile float load: ldarw into the general register rscratch1
         // through MOV_VOLATILE, then move the bits to $dst with fmovs.
         // NOTE(review): rscratch1 is passed both as the data register and as
         // the scratch register -- confirm MOV_VOLATILE tolerates that aliasing.
4305     MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4306              rscratch1, ldarw);
4307     __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
4308   %}
4309 
4310   enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
         // Volatile double load: ldar into the general register rscratch1
         // through MOV_VOLATILE, then move the bits to $dst with fmovd.
         // NOTE(review): rscratch1 is passed both as the data register and as
         // the scratch register -- confirm MOV_VOLATILE tolerates that aliasing.
4311     MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4312              rscratch1, ldar);
4313     __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
4314   %}
4315 
4316   enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
         // Volatile 64-bit store of $src to $mem using stlr through MOV_VOLATILE.
         // When $src is the stack pointer it is first copied into rscratch2 and
         // that copy is stored instead.
4317     Register src_reg = as_Register($src$$reg);
4318     // we sometimes get asked to store the stack pointer into the
4319     // current thread -- we cannot do that directly on AArch64
4320     if (src_reg == r31_sp) {
           // This _masm is local to the if-block; presumably kept in its own
           // scope so it does not clash with one MOV_VOLATILE declares -- confirm.
4321         MacroAssembler _masm(&cbuf);
4322       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
4323       __ mov(rscratch2, sp);
4324       src_reg = rscratch2;
4325     }
4326     MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4327                  rscratch1, stlr);
4328   %}
4329 
4330   enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
         // Volatile float store: move the bits of $src into the general register
         // rscratch2 with fmovs, then store rscratch2 with stlrw via MOV_VOLATILE.
         // The fmovs is emitted inside its own brace scope with a local _masm.
4331     {
4332       MacroAssembler _masm(&cbuf);
4333       FloatRegister src_reg = as_FloatRegister($src$$reg);
4334       __ fmovs(rscratch2, src_reg);
4335     }
4336     MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4337                  rscratch1, stlrw);
4338   %}
4339 
4340   enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
         // Volatile double store: move the bits of $src into the general register
         // rscratch2 with fmovd, then store rscratch2 with stlr via MOV_VOLATILE.
         // The fmovd is emitted inside its own brace scope with a local _masm.
4341     {
4342       MacroAssembler _masm(&cbuf);
4343       FloatRegister src_reg = as_FloatRegister($src$$reg);
4344       __ fmovd(rscratch2, src_reg);
4345     }
4346     MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4347                  rscratch1, stlr);
4348   %}
4349 
4350   // synchronized read/update encodings
4351 
4352   enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
         // Emit ldaxr of $mem into $dst. ldaxr is given a plain register as its
         // address, so any displacement and/or scaled index is first folded into
         // rscratch1 with lea; only base-with-zero-displacement is used directly.
4353     MacroAssembler _masm(&cbuf);
4354     Register dst_reg = as_Register($dst$$reg);
4355     Register base = as_Register($mem$$base);
4356     int index = $mem$$index;
4357     int scale = $mem$$scale;
4358     int disp = $mem$$disp;
         // index == -1 means the operand has no index register.
4359     if (index == -1) {
4360        if (disp != 0) {
4361         __ lea(rscratch1, Address(base, disp));
4362         __ ldaxr(dst_reg, rscratch1);
4363       } else {
4364         // TODO
4365         // should we ever get anything other than this case?
4366         __ ldaxr(dst_reg, base);
4367       }
4368     } else {
4369       Register index_reg = as_Register(index);
4370       if (disp == 0) {
4371         __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
4372         __ ldaxr(dst_reg, rscratch1);
4373       } else {
             // base + disp first, then add the scaled index on top of it.
4374         __ lea(rscratch1, Address(base, disp));
4375         __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
4376         __ ldaxr(dst_reg, rscratch1);
4377       }
4378     }
4379   %}
4380 
4381   enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
         // Emit stlxr of $src to $mem with rscratch1 as the status register,
         // then compare that status against zero so the flags reflect it.
         // Any displacement and/or scaled index is folded into rscratch2 with
         // lea first; only base-with-zero-displacement is used directly.
4382     MacroAssembler _masm(&cbuf);
4383     Register src_reg = as_Register($src$$reg);
4384     Register base = as_Register($mem$$base);
4385     int index = $mem$$index;
4386     int scale = $mem$$scale;
4387     int disp = $mem$$disp;
         // index == -1 means the operand has no index register.
4388     if (index == -1) {
4389        if (disp != 0) {
4390         __ lea(rscratch2, Address(base, disp));
4391         __ stlxr(rscratch1, src_reg, rscratch2);
4392       } else {
4393         // TODO
4394         // should we ever get anything other than this case?
4395         __ stlxr(rscratch1, src_reg, base);
4396       }
4397     } else {
4398       Register index_reg = as_Register(index);
4399       if (disp == 0) {
4400         __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
4401         __ stlxr(rscratch1, src_reg, rscratch2);
4402       } else {
4403         __ lea(rscratch2, Address(base, disp));
4404         __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
4405         __ stlxr(rscratch1, src_reg, rscratch2);
4406       }
4407     }
         // Flags become EQ exactly when the status word in rscratch1 is zero.
4408     __ cmpw(rscratch1, zr);
4409   %}
4410 
4411   enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
         // 64-bit (xword) compare-and-exchange on [$mem base] via
         // MacroAssembler::cmpxchg with acquire=false, release=true, weak=false
         // and noreg as the final argument. Only a bare base register is
         // accepted as the address: the guarantee rejects any index or
         // displacement.
4412     MacroAssembler _masm(&cbuf);
4413     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4414     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4415                Assembler::xword, /*acquire*/ false, /*release*/ true,
4416                /*weak*/ false, noreg);
4417   %}
4418 
4419   enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
         // 32-bit (word) compare-and-exchange on [$mem base]; otherwise the
         // same arguments as aarch64_enc_cmpxchg (acquire=false, release=true,
         // weak=false). The address must be a bare base register.
4420     MacroAssembler _masm(&cbuf);
4421     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4422     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4423                Assembler::word, /*acquire*/ false, /*release*/ true,
4424                /*weak*/ false, noreg);
4425   %}
4426 
4427   enc_class aarch64_enc_cmpxchgs(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
         // 16-bit (halfword) compare-and-exchange on [$mem base] with
         // acquire=false, release=true, weak=false. The address must be a bare
         // base register.
4428     MacroAssembler _masm(&cbuf);
4429     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4430     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4431                Assembler::halfword, /*acquire*/ false, /*release*/ true,
4432                /*weak*/ false, noreg);
4433   %}
4434 
4435   enc_class aarch64_enc_cmpxchgb(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
         // 8-bit (byte) compare-and-exchange on [$mem base] with acquire=false,
         // release=true, weak=false. The address must be a bare base register.
4436     MacroAssembler _masm(&cbuf);
4437     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4438     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4439                Assembler::byte, /*acquire*/ false, /*release*/ true,
4440                /*weak*/ false, noreg);
4441   %}  
4442     
4443 
4444   // The only difference between aarch64_enc_cmpxchg and
4445   // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
4446   // CompareAndSwap sequence to serve as a barrier on acquiring a
4447   // lock.
4448   enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
         // 64-bit (xword) compare-and-exchange identical to aarch64_enc_cmpxchg
         // except that acquire=true is passed as well as release=true.
4449     MacroAssembler _masm(&cbuf);
4450     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4451     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4452                Assembler::xword, /*acquire*/ true, /*release*/ true,
4453                /*weak*/ false, noreg);
4454   %}
4455 
4456   enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
         // 32-bit (word) compare-and-exchange identical to aarch64_enc_cmpxchgw
         // except that acquire=true is passed as well as release=true.
4457     MacroAssembler _masm(&cbuf);
4458     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4459     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4460                Assembler::word, /*acquire*/ true, /*release*/ true,
4461                /*weak*/ false, noreg);
4462   %}
4463 
4464 
4465   // auxiliary used for CompareAndSwapX to set result register
4466   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
         // Materialize the EQ condition into $res with cset, so the result
         // register reflects the flags left by a preceding compare-and-swap.
4467     MacroAssembler _masm(&cbuf);
4468     Register res_reg = as_Register($res$$reg);
4469     __ cset(res_reg, Assembler::EQ);
4470   %}
4471 
4472   // prefetch encodings
4473 
4474   enc_class aarch64_enc_prefetchw(memory mem) %{
         // Emit a prfm with the PSTL1KEEP hint for $mem. base+disp and
         // base+scaled-index are used directly; when both an index and a
         // non-zero displacement are present, base+disp is first formed in
         // rscratch1.
4475     MacroAssembler _masm(&cbuf);
4476     Register base = as_Register($mem$$base);
4477     int index = $mem$$index;
4478     int scale = $mem$$scale;
4479     int disp = $mem$$disp;
         // index == -1 means the operand has no index register.
4480     if (index == -1) {
4481       __ prfm(Address(base, disp), PSTL1KEEP);
4482     } else {
4483       Register index_reg = as_Register(index);
4484       if (disp == 0) {
4485         __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
4486       } else {
4487         __ lea(rscratch1, Address(base, disp));
4488         __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
4489       }
4490     }
4491   %}
4492 
4493   // mov encodings
4494 
4495   enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
         // Load the 32-bit constant $src into $dst with movw. A zero constant
         // is emitted as a move from the zero register instead of an immediate.
4496     MacroAssembler _masm(&cbuf);
4497     u_int32_t con = (u_int32_t)$src$$constant;
4498     Register dst_reg = as_Register($dst$$reg);
4499     if (con == 0) {
4500       __ movw(dst_reg, zr);
4501     } else {
4502       __ movw(dst_reg, con);
4503     }
4504   %}
4505 
4506   enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
         // Load the 64-bit constant $src into $dst with mov. A zero constant
         // is emitted as a move from the zero register instead of an immediate.
4507     MacroAssembler _masm(&cbuf);
4508     Register dst_reg = as_Register($dst$$reg);
4509     u_int64_t con = (u_int64_t)$src$$constant;
4510     if (con == 0) {
4511       __ mov(dst_reg, zr);
4512     } else {
4513       __ mov(dst_reg, con);
4514     }
4515   %}
4516 
4517   enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
         // Load the pointer constant $src into $dst, choosing the emission by
         // the constant's relocation type:
         //   oop_type      -> movoop (immediate form)
         //   metadata_type -> mov_metadata
         //   none          -> plain mov for addresses below one VM page,
         //                    otherwise adrp followed by add of the offset
         // NULL and (address)1 are not expected here and hit ShouldNotReachHere.
4518     MacroAssembler _masm(&cbuf);
4519     Register dst_reg = as_Register($dst$$reg);
4520     address con = (address)$src$$constant;
4521     if (con == NULL || con == (address)1) {
4522       ShouldNotReachHere();
4523     } else {
4524       relocInfo::relocType rtype = $src->constant_reloc();
4525       if (rtype == relocInfo::oop_type) {
4526         __ movoop(dst_reg, (jobject)con, /*immediate*/true);
4527       } else if (rtype == relocInfo::metadata_type) {
4528         __ mov_metadata(dst_reg, (Metadata*)con);
4529       } else {
4530         assert(rtype == relocInfo::none, "unexpected reloc type");
4531         if (con < (address)(uintptr_t)os::vm_page_size()) {
4532           __ mov(dst_reg, con);
4533         } else {
               // adrp fills in `offset`, which is then added to the value it
               // placed in dst_reg.
4534           unsigned long offset;
4535           __ adrp(dst_reg, con, offset);
4536           __ add(dst_reg, dst_reg, offset);
4537         }
4538       }
4539     }
4540   %}
4541 
4542   enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
         // Load the null pointer constant: move the zero register into $dst.
         // $src is not read.
4543     MacroAssembler _masm(&cbuf);
4544     Register dst_reg = as_Register($dst$$reg);
4545     __ mov(dst_reg, zr);
4546   %}
4547 
4548   enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
         // Load the pointer constant 1 into $dst as a 64-bit immediate.
         // $src is not read.
4549     MacroAssembler _masm(&cbuf);
4550     Register dst_reg = as_Register($dst$$reg);
4551     __ mov(dst_reg, (u_int64_t)1);
4552   %}
4553 
4554   enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
         // Load the polling page address $src into $dst with a single adrp
         // carrying a poll_type relocation. The offset adrp reports back is
         // asserted to be zero, so no follow-up add is emitted.
4555     MacroAssembler _masm(&cbuf);
4556     address page = (address)$src$$constant;
4557     Register dst_reg = as_Register($dst$$reg);
4558     unsigned long off;
4559     __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
4560     assert(off == 0, "assumed offset == 0");
4561   %}
4562 
4563   enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
         // Load the byte map base into $dst via
         // MacroAssembler::load_byte_map_base. $src is not read.
4564     MacroAssembler _masm(&cbuf);
4565     __ load_byte_map_base($dst$$Register);
4566   %}
4567 
4568   enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
         // Load the narrow oop constant $src into $dst with set_narrow_oop.
         // A NULL constant is not expected here (ShouldNotReachHere), and the
         // relocation type is asserted to be oop_type.
4569     MacroAssembler _masm(&cbuf);
4570     Register dst_reg = as_Register($dst$$reg);
4571     address con = (address)$src$$constant;
4572     if (con == NULL) {
4573       ShouldNotReachHere();
4574     } else {
4575       relocInfo::relocType rtype = $src->constant_reloc();
4576       assert(rtype == relocInfo::oop_type, "unexpected reloc type");
4577       __ set_narrow_oop(dst_reg, (jobject)con);
4578     }
4579   %}
4580 
4581   enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
         // Load the narrow null constant: move the zero register into $dst.
         // $src is not read.
4582     MacroAssembler _masm(&cbuf);
4583     Register dst_reg = as_Register($dst$$reg);
4584     __ mov(dst_reg, zr);
4585   %}
4586 
4587   enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
         // Load the narrow klass constant $src into $dst with set_narrow_klass.
         // A NULL constant is not expected here (ShouldNotReachHere), and the
         // relocation type is asserted to be metadata_type.
4588     MacroAssembler _masm(&cbuf);
4589     Register dst_reg = as_Register($dst$$reg);
4590     address con = (address)$src$$constant;
4591     if (con == NULL) {
4592       ShouldNotReachHere();
4593     } else {
4594       relocInfo::relocType rtype = $src->constant_reloc();
4595       assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
4596       __ set_narrow_klass(dst_reg, (Klass *)con);
4597     }
4598   %}
4599 
4600   // arithmetic encodings
4601 
4602   enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
         // 32-bit add or subtract of an immediate: $dst = $src1 +/- $src2.
         // The constant is negated when $primary is set (subtract), and the
         // sign of the resulting value picks addw or subw so the immediate
         // handed to the assembler is never negative.
4603     MacroAssembler _masm(&cbuf);
4604     Register dst_reg = as_Register($dst$$reg);
4605     Register src_reg = as_Register($src1$$reg);
4606     int32_t con = (int32_t)$src2$$constant;
4607     // add has primary == 0, subtract has primary == 1
4608     if ($primary) { con = -con; }
4609     if (con < 0) {
4610       __ subw(dst_reg, src_reg, -con);
4611     } else {
4612       __ addw(dst_reg, src_reg, con);
4613     }
4614   %}
4615 
4616   enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
         // 64-bit add or subtract of an immediate: $dst = $src1 +/- $src2.
         // The constant is negated when $primary is set (subtract), and the
         // sign of the resulting value picks add or sub so the immediate
         // handed to the assembler is never negative.
         // NOTE(review): the long constant is narrowed to int32_t; presumably
         // immLAddSub only admits values that fit -- confirm.
4617     MacroAssembler _masm(&cbuf);
4618     Register dst_reg = as_Register($dst$$reg);
4619     Register src_reg = as_Register($src1$$reg);
4620     int32_t con = (int32_t)$src2$$constant;
4621     // add has primary == 0, subtract has primary == 1
4622     if ($primary) { con = -con; }
4623     if (con < 0) {
4624       __ sub(dst_reg, src_reg, -con);
4625     } else {
4626       __ add(dst_reg, src_reg, con);
4627     }
4628   %}
4629 
4630   enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
         // Division of $src1 by $src2 into $dst via corrected_idivl, called
         // with the boolean argument false (aarch64_enc_modw passes true) and
         // rscratch1 as scratch.
4631     MacroAssembler _masm(&cbuf);
4632    Register dst_reg = as_Register($dst$$reg);
4633    Register src1_reg = as_Register($src1$$reg);
4634    Register src2_reg = as_Register($src2$$reg);
4635     __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
4636   %}
4637 
4638   enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
         // Division of $src1 by $src2 into $dst via corrected_idivq, called
         // with the boolean argument false (aarch64_enc_mod passes true) and
         // rscratch1 as scratch.
         // NOTE(review): the operands are declared iRegI although the 'q'
         // helper is used where aarch64_enc_divw uses the 'l' one -- confirm
         // whether iRegL was intended.
4639     MacroAssembler _masm(&cbuf);
4640    Register dst_reg = as_Register($dst$$reg);
4641    Register src1_reg = as_Register($src1$$reg);
4642    Register src2_reg = as_Register($src2$$reg);
4643     __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
4644   %}
4645 
4646   enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
         // Remainder of $src1 by $src2 into $dst via corrected_idivl, called
         // with the boolean argument true (aarch64_enc_divw passes false) and
         // rscratch1 as scratch.
4647     MacroAssembler _masm(&cbuf);
4648    Register dst_reg = as_Register($dst$$reg);
4649    Register src1_reg = as_Register($src1$$reg);
4650    Register src2_reg = as_Register($src2$$reg);
4651     __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
4652   %}
4653 
4654   enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
         // Remainder of $src1 by $src2 into $dst via corrected_idivq, called
         // with the boolean argument true (aarch64_enc_div passes false) and
         // rscratch1 as scratch.
         // NOTE(review): the operands are declared iRegI although the 'q'
         // helper is used where aarch64_enc_modw uses the 'l' one -- confirm
         // whether iRegL was intended.
4655     MacroAssembler _masm(&cbuf);
4656    Register dst_reg = as_Register($dst$$reg);
4657    Register src1_reg = as_Register($src1$$reg);
4658    Register src2_reg = as_Register($src2$$reg);
4659     __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
4660   %}
4661 
4662   // compare instruction encodings
4663 
4664   enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
         // 32-bit register/register compare: cmpw $src1, $src2.
4665     MacroAssembler _masm(&cbuf);
4666     Register reg1 = as_Register($src1$$reg);
4667     Register reg2 = as_Register($src2$$reg);
4668     __ cmpw(reg1, reg2);
4669   %}
4670 
4671   enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
         // 32-bit compare of $src1 against an add/sub immediate. The result is
         // discarded into zr: subsw is used for non-negative constants and
         // addsw of the negated constant for negative ones.
4672     MacroAssembler _masm(&cbuf);
4673     Register reg = as_Register($src1$$reg);
4674     int32_t val = $src2$$constant;
4675     if (val >= 0) {
4676       __ subsw(zr, reg, val);
4677     } else {
4678       __ addsw(zr, reg, -val);
4679     }
4680   %}
4681 
4682   enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
         // 32-bit compare of $src1 against an arbitrary int constant: the
         // constant is first materialized in rscratch1, then compared with cmpw.
4683     MacroAssembler _masm(&cbuf);
4684     Register reg1 = as_Register($src1$$reg);
4685     u_int32_t val = (u_int32_t)$src2$$constant;
4686     __ movw(rscratch1, val);
4687     __ cmpw(reg1, rscratch1);
4688   %}
4689 
4690   enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
         // 64-bit register/register compare: cmp $src1, $src2.
4691     MacroAssembler _masm(&cbuf);
4692     Register reg1 = as_Register($src1$$reg);
4693     Register reg2 = as_Register($src2$$reg);
4694     __ cmp(reg1, reg2);
4695   %}
4696 
4697   enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
         // 64-bit compare of $src1 against an add/sub immediate. The result is
         // discarded into zr: subs is used for non-negative constants and adds
         // of the negated constant for negative ones. Long.MIN_VALUE cannot be
         // negated, so it is materialized in rscratch1 and compared with subs.
         //
         // The MIN_VALUE test compares the unsigned bit pattern against 1 << 63
         // rather than evaluating `val != -val`: negating the minimum int64_t
         // is signed overflow (undefined behaviour), which permits a compiler
         // to assume the test is always true and drop the special-case branch.
4698     MacroAssembler _masm(&cbuf);
4699     Register reg = as_Register($src1$$reg);
4700     int64_t val = $src2$$constant;
4701     if (val >= 0) {
4702       __ subs(zr, reg, val);
4703     } else if ((u_int64_t)val != ((u_int64_t)1 << 63)) {
4704       __ adds(zr, reg, -val);
4705     } else {
4706     // aargh, Long.MIN_VALUE is a special case
4707       __ orr(rscratch1, zr, (u_int64_t)val);
4708       __ subs(zr, reg, rscratch1);
4709     }
4710   %}
4711 
4712   enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
         // 64-bit compare of $src1 against an arbitrary long constant: the
         // constant is first materialized in rscratch1, then compared with cmp.
4713     MacroAssembler _masm(&cbuf);
4714     Register reg1 = as_Register($src1$$reg);
4715     u_int64_t val = (u_int64_t)$src2$$constant;
4716     __ mov(rscratch1, val);
4717     __ cmp(reg1, rscratch1);
4718   %}
4719 
4720   enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
         // Pointer compare: cmp $src1, $src2.
4721     MacroAssembler _masm(&cbuf);
4722     Register reg1 = as_Register($src1$$reg);
4723     Register reg2 = as_Register($src2$$reg);
4724     __ cmp(reg1, reg2);
4725   %}
4726 
4727   enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
         // Narrow-pointer compare: cmpw $src1, $src2.
4728     MacroAssembler _masm(&cbuf);
4729     Register reg1 = as_Register($src1$$reg);
4730     Register reg2 = as_Register($src2$$reg);
4731     __ cmpw(reg1, reg2);
4732   %}
4733 
4734   enc_class aarch64_enc_testp(iRegP src) %{
         // Null test of a pointer: compare $src against the zero register.
4735     MacroAssembler _masm(&cbuf);
4736     Register reg = as_Register($src$$reg);
4737     __ cmp(reg, zr);
4738   %}
4739 
4740   enc_class aarch64_enc_testn(iRegN src) %{
         // Null test of a narrow pointer: cmpw $src against the zero register.
4741     MacroAssembler _masm(&cbuf);
4742     Register reg = as_Register($src$$reg);
4743     __ cmpw(reg, zr);
4744   %}
4745 
4746   enc_class aarch64_enc_b(label lbl) %{
         // Unconditional branch to $lbl.
4747     MacroAssembler _masm(&cbuf);
4748     Label *L = $lbl$$label;
4749     __ b(*L);
4750   %}
4751 
4752   enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
         // Conditional branch to $lbl; the condition is the cmpcode of the
         // cmpOp operand cast to Assembler::Condition.
4753     MacroAssembler _masm(&cbuf);
4754     Label *L = $lbl$$label;
4755     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
4756   %}
4757 
4758   enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
         // Conditional branch to $lbl for a cmpOpU operand; the emitted code
         // is the same as aarch64_enc_br_con, only the operand type differs.
4759     MacroAssembler _masm(&cbuf);
4760     Label *L = $lbl$$label;
4761     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
4762   %}
4763 
4764   enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
4765   %{
          // Emit the slow-path subtype check of $sub against $super using
          // check_klass_subtype_slow_path with condition codes set. NULL is
          // passed as the success label and `miss` as the failure label, so
          // the slow path branches to `miss` on failure and otherwise continues
          // with the next instruction. When $primary is set, $result is zeroed
          // on that continuing path only; the mov is skipped on a miss.
4766      Register sub_reg = as_Register($sub$$reg);
4767      Register super_reg = as_Register($super$$reg);
4768      Register temp_reg = as_Register($temp$$reg);
4769      Register result_reg = as_Register($result$$reg);
4770 
4771      Label miss;
4772      MacroAssembler _masm(&cbuf);
4773      __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
4774                                      NULL, &miss,
4775                                      /*set_cond_codes:*/ true);
4776      if ($primary) {
4777        __ mov(result_reg, zr);
4778      }
4779      __ bind(miss);
4780   %}
4781 
4782   enc_class aarch64_enc_java_static_call(method meth) %{
         // Emit a trampoline call to $meth. With no _method the target is a
         // runtime wrapper and gets a runtime_call relocation; otherwise the
         // call gets an opt-virtual or static call relocation carrying the
         // resolved method index, and a to-interpreter stub is emitted as well.
         // A NULL result from either emission is recorded on the current ciEnv
         // as "CodeCache is full" and encoding stops.
4783     MacroAssembler _masm(&cbuf);
4784 
4785     address addr = (address)$meth$$method;
4786     address call;
4787     if (!_method) {
4788       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
4789       call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
4790     } else {
4791       int method_index = resolved_method_index(cbuf);
4792       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4793                                                   : static_call_Relocation::spec(method_index);
4794       call = __ trampoline_call(Address(addr, rspec), &cbuf);
4795 
           // NOTE(review): `call` is only checked for NULL after this stub has
           // been emitted; consider failing fast right after trampoline_call.
4796       // Emit stub for static call
4797       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
4798       if (stub == NULL) {
4799         ciEnv::current()->record_failure("CodeCache is full");
4800         return;
4801       }
4802     }
4803     if (call == NULL) {
4804       ciEnv::current()->record_failure("CodeCache is full");
4805       return;
4806     }
4807   %}
4808 
4809   enc_class aarch64_enc_java_dynamic_call(method meth) %{
         // Emit an inline-cache call to $meth via ic_call, passing the resolved
         // method index. A NULL result is recorded on the current ciEnv as
         // "CodeCache is full".
4810     MacroAssembler _masm(&cbuf);
4811     int method_index = resolved_method_index(cbuf);
4812     address call = __ ic_call((address)$meth$$method, method_index);
4813     if (call == NULL) {
4814       ciEnv::current()->record_failure("CodeCache is full");
4815       return;
4816     }
4817   %}
4818 
4819   enc_class aarch64_enc_call_epilog() %{
         // Post-call epilog. Emits nothing unless VerifyStackAtCalls is set,
         // in which case only a call_Unimplemented() placeholder is emitted
         // where the stack-depth check would go.
4820     MacroAssembler _masm(&cbuf);
4821     if (VerifyStackAtCalls) {
4822       // Check that stack depth is unchanged: find magic cookie on stack
4823       __ call_Unimplemented();
4824     }
4825   %}
4826 
4827   enc_class aarch64_enc_java_to_runtime(method meth) %{
         // Emit a call from compiled Java code to the runtime entry $meth.
         // If the entry lies inside a code blob, a trampoline call with a
         // runtime_call relocation is used. Otherwise the absolute address is
         // loaded into rscratch1 and called with blrt, after pushing a
         // (zr, return address) pair on the stack; the pair is popped again
         // after the call returns.
4828     MacroAssembler _masm(&cbuf);
4829 
4830     // some calls to generated routines (arraycopy code) are scheduled
4831     // by C2 as runtime calls. if so we can call them using a br (they
4832     // will be in a reachable segment) otherwise we have to use a blrt
4833     // which loads the absolute address into a register.
4834     address entry = (address)$meth$$method;
4835     CodeBlob *cb = CodeCache::find_blob(entry);
4836     if (cb) {
4837       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
4838       if (call == NULL) {
4839         ciEnv::current()->record_failure("CodeCache is full");
4840         return;
4841       }
4842     } else {
           // getCallInfo fills in the argument counts and return type that
           // blrt takes alongside the target register.
4843       int gpcnt;
4844       int fpcnt;
4845       int rtype;
4846       getCallInfo(tf(), gpcnt, fpcnt, rtype);
4847       Label retaddr;
4848       __ adr(rscratch2, retaddr);
4849       __ lea(rscratch1, RuntimeAddress(entry));
4850       // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
4851       __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
4852       __ blrt(rscratch1, gpcnt, fpcnt, rtype);
4853       __ bind(retaddr);
           // Pop the two words pushed by the stp above.
4854       __ add(sp, sp, 2 * wordSize);
4855     }
4856   %}
4857 
4858   enc_class aarch64_enc_rethrow() %{
         // Far jump to the OptoRuntime rethrow stub.
4859     MacroAssembler _masm(&cbuf);
4860     __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
4861   %}
4862 
4863   enc_class aarch64_enc_ret() %{
         // Return through the link register.
4864     MacroAssembler _masm(&cbuf);
4865     __ ret(lr);
4866   %}
4867 
4868   enc_class aarch64_enc_tail_call(iRegP jump_target) %{
         // Tail call: indirect branch to the address held in $jump_target.
4869     MacroAssembler _masm(&cbuf);
4870     Register target_reg = as_Register($jump_target$$reg);
4871     __ br(target_reg);
4872   %}
4873 
4874   enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
         // Tail jump used for exception forwarding: copy the return address
         // from lr into r3, then branch indirectly to $jump_target.
4875     MacroAssembler _masm(&cbuf);
4876     Register target_reg = as_Register($jump_target$$reg);
4877     // exception oop should be in r0
4878     // ret addr has been popped into lr
4879     // callee expects it in r3
4880     __ mov(r3, lr);
4881     __ br(target_reg);
4882   %}
4883 
4884   enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
         // Inline fast path for monitor enter on $object using the on-stack
         // lock record $box. $tmp holds the displaced header, $tmp2 is scratch.
         // On exit the condition flags carry the outcome: EQ means the lock was
         // acquired, NE means it was not.
         //
         // Emission order:
         //   1. load the mark word (optionally try biased locking)
         //   2. if the monitor bit is set, branch to the inflated-monitor path
         //   3. otherwise CAS the box address into the mark word (stack lock)
         //   4. on CAS failure, test for a recursive stack lock by this thread
         //   5. inflated path: CAS rthread into the monitor's owner field
4885     MacroAssembler _masm(&cbuf);
4886     Register oop = as_Register($object$$reg);
4887     Register box = as_Register($box$$reg);
4888     Register disp_hdr = as_Register($tmp$$reg);
4889     Register tmp = as_Register($tmp2$$reg);
4890     Label cont;
4891     Label object_has_monitor;
4892     Label cas_failed;
4893 
4894     assert_different_registers(oop, box, tmp, disp_hdr);
4895 
4896     // Load markOop from object into displaced_header.
4897     __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
4898 
4899     // Always do locking in runtime.
4900     if (EmitSync & 0x01) {
           // Only this compare is emitted before returning; the fast path below
           // is skipped entirely.
4901       __ cmp(oop, zr);
4902       return;
4903     }
4904 
4905     if (UseBiasedLocking && !UseOptoBiasInlining) {
4906       __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
4907     }
4908 
4909     // Handle existing monitor
4910     if ((EmitSync & 0x02) == 0) {
4911       // we can use AArch64's bit test and branch here but
4912       // markoopDesc does not define a bit index just the bit value
4913       // so assert in case the bit pos changes
4914 #     define __monitor_value_log2 1
4915       assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
4916       __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
4917 #     undef __monitor_value_log2
4918     }
4919 
4920     // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
4921     __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);
4922 
4923     // Load Compare Value application register.
4924 
4925     // Initialize the box. (Must happen before we update the object mark!)
4926     __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
4927 
4928     // Compare object markOop with mark and if equal exchange scratch1
4929     // with object markOop.
4930     if (UseLSE) {
           // Single-instruction CAS: tmp holds the expected value and is
           // compared with disp_hdr afterwards to see whether the swap happened.
4931       __ mov(tmp, disp_hdr);
4932       __ casal(Assembler::xword, tmp, box, oop);
4933       __ cmp(tmp, disp_hdr);
4934       __ br(Assembler::EQ, cont);
4935     } else {
           // Load-exclusive / store-exclusive retry loop. A mismatch leaves via
           // cas_failed; a successful store leaves via cont with EQ still set
           // by the cmp below.
4936       Label retry_load;
4937       if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
4938         __ prfm(Address(oop), PSTL1STRM);
4939       __ bind(retry_load);
4940       __ ldaxr(tmp, oop);
4941       __ cmp(tmp, disp_hdr);
4942       __ br(Assembler::NE, cas_failed);
4943       // use stlxr to ensure update is immediately visible
4944       __ stlxr(tmp, box, oop);
4945       __ cbzw(tmp, cont);
4946       __ b(retry_load);
4947     }
4948 
4949     // Formerly:
4950     // __ cmpxchgptr(/*oldv=*/disp_hdr,
4951     //               /*newv=*/box,
4952     //               /*addr=*/oop,
4953     //               /*tmp=*/tmp,
4954     //               cont,
4955     //               /*fail*/NULL);
4956 
4957     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
4958 
4959     // If the compare-and-exchange succeeded, then we found an unlocked
4960     // object, have now locked it, and continue at label cont.
4961 
4962     __ bind(cas_failed);
4963     // We did not see an unlocked object so try the fast recursive case.
4964 
4965     // Check if the owner is self by comparing the value in the
4966     // markOop of object (disp_hdr) with the stack pointer.
4967     __ mov(rscratch1, sp);
4968     __ sub(disp_hdr, disp_hdr, rscratch1);
4969     __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
4970     // If the masked result is zero the flags are EQ (success) and the zero
4971     // stored as the displaced header in the box marks a recursive lock.
4972     __ ands(tmp/*==0?*/, disp_hdr, tmp);
4973     __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
4974 
4975     // Handle existing monitor.
4976     if ((EmitSync & 0x02) == 0) {
4977       __ b(cont);
4978 
4979       __ bind(object_has_monitor);
4980       // The object's monitor m is unlocked iff m->owner == NULL,
4981       // otherwise m->owner may contain a thread or a stack address.
4982       //
4983       // Try to CAS m->owner from NULL to current thread.
4984       __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
4985       __ mov(disp_hdr, zr);
4986 
4987       if (UseLSE) {
4988         __ mov(rscratch1, disp_hdr);
4989         __ casal(Assembler::xword, rscratch1, rthread, tmp);
4990         __ cmp(rscratch1, disp_hdr);
4991       } else {
4992         Label retry_load, fail;
4993         if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
4994           __ prfm(Address(tmp), PSTL1STRM);
4995         __ bind(retry_load);
4996         __ ldaxr(rscratch1, tmp);
4997         __ cmp(disp_hdr, rscratch1);
4998         __ br(Assembler::NE, fail);
4999         // use stlxr to ensure update is immediately visible
5000         __ stlxr(rscratch1, rthread, tmp);
5001         __ cbnzw(rscratch1, retry_load);
5002         __ bind(fail);
5003       }
5004 
5005       // Label next;
5006       // __ cmpxchgptr(/*oldv=*/disp_hdr,
5007       //               /*newv=*/rthread,
5008       //               /*addr=*/tmp,
5009       //               /*tmp=*/rscratch1,
5010       //               /*succeed*/next,
5011       //               /*fail*/NULL);
5012       // __ bind(next);
5013 
5014       // store a non-null value into the box.
5015       __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));
5016 
5017       // PPC port checks the following invariants
5018       // #ifdef ASSERT
5019       // bne(flag, cont);
5020       // We have acquired the monitor, check some invariants.
5021       // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
5022       // Invariant 1: _recursions should be 0.
5023       // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
5024       // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
5025       //                        "monitor->_recursions should be 0", -1);
5026       // Invariant 2: OwnerIsThread shouldn't be 0.
5027       // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
5028       //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
5029       //                           "monitor->OwnerIsThread shouldn't be 0", -1);
5030       // #endif
5031     }
5032 
5033     __ bind(cont);
5034     // flag == EQ indicates success
5035     // flag == NE indicates failure
5036 
5037   %}
5038 
5039   // Emits the inline fast path for monitor exit.  The emitted sequence
5040   // leaves the condition flags set: EQ means success, NE means failure.
5041   // TODO: reimplement with custom cmpxchgptr code to avoid unnecessary branching.
5042   enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
5043     MacroAssembler _masm(&cbuf);
5044     Register oop = as_Register($object$$reg);
5045     Register box = as_Register($box$$reg);
5046     Register disp_hdr = as_Register($tmp$$reg);  // will hold the displaced header read from the box
5047     Register tmp = as_Register($tmp2$$reg);      // scratch: mark word, CAS old value / store status, monitor address
5048     Label cont;
5049     Label object_has_monitor;
5050     Label cas_failed;
5051 
5052     assert_different_registers(oop, box, tmp, disp_hdr);
5053 
5054     // EmitSync bit 0: emit only a compare that reports failure, so unlocking is always done in the runtime.
5055     if (EmitSync & 0x01) {
5056       __ cmp(oop, zr); // Oop can't be 0 here => always false (flags NE).
5057       return;
5058     }
5059 
5060     if (UseBiasedLocking && !UseOptoBiasInlining) {
5061       __ biased_locking_exit(oop, tmp, cont);  // helper gets cont as a branch target; presumably taken when the lock was biased
5062     }
5063 
5064     // Find the lock address and load the displaced header from the stack.
5065     __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
5066 
5067     // If the displaced header is 0, we have a recursive unlock.
5068     __ cmp(disp_hdr, zr);
5069     __ br(Assembler::EQ, cont);  // flags are EQ here, i.e. success
5070     // NOTE(review): the bit test below reads disp_hdr (the value loaded from the box), not the
5071     // mark word loaded into tmp -- confirm the lock path stores a box value with that bit set.
5072     // Handle existing monitor (tmp keeps the mark word for use at object_has_monitor).
5073     if ((EmitSync & 0x02) == 0) {
5074       __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
5075       __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
5076     }
5077 
5078     // Check if it is still a light weight lock, this is true if we
5079     // see the stack address of the basicLock in the markOop of the
5080     // object.  If so, the displaced header is swapped back into the mark word.
5081 
5082       if (UseLSE) {
5083         __ mov(tmp, box);                                // expected value for the compare-and-swap
5084         __ casl(Assembler::xword, tmp, disp_hdr, oop);   // store disp_hdr if the word at oop equals tmp
5085         __ cmp(tmp, box);                                // tmp holds the old value (Arm CAS semantics): EQ iff the swap happened
5086       } else {
5087         Label retry_load;
5088         if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
5089           __ prfm(Address(oop), PSTL1STRM);
5090         __ bind(retry_load);
5091         __ ldxr(tmp, oop);                 // exclusive load of the mark word
5092         __ cmp(box, tmp);
5093         __ br(Assembler::NE, cas_failed);  // mark word is not our box: leave with NE
5094         // use stlxr to ensure update is immediately visible
5095         __ stlxr(tmp, disp_hdr, oop);      // tmp receives the store status
5096         __ cbzw(tmp, cont);                // zero status: store succeeded, flags still EQ from the cmp
5097         __ b(retry_load);                  // store did not succeed: try again
5098       }
5099 
5100     // __ cmpxchgptr(/*compare_value=*/box,
5101     //               /*exchange_value=*/disp_hdr,
5102     //               /*where=*/oop,
5103     //               /*result=*/tmp,
5104     //               cont,
5105     //               /*cas_failed*/NULL);
5106     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
5107 
5108     __ bind(cas_failed);
5109 
5110     // Handle existing monitor.
5111     if ((EmitSync & 0x02) == 0) {
5112       __ b(cont);  // light-weight path ends here; flags come from the compare above
5113 
5114       __ bind(object_has_monitor);
5115       __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor: subtract the tag from the mark word in tmp
5116       __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
5117       __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
5118       __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
5119       __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 only if we are the owner and there are 0 recursions
5120       __ cmp(rscratch1, zr);
5121       __ br(Assembler::NE, cont);  // not the owner, or recursive: fail with NE
5122 
5123       __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
5124       __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
5125       __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
5126       __ cmp(rscratch1, zr);       // sets the flags that are reported at cont
5127       __ cbnz(rscratch1, cont);    // EntryList or cxq is non-zero: fail with NE
5128       // need a release store here
5129       __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
5130       __ stlr(rscratch1, tmp); // rscratch1 is zero: clears the owner field; flags remain EQ
5131     }
5132 
5133     __ bind(cont);
5134     // flag == EQ indicates success
5135     // flag == NE indicates failure
5136   %}
5137 
5138 %}
5139 
5140 //----------FRAME--------------------------------------------------------------
5141 // Definition of frame structure and management information.
5142 //
5143 //  S T A C K   L A Y O U T    Allocators stack-slot number
5144 //                             |   (to get allocators register number
5145 //  G  Owned by    |        |  v    add OptoReg::stack0())
5146 //  r   CALLER     |        |
5147 //  o     |        +--------+      pad to even-align allocators stack-slot
5148 //  w     V        |  pad0  |        numbers; owned by CALLER
5149 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
5150 //  h     ^        |   in   |  5
5151 //        |        |  args  |  4   Holes in incoming args owned by SELF
5152 //  |     |        |        |  3
5153 //  |     |        +--------+
5154 //  V     |        | old out|      Empty on Intel, window on Sparc
5155 //        |    old |preserve|      Must be even aligned.
5156 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
5157 //        |        |   in   |  3   area for Intel ret address
5158 //     Owned by    |preserve|      Empty on Sparc.
5159 //       SELF      +--------+
5160 //        |        |  pad2  |  2   pad to align old SP
5161 //        |        +--------+  1
5162 //        |        | locks  |  0
5163 //        |        +--------+----> OptoReg::stack0(), even aligned
5164 //        |        |  pad1  | 11   pad to align new SP
5165 //        |        +--------+
5166 //        |        |        | 10
5167 //        |        | spills |  9   spills
5168 //        V        |        |  8   (pad0 slot for callee)
5169 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
5170 //        ^        |  out   |  7
5171 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
5172 //     Owned by    +--------+
5173 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
5174 //        |    new |preserve|      Must be even-aligned.
5175 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
5176 //        |        |        |
5177 //
5178 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
5179 //         known from SELF's arguments and the Java calling convention.
5180 //         Region 6-7 is determined per call site.
5181 // Note 2: If the calling convention leaves holes in the incoming argument
5182 //         area, those holes are owned by SELF.  Holes in the outgoing area
5183 //         are owned by the CALLEE.  Holes should not be necessary in the
5184 //         incoming area, as the Java calling convention is completely under
5185 //         the control of the AD file.  Doubles can be sorted and packed to
5186 //         avoid holes.  Holes in the outgoing arguments may be necessary for
5187 //         varargs C calling conventions.
5188 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
5189 //         even aligned with pad0 as needed.
5190 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
5191 //           (the latter is true on Intel but is it false on AArch64?)
5192 //         region 6-11 is even aligned; it may be padded out more so that
5193 //         the region from SP to FP meets the minimum stack alignment.
5194 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
5195 //         alignment.  Region 11, pad1, may be dynamically extended so that
5196 //         SP meets the minimum alignment.
5197 
5198 frame %{
5199   // What direction does stack grow in (assumed to be same for C & Java)
5200   stack_direction(TOWARDS_LOW);
5201 
5202   // These three registers define part of the calling convention
5203   // between compiled code and the interpreter.
5204 
5205   // Inline Cache Register or methodOop for I2C.
5206   inline_cache_reg(R12);
5207 
5208   // Method Oop Register when calling interpreter.
5209   interpreter_method_oop_reg(R12);  // same register as inline_cache_reg above
5210 
5211   // Number of stack slots consumed by locking an object
5212   sync_stack_slots(2);
5213 
5214   // Compiled code's Frame Pointer
5215   frame_pointer(R31);
5216 
5217   // Interpreter stores its frame pointer in a register which is
5218   // stored to the stack by I2CAdaptors.
5219   // I2CAdaptors convert from interpreted java to compiled java.
5220   interpreter_frame_pointer(R29);
5221 
5222   // Stack alignment requirement
5223   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
5224 
5225   // Number of stack slots between incoming argument block and the start of
5226   // a new frame.  The PROLOG must add this many slots to the stack.  The
5227   // EPILOG must remove this many slots.  aarch64 keeps the return address
5228   // and fp here; 4 is presumably two 32-bit slots for each of those two
5229   // 64-bit values.  TODO think this is correct but check
5230   in_preserve_stack_slots(4);
5231 
5232   // Number of outgoing stack slots killed above the out_preserve_stack_slots
5233   // for calls to C.  Supports the var-args backing area for register parms.
5234   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);  // byte count expressed in BytesPerInt-sized slots
5235 
5236   // The after-PROLOG location of the return address.  Location of
5237   // return address specifies a type (REG or STACK) and a number
5238   // representing the register number (i.e. - use a register name) or
5239   // stack slot.
5240   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
5241   // Otherwise, it is above the locks and verification slot and alignment word
5242   // TODO this may well be correct but need to check why that - 2 is there
5243   // ppc port uses 0 but we definitely need to allow for fixed_slots
5244   // which folds in the space used for monitors
5245   return_addr(STACK - 2 +
5246               align_up((Compile::current()->in_preserve_stack_slots() +
5247                         Compile::current()->fixed_slots()),
5248                        stack_alignment_in_slots()));
5249 
5250   // Body of function which returns an integer array locating
5251   // arguments either in registers or in stack slots.  Passed an array
5252   // of ideal registers called "sig" and a "length" count.  Stack-slot
5253   // offsets are based on outgoing arguments, i.e. a CALLER setting up
5254   // arguments for a CALLEE.  Incoming stack arguments are
5255   // automatically biased by the preserve_stack_slots field above.
5256 
5257   calling_convention
5258   %{
5259     // No difference between ingoing/outgoing just pass false
5260     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
5261   %}
5262 
5263   c_calling_convention
5264   %{
5265     // This is obviously always outgoing
5266     (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);  // return value is deliberately discarded
5267   %}
5268 
5269   // Location of compiled Java return values.  Same as C for now.
5270   return_value
5271   %{
5272     // TODO do we allow ideal_reg == Op_RegN???  Both tables carry an Op_RegN entry, but the assert admits only Op_RegI..Op_RegL.
5273     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
5274            "only return normal values");
5275 
5276     static const int lo[Op_RegL + 1] = { // low half of the pair, indexed by ideal_reg (enum name per entry)
5277       0,                                 // Op_Node
5278       0,                                 // Op_Set
5279       R0_num,                            // Op_RegN
5280       R0_num,                            // Op_RegI
5281       R0_num,                            // Op_RegP
5282       V0_num,                            // Op_RegF
5283       V0_num,                            // Op_RegD
5284       R0_num                             // Op_RegL
5285     };
5286 
5287     static const int hi[Op_RegL + 1] = { // high half of the pair; OptoReg::Bad presumably means no high half
5288       0,                                 // Op_Node
5289       0,                                 // Op_Set
5290       OptoReg::Bad,                      // Op_RegN
5291       OptoReg::Bad,                      // Op_RegI
5292       R0_H_num,                          // Op_RegP
5293       OptoReg::Bad,                      // Op_RegF
5294       V0_H_num,                          // Op_RegD
5295       R0_H_num                           // Op_RegL
5296     };
5297 
5298     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);  // (hi, lo) entries for the requested ideal register type
5299   %}
5300 %}
5301 
5302 //----------ATTRIBUTES---------------------------------------------------------
5303 //----------Operand Attributes-------------------------------------------------
5304 op_attrib op_cost(1);        // Required cost attribute (1 here; most operands in this file set op_cost(0))
5305 
5306 //----------Instruction Attributes---------------------------------------------
5307 ins_attrib ins_cost(INSN_COST); // Required cost attribute (INSN_COST is defined outside this section)
5308 ins_attrib ins_size(32);        // Required size attribute (in bits)
5309 ins_attrib ins_short_branch(0); // Required flag: is this instruction
5310                                 // a non-matching short branch variant
5311                                 // of some long branch?
5312 ins_attrib ins_alignment(4);    // Required alignment attribute (must
5313                                 // be a power of 2) specifies the
5314                                 // alignment that some part of the
5315                                 // instruction (not necessarily the
5316                                 // start) requires.  If > 1, a
5317                                 // compute_padding() function must be
5318                                 // provided for the instruction
5319 
5320 //----------OPERANDS-----------------------------------------------------------
5321 // Operand definitions must precede instruction definitions for correct parsing
5322 // in the ADLC because operands constitute user defined types which are used in
5323 // instruction definitions.
5324 
5325 //----------Simple Operands----------------------------------------------------
5326 
5327 // Integer operands 32 bit
5328 // 32 bit immediate: matches any ConI node (no predicate) at operand cost 0
5329 operand immI()
5330 %{
5331   match(ConI);
5332 
5333   op_cost(0);
5334   format %{ %}
5335   interface(CONST_INTER);
5336 %}
5337 
5338 // 32 bit zero: a ConI whose value is exactly 0
5339 operand immI0()
5340 %{
5341   predicate(n->get_int() == 0);
5342   match(ConI);
5343 
5344   op_cost(0);
5345   format %{ %}
5346   interface(CONST_INTER);
5347 %}
5348 
5349 // 32 bit unit increment: a ConI whose value is exactly 1
5350 operand immI_1()
5351 %{
5352   predicate(n->get_int() == 1);
5353   match(ConI);
5354 
5355   op_cost(0);
5356   format %{ %}
5357   interface(CONST_INTER);
5358 %}
5359 
5360 // 32 bit unit decrement: a ConI whose value is exactly -1
5361 operand immI_M1()
5362 %{
5363   predicate(n->get_int() == -1);
5364   match(ConI);
5365 
5366   op_cost(0);
5367   format %{ %}
5368   interface(CONST_INTER);
5369 %}
5370 
5371 // Shift values for add/sub extension shift: a ConI in the inclusive range 0..4
5372 operand immIExt()
5373 %{
5374   predicate(0 <= n->get_int() && (n->get_int() <= 4));
5375   match(ConI);
5376 
5377   op_cost(0);
5378   format %{ %}
5379   interface(CONST_INTER);
5380 %}
5381 
5382 operand immI_le_4()
5383 %{
5384   predicate(n->get_int() <= 4);  // any int constant <= 4; there is no lower bound, so negative values match too
5385   match(ConI);
5386 
5387   op_cost(0);
5388   format %{ %}
5389   interface(CONST_INTER);
5390 %}
5391 
5392 operand immI_31()
5393 %{
5394   predicate(n->get_int() == 31);  // accept only the int constant 31
5395   match(ConI);
5396 
5397   op_cost(0);
5398   format %{ %}
5399   interface(CONST_INTER);
5400 %}
5401 
5402 operand immI_8()
5403 %{
5404   predicate(n->get_int() == 8);  // accept only the int constant 8
5405   match(ConI);
5406 
5407   op_cost(0);
5408   format %{ %}
5409   interface(CONST_INTER);
5410 %}
5411 
5412 operand immI_16()
5413 %{
5414   predicate(n->get_int() == 16);  // accept only the int constant 16
5415   match(ConI);
5416 
5417   op_cost(0);
5418   format %{ %}
5419   interface(CONST_INTER);
5420 %}
5421 
5422 operand immI_24()
5423 %{
5424   predicate(n->get_int() == 24);  // accept only the int constant 24
5425   match(ConI);
5426 
5427   op_cost(0);
5428   format %{ %}
5429   interface(CONST_INTER);
5430 %}
5431 
5432 operand immI_32()
5433 %{
5434   predicate(n->get_int() == 32);  // accept only the int constant 32
5435   match(ConI);
5436 
5437   op_cost(0);
5438   format %{ %}
5439   interface(CONST_INTER);
5440 %}
5441 
5442 operand immI_48()
5443 %{
5444   predicate(n->get_int() == 48);  // accept only the int constant 48
5445   match(ConI);
5446 
5447   op_cost(0);
5448   format %{ %}
5449   interface(CONST_INTER);
5450 %}
5451 
5452 operand immI_56()
5453 %{
5454   predicate(n->get_int() == 56);  // accept only the int constant 56
5455   match(ConI);
5456 
5457   op_cost(0);
5458   format %{ %}
5459   interface(CONST_INTER);
5460 %}
5461 
5462 operand immI_63()
5463 %{
5464   predicate(n->get_int() == 63);  // accept only the int constant 63
5465   match(ConI);
5466 
5467   op_cost(0);
5468   format %{ %}
5469   interface(CONST_INTER);
5470 %}
5471 
5472 operand immI_64()
5473 %{
5474   predicate(n->get_int() == 64);  // accept only the int constant 64
5475   match(ConI);
5476 
5477   op_cost(0);
5478   format %{ %}
5479   interface(CONST_INTER);
5480 %}
5481 
5482 operand immI_255()
5483 %{
5484   predicate(n->get_int() == 255);  // accept only the int constant 255 (0xFF)
5485   match(ConI);
5486 
5487   op_cost(0);
5488   format %{ %}
5489   interface(CONST_INTER);
5490 %}
5491 
5492 operand immI_65535()
5493 %{
5494   predicate(n->get_int() == 65535);  // accept only the int constant 65535 (0xFFFF)
5495   match(ConI);
5496 
5497   op_cost(0);
5498   format %{ %}
5499   interface(CONST_INTER);
5500 %}
5501 
5502 operand immL_255()
5503 %{
5504   predicate(n->get_long() == 255L);  // accept only the long constant 255 (0xFF)
5505   match(ConL);
5506 
5507   op_cost(0);
5508   format %{ %}
5509   interface(CONST_INTER);
5510 %}
5511 
5512 operand immL_65535()
5513 %{
5514   predicate(n->get_long() == 65535L);  // accept only the long constant 65535 (0xFFFF)
5515   match(ConL);
5516 
5517   op_cost(0);
5518   format %{ %}
5519   interface(CONST_INTER);
5520 %}
5521 
5522 operand immL_4294967295()
5523 %{
5524   predicate(n->get_long() == 4294967295L);  // 4294967295 == 0xFFFFFFFF, i.e. exactly the low 32 bits set
5525   match(ConL);
5526 
5527   op_cost(0);
5528   format %{ %}
5529   interface(CONST_INTER);
5530 %}
5531 
5532 operand immL_bitmask()
5533 %{
5534   predicate(((n->get_long() & 0xc000000000000000l) == 0)
5535             && is_power_of_2(n->get_long() + 1));  // bits 63 and 62 clear, and value + 1 is a power of two
5536   match(ConL);
5537 
5538   op_cost(0);
5539   format %{ %}
5540   interface(CONST_INTER);
5541 %}
5542 
5543 operand immI_bitmask()
5544 %{
5545   predicate(((n->get_int() & 0xc0000000) == 0)
5546             && is_power_of_2(n->get_int() + 1));  // bits 31 and 30 clear, and value + 1 is a power of two
5547   match(ConI);
5548 
5549   op_cost(0);
5550   format %{ %}
5551   interface(CONST_INTER);
5552 %}
5553 
5554 // Scale values for scaled offset addressing modes (up to long but not quad): a ConI in the inclusive range 0..3
5555 operand immIScale()
5556 %{
5557   predicate(0 <= n->get_int() && (n->get_int() <= 3));
5558   match(ConI);
5559 
5560   op_cost(0);
5561   format %{ %}
5562   interface(CONST_INTER);
5563 %}
5564 
5565 // 26 bit signed offset -- for pc-relative branches: -(1 << 25) <= value < (1 << 25)
5566 operand immI26()
5567 %{
5568   predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
5569   match(ConI);
5570 
5571   op_cost(0);
5572   format %{ %}
5573   interface(CONST_INTER);
5574 %}
5575 
5576 // 19 bit signed offset -- for pc-relative loads: -(1 << 18) <= value < (1 << 18)
5577 operand immI19()
5578 %{
5579   predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
5580   match(ConI);
5581 
5582   op_cost(0);
5583   format %{ %}
5584   interface(CONST_INTER);
5585 %}
5586 
5587 // 12 bit unsigned offset -- for base plus immediate loads: 0 <= value < (1 << 12)
5588 operand immIU12()
5589 %{
5590   predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
5591   match(ConI);
5592 
5593   op_cost(0);
5594   format %{ %}
5595   interface(CONST_INTER);
5596 %}
5597 
5598 operand immLU12()
5599 %{
5600   predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));  // long counterpart of immIU12: 0 <= value < (1 << 12)
5601   match(ConL);
5602 
5603   op_cost(0);
5604   format %{ %}
5605   interface(CONST_INTER);
5606 %}
5607 
5608 // Offset for scaled or unscaled immediate loads and stores: a ConI that Address::offset_ok_for_immed accepts (offset passed alone)
5609 operand immIOffset()
5610 %{
5611   predicate(Address::offset_ok_for_immed(n->get_int()));
5612   match(ConI);
5613 
5614   op_cost(0);
5615   format %{ %}
5616   interface(CONST_INTER);
5617 %}
5618 
5619 operand immIOffset4()
5620 %{
5621   predicate(Address::offset_ok_for_immed(n->get_int(), 2));  // second argument 2: presumably log2 of a 4-byte access, per the operand name
5622   match(ConI);
5623 
5624   op_cost(0);
5625   format %{ %}
5626   interface(CONST_INTER);
5627 %}
5628 
5629 operand immIOffset8()
5630 %{
5631   predicate(Address::offset_ok_for_immed(n->get_int(), 3));  // second argument 3: presumably log2 of an 8-byte access, per the operand name
5632   match(ConI);
5633 
5634   op_cost(0);
5635   format %{ %}
5636   interface(CONST_INTER);
5637 %}
5638 
5639 operand immIOffset16()
5640 %{
5641   predicate(Address::offset_ok_for_immed(n->get_int(), 4));  // second argument 4: presumably log2 of a 16-byte access, per the operand name
5642   match(ConI);
5643 
5644   op_cost(0);
5645   format %{ %}
5646   interface(CONST_INTER);
5647 %}
5648 
5649 operand immLoffset()
5650 %{
5651   predicate(Address::offset_ok_for_immed(n->get_long()));  // long offset checked with the offset alone (no second argument)
5652   match(ConL);
5653 
5654   op_cost(0);
5655   format %{ %}
5656   interface(CONST_INTER);
5657 %}
5658 
5659 operand immLoffset4()
5660 %{
5661   predicate(Address::offset_ok_for_immed(n->get_long(), 2));  // second argument 2: presumably log2 of a 4-byte access, per the operand name
5662   match(ConL);
5663 
5664   op_cost(0);
5665   format %{ %}
5666   interface(CONST_INTER);
5667 %}
5668 
5669 operand immLoffset8()
5670 %{
5671   predicate(Address::offset_ok_for_immed(n->get_long(), 3));  // second argument 3: presumably log2 of an 8-byte access, per the operand name
5672   match(ConL);
5673 
5674   op_cost(0);
5675   format %{ %}
5676   interface(CONST_INTER);
5677 %}
5678 
5679 operand immLoffset16()
5680 %{
5681   predicate(Address::offset_ok_for_immed(n->get_long(), 4));  // second argument 4: presumably log2 of a 16-byte access, per the operand name
5682   match(ConL);
5683 
5684   op_cost(0);
5685   format %{ %}
5686   interface(CONST_INTER);
5687 %}
5688 
5689 // 32 bit integer valid for add sub immediate (the value is cast to long for the Assembler check)
5690 operand immIAddSub()
5691 %{
5692   predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
5693   match(ConI);
5694   op_cost(0);
5695   format %{ %}
5696   interface(CONST_INTER);
5697 %}
5698 
5699 // 32 bit unsigned integer valid for logical immediate (checked with is32 == true)
5700 // TODO -- check this is right when e.g the mask is 0x80000000 (the cast below presumably sign-extends a negative int)
5701 operand immILog()
5702 %{
5703   predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
5704   match(ConI);
5705 
5706   op_cost(0);
5707   format %{ %}
5708   interface(CONST_INTER);
5709 %}
5710 
5711 // Integer operands 64 bit
5712 // 64 bit immediate: matches any ConL node (no predicate) at operand cost 0
5713 operand immL()
5714 %{
5715   match(ConL);
5716 
5717   op_cost(0);
5718   format %{ %}
5719   interface(CONST_INTER);
5720 %}
5721 
5722 // 64 bit zero: a ConL whose value is exactly 0
5723 operand immL0()
5724 %{
5725   predicate(n->get_long() == 0);
5726   match(ConL);
5727 
5728   op_cost(0);
5729   format %{ %}
5730   interface(CONST_INTER);
5731 %}
5732 
5733 // 64 bit unit increment: a ConL whose value is exactly 1
5734 operand immL_1()
5735 %{
5736   predicate(n->get_long() == 1);
5737   match(ConL);
5738 
5739   op_cost(0);
5740   format %{ %}
5741   interface(CONST_INTER);
5742 %}
5743 
5744 // 64 bit unit decrement: a ConL whose value is exactly -1
5745 operand immL_M1()
5746 %{
5747   predicate(n->get_long() == -1);
5748   match(ConL);
5749 
5750   op_cost(0);
5751   format %{ %}
5752   interface(CONST_INTER);
5753 %}
5754 
5755 // 32 bit offset of pc in thread anchor, matched as a long constant (ConL) equal to the sum computed below
5756 
5757 operand immL_pc_off()
5758 %{
5759   predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
5760                              in_bytes(JavaFrameAnchor::last_Java_pc_offset()));  // anchor offset plus last_Java_pc offset, in bytes
5761   match(ConL);
5762 
5763   op_cost(0);
5764   format %{ %}
5765   interface(CONST_INTER);
5766 %}
5767 
5768 // 64 bit integer valid for add sub immediate, as decided by Assembler::operand_valid_for_add_sub_immediate
5769 operand immLAddSub()
5770 %{
5771   predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
5772   match(ConL);
5773   op_cost(0);
5774   format %{ %}
5775   interface(CONST_INTER);
5776 %}
5777 
5778 // 64 bit integer valid for logical immediate (checked with is32 == false)
5779 operand immLLog()
5780 %{
5781   predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
5782   match(ConL);
5783   op_cost(0);
5784   format %{ %}
5785   interface(CONST_INTER);
5786 %}
5787 
5788 // Long Immediate: low 32-bit mask (0xFFFFFFFF); immL_4294967295 tests the same value
5789 operand immL_32bits()
5790 %{
5791   predicate(n->get_long() == 0xFFFFFFFFL);
5792   match(ConL);
5793   op_cost(0);
5794   format %{ %}
5795   interface(CONST_INTER);
5796 %}
5797 
5798 // Pointer operands
5799 // Pointer Immediate: matches any ConP node (no predicate) at operand cost 0
5800 operand immP()
5801 %{
5802   match(ConP);
5803 
5804   op_cost(0);
5805   format %{ %}
5806   interface(CONST_INTER);
5807 %}
5808 
5809 // NULL Pointer Immediate: a ConP whose pointer value is 0
5810 operand immP0()
5811 %{
5812   predicate(n->get_ptr() == 0);
5813   match(ConP);
5814 
5815   op_cost(0);
5816   format %{ %}
5817   interface(CONST_INTER);
5818 %}
5819 
5820 // Pointer Immediate One: a ConP whose pointer value is 1
5821 // this is used in object initialization (initial object header)
5822 operand immP_1()
5823 %{
5824   predicate(n->get_ptr() == 1);
5825   match(ConP);
5826 
5827   op_cost(0);
5828   format %{ %}
5829   interface(CONST_INTER);
5830 %}
5831 
5832 // Polling Page Pointer Immediate: a ConP equal to the address returned by os::get_polling_page()
5833 operand immPollPage()
5834 %{
5835   predicate((address)n->get_ptr() == os::get_polling_page());
5836   match(ConP);
5837 
5838   op_cost(0);
5839   format %{ %}
5840   interface(CONST_INTER);
5841 %}
5842 
5843 // Card Table Byte Map Base: a ConP equal to the byte_map_base of the heap's barrier set
5844 operand immByteMapBase()
5845 %{
5846   // Get base of card map; the barrier set is cast to CardTableModRefBS with no type check at this point
5847   predicate((jbyte*)n->get_ptr() ==
5848         ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
5849   match(ConP);
5850 
5851   op_cost(0);
5852   format %{ %}
5853   interface(CONST_INTER);
5854 %}
5855 
5856 // Pointer Immediate Minus One: a ConP whose pointer value is -1
5857 // this is used when we want to write the current PC to the thread anchor
5858 operand immP_M1()
5859 %{
5860   predicate(n->get_ptr() == -1);
5861   match(ConP);
5862 
5863   op_cost(0);
5864   format %{ %}
5865   interface(CONST_INTER);
5866 %}
5867 
5868 // Pointer Immediate Minus Two: a ConP whose pointer value is -2
5869 // said to be used when writing the current PC to the thread anchor (same note as on immP_M1 -- TODO confirm)
5870 operand immP_M2()
5871 %{
5872   predicate(n->get_ptr() == -2);
5873   match(ConP);
5874 
5875   op_cost(0);
5876   format %{ %}
5877   interface(CONST_INTER);
5878 %}
5879 
5880 // Float and Double operands
5881 // Double Immediate: matches any ConD node (no predicate) at operand cost 0
5882 operand immD()
5883 %{
5884   match(ConD);
5885   op_cost(0);
5886   format %{ %}
5887   interface(CONST_INTER);
5888 %}
5889 
5890 // Double Immediate: +0.0d (tested as jlong_cast(value) == 0 rather than with a floating-point compare)
5891 operand immD0()
5892 %{
5893   predicate(jlong_cast(n->getd()) == 0);
5894   match(ConD);
5895 
5896   op_cost(0);
5897   format %{ %}
5898   interface(CONST_INTER);
5899 %}
5900 
5901 // Double Immediate that Assembler::operand_valid_for_float_immediate accepts
5902 operand immDPacked()
5903 %{
5904   predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
5905   match(ConD);
5906   op_cost(0);
5907   format %{ %}
5908   interface(CONST_INTER);
5909 %}
5910 
5911 // Float Immediate: matches any ConF node (no predicate) at operand cost 0
5912 operand immF()
5913 %{
5914   match(ConF);
5915   op_cost(0);
5916   format %{ %}
5917   interface(CONST_INTER);
5918 %}
5919 
5920 // Float Immediate: +0.0f (tested as jint_cast(value) == 0 rather than with a floating-point compare)
5921 operand immF0()
5922 %{
5923   predicate(jint_cast(n->getf()) == 0);
5924   match(ConF);
5925 
5926   op_cost(0);
5927   format %{ %}
5928   interface(CONST_INTER);
5929 %}
5930 
5931 // Float Immediate that Assembler::operand_valid_for_float_immediate accepts (value cast to double for the check)
5932 operand immFPacked()
5933 %{
5934   predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
5935   match(ConF);
5936   op_cost(0);
5937   format %{ %}
5938   interface(CONST_INTER);
5939 %}
5940 
5941 // Narrow pointer operands
5942 // Narrow Pointer Immediate: matches any ConN node (no predicate) at operand cost 0
5943 operand immN()
5944 %{
5945   match(ConN);
5946 
5947   op_cost(0);
5948   format %{ %}
5949   interface(CONST_INTER);
5950 %}
5951 
5952 // Narrow NULL Pointer Immediate: a ConN whose narrow constant is 0
5953 operand immN0()
5954 %{
5955   predicate(n->get_narrowcon() == 0);
5956   match(ConN);
5957 
5958   op_cost(0);
5959   format %{ %}
5960   interface(CONST_INTER);
5961 %}
5962 
5963 operand immNKlass()
5964 %{
5965   match(ConNKlass);  // any ConNKlass node; no predicate restricts it
5966 
5967   op_cost(0);
5968   format %{ %}
5969   interface(CONST_INTER);
5970 %}
5971 
5972 // Integer 32 bit Register Operands
5973 // Integer 32 bit Register (excludes SP): class any_reg32; matches RegI and iRegINoSp
5974 operand iRegI()
5975 %{
5976   constraint(ALLOC_IN_RC(any_reg32));
5977   match(RegI);
5978   match(iRegINoSp);
5979   op_cost(0);
5980   format %{ %}
5981   interface(REG_INTER);
5982 %}
5983 
5984 // Integer 32 bit Register not Special: allocated from class no_special_reg32
5985 operand iRegINoSp()
5986 %{
5987   constraint(ALLOC_IN_RC(no_special_reg32));
5988   match(RegI);
5989   op_cost(0);
5990   format %{ %}
5991   interface(REG_INTER);
5992 %}
5993 
5994 // Integer 64 bit Register Operands
5995 // Integer 64 bit Register (includes SP): class any_reg; matches RegL and iRegLNoSp
5996 operand iRegL()
5997 %{
5998   constraint(ALLOC_IN_RC(any_reg));
5999   match(RegL);
6000   match(iRegLNoSp);
6001   op_cost(0);
6002   format %{ %}
6003   interface(REG_INTER);
6004 %}
6005 
6006 // Integer 64 bit Register not Special: allocated from class no_special_reg
6007 operand iRegLNoSp()
6008 %{
6009   constraint(ALLOC_IN_RC(no_special_reg));
6010   match(RegL);
6011   match(iRegL_R0);  // NOTE(review): no op_cost clause follows, unlike the sibling operands -- confirm this is intended
6012   format %{ %}
6013   interface(REG_INTER);
6014 %}
6015 
6016 // Pointer Register Operands
6017 // Pointer Register: class ptr_reg; also matches the narrower pointer operands listed below
6018 operand iRegP()
6019 %{
6020   constraint(ALLOC_IN_RC(ptr_reg));
6021   match(RegP);
6022   match(iRegPNoSp);
6023   match(iRegP_R0);
6024   //match(iRegP_R2);
6025   //match(iRegP_R4);
6026   //match(iRegP_R5);
6027   match(thread_RegP);
6028   op_cost(0);
6029   format %{ %}
6030   interface(REG_INTER);
6031 %}
6032 
6033 // Pointer 64 bit Register not Special: class no_special_ptr_reg; only RegP is matched (other rules are commented out)
6034 operand iRegPNoSp()
6035 %{
6036   constraint(ALLOC_IN_RC(no_special_ptr_reg));
6037   match(RegP);
6038   // match(iRegP);
6039   // match(iRegP_R0);
6040   // match(iRegP_R2);
6041   // match(iRegP_R4);
6042   // match(iRegP_R5);
6043   // match(thread_RegP);
6044   op_cost(0);
6045   format %{ %}
6046   interface(REG_INTER);
6047 %}
6048 
6049 // Pointer 64 bit Register R0 only: class r0_reg; also matches iRegPNoSp
6050 operand iRegP_R0()
6051 %{
6052   constraint(ALLOC_IN_RC(r0_reg));
6053   match(RegP);
6054   // match(iRegP);
6055   match(iRegPNoSp);
6056   op_cost(0);
6057   format %{ %}
6058   interface(REG_INTER);
6059 %}
6060 
6061 // Pointer 64 bit Register R1 only: class r1_reg; also matches iRegPNoSp
6062 operand iRegP_R1()
6063 %{
6064   constraint(ALLOC_IN_RC(r1_reg));
6065   match(RegP);
6066   // match(iRegP);
6067   match(iRegPNoSp);
6068   op_cost(0);
6069   format %{ %}
6070   interface(REG_INTER);
6071 %}
6072 
6073 // Pointer 64 bit Register R2 only: class r2_reg; also matches iRegPNoSp
6074 operand iRegP_R2()
6075 %{
6076   constraint(ALLOC_IN_RC(r2_reg));
6077   match(RegP);
6078   // match(iRegP);
6079   match(iRegPNoSp);
6080   op_cost(0);
6081   format %{ %}
6082   interface(REG_INTER);
6083 %}
6084 
6085 // Pointer 64 bit Register R3 only: class r3_reg; also matches iRegPNoSp
6086 operand iRegP_R3()
6087 %{
6088   constraint(ALLOC_IN_RC(r3_reg));
6089   match(RegP);
6090   // match(iRegP);
6091   match(iRegPNoSp);
6092   op_cost(0);
6093   format %{ %}
6094   interface(REG_INTER);
6095 %}
6096 
6097 // Pointer 64 bit Register R4 only: class r4_reg; also matches iRegPNoSp
6098 operand iRegP_R4()
6099 %{
6100   constraint(ALLOC_IN_RC(r4_reg));
6101   match(RegP);
6102   // match(iRegP);
6103   match(iRegPNoSp);
6104   op_cost(0);
6105   format %{ %}
6106   interface(REG_INTER);
6107 %}
6108 
6109 // Pointer 64 bit Register R5 only: class r5_reg; also matches iRegPNoSp
6110 operand iRegP_R5()
6111 %{
6112   constraint(ALLOC_IN_RC(r5_reg));
6113   match(RegP);
6114   // match(iRegP);
6115   match(iRegPNoSp);
6116   op_cost(0);
6117   format %{ %}
6118   interface(REG_INTER);
6119 %}
6120 
6121 // Pointer 64 bit Register R10 only: class r10_reg; also matches iRegPNoSp
6122 operand iRegP_R10()
6123 %{
6124   constraint(ALLOC_IN_RC(r10_reg));
6125   match(RegP);
6126   // match(iRegP);
6127   match(iRegPNoSp);
6128   op_cost(0);
6129   format %{ %}
6130   interface(REG_INTER);
6131 %}
6132 
6133 // Long 64 bit Register R0 only: class r0_reg; also matches iRegLNoSp
6134 operand iRegL_R0()
6135 %{
6136   constraint(ALLOC_IN_RC(r0_reg));
6137   match(RegL);
6138   match(iRegLNoSp);
6139   op_cost(0);
6140   format %{ %}
6141   interface(REG_INTER);
6142 %}
6143 
6144 // Long 64 bit Register R2 only: class r2_reg; also matches iRegLNoSp
6145 operand iRegL_R2()
6146 %{
6147   constraint(ALLOC_IN_RC(r2_reg));
6148   match(RegL);
6149   match(iRegLNoSp);
6150   op_cost(0);
6151   format %{ %}
6152   interface(REG_INTER);
6153 %}
6154 
6155 // Long 64 bit Register R3 only: class r3_reg; also matches iRegLNoSp
6156 operand iRegL_R3()
6157 %{
6158   constraint(ALLOC_IN_RC(r3_reg));
6159   match(RegL);
6160   match(iRegLNoSp);
6161   op_cost(0);
6162   format %{ %}
6163   interface(REG_INTER);
6164 %}
6165 
6166 // Long 64 bit Register R11 only: class r11_reg; also matches iRegLNoSp
6167 operand iRegL_R11()
6168 %{
6169   constraint(ALLOC_IN_RC(r11_reg));
6170   match(RegL);
6171   match(iRegLNoSp);
6172   op_cost(0);
6173   format %{ %}
6174   interface(REG_INTER);
6175 %}
6176 
6177 // Pointer 64 bit Register FP only
6178 operand iRegP_FP()
6179 %{
6180   constraint(ALLOC_IN_RC(fp_reg));
6181   match(RegP);
6182   // match(iRegP);
6183   op_cost(0);
6184   format %{ %}
6185   interface(REG_INTER);
6186 %}
6187 
6188 // Register R0 only
6189 operand iRegI_R0()
6190 %{
6191   constraint(ALLOC_IN_RC(int_r0_reg));
6192   match(RegI);
6193   match(iRegINoSp);
6194   op_cost(0);
6195   format %{ %}
6196   interface(REG_INTER);
6197 %}
6198 
6199 // Register R2 only
6200 operand iRegI_R2()
6201 %{
6202   constraint(ALLOC_IN_RC(int_r2_reg));
6203   match(RegI);
6204   match(iRegINoSp);
6205   op_cost(0);
6206   format %{ %}
6207   interface(REG_INTER);
6208 %}
6209 
6210 // Register R3 only
6211 operand iRegI_R3()
6212 %{
6213   constraint(ALLOC_IN_RC(int_r3_reg));
6214   match(RegI);
6215   match(iRegINoSp);
6216   op_cost(0);
6217   format %{ %}
6218   interface(REG_INTER);
6219 %}
6220 
6221 
6222 // Register R4 only
6223 operand iRegI_R4()
6224 %{
6225   constraint(ALLOC_IN_RC(int_r4_reg));
6226   match(RegI);
6227   match(iRegINoSp);
6228   op_cost(0);
6229   format %{ %}
6230   interface(REG_INTER);
6231 %}
6232 
6233 
6234 // Pointer Register Operands
6235 // Narrow Pointer Register
6236 operand iRegN()
6237 %{
6238   constraint(ALLOC_IN_RC(any_reg32));
6239   match(RegN);
6240   match(iRegNNoSp);
6241   op_cost(0);
6242   format %{ %}
6243   interface(REG_INTER);
6244 %}
6245 
6246 operand iRegN_R0()
6247 %{
6248   constraint(ALLOC_IN_RC(r0_reg));
6249   match(iRegN);
6250   op_cost(0);
6251   format %{ %}
6252   interface(REG_INTER);
6253 %}
6254 
6255 operand iRegN_R2()
6256 %{
6257   constraint(ALLOC_IN_RC(r2_reg));
6258   match(iRegN);
6259   op_cost(0);
6260   format %{ %}
6261   interface(REG_INTER);
6262 %}
6263 
6264 operand iRegN_R3()
6265 %{
6266   constraint(ALLOC_IN_RC(r3_reg));
6267   match(iRegN);
6268   op_cost(0);
6269   format %{ %}
6270   interface(REG_INTER);
6271 %}
6272 
6273 // Narrow Pointer Register not Special
6274 operand iRegNNoSp()
6275 %{
6276   constraint(ALLOC_IN_RC(no_special_reg32));
6277   match(RegN);
6278   op_cost(0);
6279   format %{ %}
6280   interface(REG_INTER);
6281 %}
6282 
6283 // heap base register -- used for encoding immN0
6284 
6285 operand iRegIHeapbase()
6286 %{
6287   constraint(ALLOC_IN_RC(heapbase_reg));
6288   match(RegI);
6289   op_cost(0);
6290   format %{ %}
6291   interface(REG_INTER);
6292 %}
6293 
6294 // Float Register
6295 // Float register operands
6296 operand vRegF()
6297 %{
6298   constraint(ALLOC_IN_RC(float_reg));
6299   match(RegF);
6300 
6301   op_cost(0);
6302   format %{ %}
6303   interface(REG_INTER);
6304 %}
6305 
6306 // Double Register
6307 // Double register operands
6308 operand vRegD()
6309 %{
6310   constraint(ALLOC_IN_RC(double_reg));
6311   match(RegD);
6312 
6313   op_cost(0);
6314   format %{ %}
6315   interface(REG_INTER);
6316 %}
6317 
6318 operand vecD()
6319 %{
6320   constraint(ALLOC_IN_RC(vectord_reg));
6321   match(VecD);
6322 
6323   op_cost(0);
6324   format %{ %}
6325   interface(REG_INTER);
6326 %}
6327 
6328 operand vecX()
6329 %{
6330   constraint(ALLOC_IN_RC(vectorx_reg));
6331   match(VecX);
6332 
6333   op_cost(0);
6334   format %{ %}
6335   interface(REG_INTER);
6336 %}
6337 
6338 operand vRegD_V0()
6339 %{
6340   constraint(ALLOC_IN_RC(v0_reg));
6341   match(RegD);
6342   op_cost(0);
6343   format %{ %}
6344   interface(REG_INTER);
6345 %}
6346 
6347 operand vRegD_V1()
6348 %{
6349   constraint(ALLOC_IN_RC(v1_reg));
6350   match(RegD);
6351   op_cost(0);
6352   format %{ %}
6353   interface(REG_INTER);
6354 %}
6355 
6356 operand vRegD_V2()
6357 %{
6358   constraint(ALLOC_IN_RC(v2_reg));
6359   match(RegD);
6360   op_cost(0);
6361   format %{ %}
6362   interface(REG_INTER);
6363 %}
6364 
6365 operand vRegD_V3()
6366 %{
6367   constraint(ALLOC_IN_RC(v3_reg));
6368   match(RegD);
6369   op_cost(0);
6370   format %{ %}
6371   interface(REG_INTER);
6372 %}
6373 
6374 // Flags register, used as output of signed compare instructions
6375 
6376 // note that on AArch64 we also use this register as the output for
6377 // floating point compare instructions (CmpF CmpD). this ensures
6378 // that ordered inequality tests use GT, GE, LT or LE none of which
6379 // pass through cases where the result is unordered i.e. one or both
6380 // inputs to the compare is a NaN. this means that the ideal code can
6381 // replace e.g. a GT with an LE and not end up capturing the NaN case
6382 // (where the comparison should always fail). EQ and NE tests are
6383 // always generated in ideal code so that unordered folds into the NE
6384 // case, matching the behaviour of AArch64 NE.
6385 //
6386 // This differs from x86 where the outputs of FP compares use a
6387 // special FP flags registers and where compares based on this
6388 // register are distinguished into ordered inequalities (cmpOpUCF) and
6389 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
6390 // to explicitly handle the unordered case in branches. x86 also has
6391 // to include extra CMoveX rules to accept a cmpOpUCF input.
6392 
6393 operand rFlagsReg()
6394 %{
6395   constraint(ALLOC_IN_RC(int_flags));
6396   match(RegFlags);
6397 
6398   op_cost(0);
6399   format %{ "RFLAGS" %}
6400   interface(REG_INTER);
6401 %}
6402 
6403 // Flags register, used as output of unsigned compare instructions
6404 operand rFlagsRegU()
6405 %{
6406   constraint(ALLOC_IN_RC(int_flags));
6407   match(RegFlags);
6408 
6409   op_cost(0);
6410   format %{ "RFLAGSU" %}
6411   interface(REG_INTER);
6412 %}
6413 
6414 // Special Registers
6415 
6416 // Method Register
6417 operand inline_cache_RegP(iRegP reg)
6418 %{
6419   constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
6420   match(reg);
6421   match(iRegPNoSp);
6422   op_cost(0);
6423   format %{ %}
6424   interface(REG_INTER);
6425 %}
6426 
6427 operand interpreter_method_oop_RegP(iRegP reg)
6428 %{
6429   constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
6430   match(reg);
6431   match(iRegPNoSp);
6432   op_cost(0);
6433   format %{ %}
6434   interface(REG_INTER);
6435 %}
6436 
6437 // Thread Register
6438 operand thread_RegP(iRegP reg)
6439 %{
6440   constraint(ALLOC_IN_RC(thread_reg)); // thread_reg
6441   match(reg);
6442   op_cost(0);
6443   format %{ %}
6444   interface(REG_INTER);
6445 %}
6446 
6447 operand lr_RegP(iRegP reg)
6448 %{
6449   constraint(ALLOC_IN_RC(lr_reg)); // link_reg
6450   match(reg);
6451   op_cost(0);
6452   format %{ %}
6453   interface(REG_INTER);
6454 %}
6455 
6456 //----------Memory Operands----------------------------------------------------
6457 
6458 operand indirect(iRegP reg)
6459 %{
6460   constraint(ALLOC_IN_RC(ptr_reg));
6461   match(reg);
6462   op_cost(0);
6463   format %{ "[$reg]" %}
6464   interface(MEMORY_INTER) %{
6465     base($reg);
6466     index(0xffffffff);
6467     scale(0x0);
6468     disp(0x0);
6469   %}
6470 %}
6471 
6472 operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
6473 %{
6474   constraint(ALLOC_IN_RC(ptr_reg));
6475   predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
6476   match(AddP reg (LShiftL (ConvI2L ireg) scale));
6477   op_cost(0);
6478   format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
6479   interface(MEMORY_INTER) %{
6480     base($reg);
6481     index($ireg);
6482     scale($scale);
6483     disp(0x0);
6484   %}
6485 %}
6486 
6487 operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
6488 %{
6489   constraint(ALLOC_IN_RC(ptr_reg));
6490   predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
6491   match(AddP reg (LShiftL lreg scale));
6492   op_cost(0);
6493   format %{ "$reg, $lreg lsl($scale)" %}
6494   interface(MEMORY_INTER) %{
6495     base($reg);
6496     index($lreg);
6497     scale($scale);
6498     disp(0x0);
6499   %}
6500 %}
6501 
6502 operand indIndexI2L(iRegP reg, iRegI ireg)
6503 %{
6504   constraint(ALLOC_IN_RC(ptr_reg));
6505   match(AddP reg (ConvI2L ireg));
6506   op_cost(0);
6507   format %{ "$reg, $ireg, 0, I2L" %}
6508   interface(MEMORY_INTER) %{
6509     base($reg);
6510     index($ireg);
6511     scale(0x0);
6512     disp(0x0);
6513   %}
6514 %}
6515 
6516 operand indIndex(iRegP reg, iRegL lreg)
6517 %{
6518   constraint(ALLOC_IN_RC(ptr_reg));
6519   match(AddP reg lreg);
6520   op_cost(0);
6521   format %{ "$reg, $lreg" %}
6522   interface(MEMORY_INTER) %{
6523     base($reg);
6524     index($lreg);
6525     scale(0x0);
6526     disp(0x0);
6527   %}
6528 %}
6529 
6530 operand indOffI(iRegP reg, immIOffset off)
6531 %{
6532   constraint(ALLOC_IN_RC(ptr_reg));
6533   match(AddP reg off);
6534   op_cost(0);
6535   format %{ "[$reg, $off]" %}
6536   interface(MEMORY_INTER) %{
6537     base($reg);
6538     index(0xffffffff);
6539     scale(0x0);
6540     disp($off);
6541   %}
6542 %}
6543 
6544 operand indOffI4(iRegP reg, immIOffset4 off)
6545 %{
6546   constraint(ALLOC_IN_RC(ptr_reg));
6547   match(AddP reg off);
6548   op_cost(0);
6549   format %{ "[$reg, $off]" %}
6550   interface(MEMORY_INTER) %{
6551     base($reg);
6552     index(0xffffffff);
6553     scale(0x0);
6554     disp($off);
6555   %}
6556 %}
6557 
6558 operand indOffI8(iRegP reg, immIOffset8 off)
6559 %{
6560   constraint(ALLOC_IN_RC(ptr_reg));
6561   match(AddP reg off);
6562   op_cost(0);
6563   format %{ "[$reg, $off]" %}
6564   interface(MEMORY_INTER) %{
6565     base($reg);
6566     index(0xffffffff);
6567     scale(0x0);
6568     disp($off);
6569   %}
6570 %}
6571 
6572 operand indOffI16(iRegP reg, immIOffset16 off)
6573 %{
6574   constraint(ALLOC_IN_RC(ptr_reg));
6575   match(AddP reg off);
6576   op_cost(0);
6577   format %{ "[$reg, $off]" %}
6578   interface(MEMORY_INTER) %{
6579     base($reg);
6580     index(0xffffffff);
6581     scale(0x0);
6582     disp($off);
6583   %}
6584 %}
6585 
6586 operand indOffL(iRegP reg, immLoffset off)
6587 %{
6588   constraint(ALLOC_IN_RC(ptr_reg));
6589   match(AddP reg off);
6590   op_cost(0);
6591   format %{ "[$reg, $off]" %}
6592   interface(MEMORY_INTER) %{
6593     base($reg);
6594     index(0xffffffff);
6595     scale(0x0);
6596     disp($off);
6597   %}
6598 %}
6599 
6600 operand indOffL4(iRegP reg, immLoffset4 off)
6601 %{
6602   constraint(ALLOC_IN_RC(ptr_reg));
6603   match(AddP reg off);
6604   op_cost(0);
6605   format %{ "[$reg, $off]" %}
6606   interface(MEMORY_INTER) %{
6607     base($reg);
6608     index(0xffffffff);
6609     scale(0x0);
6610     disp($off);
6611   %}
6612 %}
6613 
6614 operand indOffL8(iRegP reg, immLoffset8 off)
6615 %{
6616   constraint(ALLOC_IN_RC(ptr_reg));
6617   match(AddP reg off);
6618   op_cost(0);
6619   format %{ "[$reg, $off]" %}
6620   interface(MEMORY_INTER) %{
6621     base($reg);
6622     index(0xffffffff);
6623     scale(0x0);
6624     disp($off);
6625   %}
6626 %}
6627 
6628 operand indOffL16(iRegP reg, immLoffset16 off)
6629 %{
6630   constraint(ALLOC_IN_RC(ptr_reg));
6631   match(AddP reg off);
6632   op_cost(0);
6633   format %{ "[$reg, $off]" %}
6634   interface(MEMORY_INTER) %{
6635     base($reg);
6636     index(0xffffffff);
6637     scale(0x0);
6638     disp($off);
6639   %}
6640 %}
6641 
6642 operand indirectN(iRegN reg)
6643 %{
6644   predicate(Universe::narrow_oop_shift() == 0);
6645   constraint(ALLOC_IN_RC(ptr_reg));
6646   match(DecodeN reg);
6647   op_cost(0);
6648   format %{ "[$reg]\t# narrow" %}
6649   interface(MEMORY_INTER) %{
6650     base($reg);
6651     index(0xffffffff);
6652     scale(0x0);
6653     disp(0x0);
6654   %}
6655 %}
6656 
6657 operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
6658 %{
6659   predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
6660   constraint(ALLOC_IN_RC(ptr_reg));
6661   match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
6662   op_cost(0);
6663   format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
6664   interface(MEMORY_INTER) %{
6665     base($reg);
6666     index($ireg);
6667     scale($scale);
6668     disp(0x0);
6669   %}
6670 %}
6671 
6672 operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
6673 %{
6674   predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
6675   constraint(ALLOC_IN_RC(ptr_reg));
6676   match(AddP (DecodeN reg) (LShiftL lreg scale));
6677   op_cost(0);
6678   format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
6679   interface(MEMORY_INTER) %{
6680     base($reg);
6681     index($lreg);
6682     scale($scale);
6683     disp(0x0);
6684   %}
6685 %}
6686 
6687 operand indIndexI2LN(iRegN reg, iRegI ireg)
6688 %{
6689   predicate(Universe::narrow_oop_shift() == 0);
6690   constraint(ALLOC_IN_RC(ptr_reg));
6691   match(AddP (DecodeN reg) (ConvI2L ireg));
6692   op_cost(0);
6693   format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
6694   interface(MEMORY_INTER) %{
6695     base($reg);
6696     index($ireg);
6697     scale(0x0);
6698     disp(0x0);
6699   %}
6700 %}
6701 
6702 operand indIndexN(iRegN reg, iRegL lreg)
6703 %{
6704   predicate(Universe::narrow_oop_shift() == 0);
6705   constraint(ALLOC_IN_RC(ptr_reg));
6706   match(AddP (DecodeN reg) lreg);
6707   op_cost(0);
6708   format %{ "$reg, $lreg\t# narrow" %}
6709   interface(MEMORY_INTER) %{
6710     base($reg);
6711     index($lreg);
6712     scale(0x0);
6713     disp(0x0);
6714   %}
6715 %}
6716 
6717 operand indOffIN(iRegN reg, immIOffset off)
6718 %{
6719   predicate(Universe::narrow_oop_shift() == 0);
6720   constraint(ALLOC_IN_RC(ptr_reg));
6721   match(AddP (DecodeN reg) off);
6722   op_cost(0);
6723   format %{ "[$reg, $off]\t# narrow" %}
6724   interface(MEMORY_INTER) %{
6725     base($reg);
6726     index(0xffffffff);
6727     scale(0x0);
6728     disp($off);
6729   %}
6730 %}
6731 
6732 operand indOffLN(iRegN reg, immLoffset off)
6733 %{
6734   predicate(Universe::narrow_oop_shift() == 0);
6735   constraint(ALLOC_IN_RC(ptr_reg));
6736   match(AddP (DecodeN reg) off);
6737   op_cost(0);
6738   format %{ "[$reg, $off]\t# narrow" %}
6739   interface(MEMORY_INTER) %{
6740     base($reg);
6741     index(0xffffffff);
6742     scale(0x0);
6743     disp($off);
6744   %}
6745 %}
6746 
6747 
6748 
6749 // AArch64 opto stubs need to write to the pc slot in the thread anchor
6750 operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
6751 %{
6752   constraint(ALLOC_IN_RC(ptr_reg));
6753   match(AddP reg off);
6754   op_cost(0);
6755   format %{ "[$reg, $off]" %}
6756   interface(MEMORY_INTER) %{
6757     base($reg);
6758     index(0xffffffff);
6759     scale(0x0);
6760     disp($off);
6761   %}
6762 %}
6763 
6764 //----------Special Memory Operands--------------------------------------------
6765 // Stack Slot Operand - This operand is used for loading and storing temporary
6766 //                      values on the stack where a match requires a value to
6767 //                      flow through memory.
6768 operand stackSlotP(sRegP reg)
6769 %{
6770   constraint(ALLOC_IN_RC(stack_slots));
6771   op_cost(100);
6772   // No match rule because this operand is only generated in matching
6773   // match(RegP);
6774   format %{ "[$reg]" %}
6775   interface(MEMORY_INTER) %{
6776     base(0x1e);  // was labelled "RSP" (x86 name); NOTE(review): confirm 0x1e is the stack base on AArch64
6777     index(0x0);  // No Index
6778     scale(0x0);  // No Scale
6779     disp($reg);  // Stack Offset
6780   %}
6781 %}
6782 
6783 operand stackSlotI(sRegI reg)
6784 %{
6785   constraint(ALLOC_IN_RC(stack_slots));
6786   // No match rule because this operand is only generated in matching
6787   // match(RegI);
6788   format %{ "[$reg]" %}
6789   interface(MEMORY_INTER) %{
6790     base(0x1e);  // was labelled "RSP" (x86 name); NOTE(review): confirm 0x1e is the stack base on AArch64
6791     index(0x0);  // No Index
6792     scale(0x0);  // No Scale
6793     disp($reg);  // Stack Offset
6794   %}
6795 %}
6796 
6797 operand stackSlotF(sRegF reg)
6798 %{
6799   constraint(ALLOC_IN_RC(stack_slots));
6800   // No match rule because this operand is only generated in matching
6801   // match(RegF);
6802   format %{ "[$reg]" %}
6803   interface(MEMORY_INTER) %{
6804     base(0x1e);  // was labelled "RSP" (x86 name); NOTE(review): confirm 0x1e is the stack base on AArch64
6805     index(0x0);  // No Index
6806     scale(0x0);  // No Scale
6807     disp($reg);  // Stack Offset
6808   %}
6809 %}
6810 
6811 operand stackSlotD(sRegD reg)
6812 %{
6813   constraint(ALLOC_IN_RC(stack_slots));
6814   // No match rule because this operand is only generated in matching
6815   // match(RegD);
6816   format %{ "[$reg]" %}
6817   interface(MEMORY_INTER) %{
6818     base(0x1e);  // was labelled "RSP" (x86 name); NOTE(review): confirm 0x1e is the stack base on AArch64
6819     index(0x0);  // No Index
6820     scale(0x0);  // No Scale
6821     disp($reg);  // Stack Offset
6822   %}
6823 %}
6824 
6825 operand stackSlotL(sRegL reg)
6826 %{
6827   constraint(ALLOC_IN_RC(stack_slots));
6828   // No match rule because this operand is only generated in matching
6829   // match(RegL);
6830   format %{ "[$reg]" %}
6831   interface(MEMORY_INTER) %{
6832     base(0x1e);  // was labelled "RSP" (x86 name); NOTE(review): confirm 0x1e is the stack base on AArch64
6833     index(0x0);  // No Index
6834     scale(0x0);  // No Scale
6835     disp($reg);  // Stack Offset
6836   %}
6837 %}
6838 
6839 // Operands for expressing Control Flow
6840 // NOTE: Label is a predefined operand which should not be redefined in
6841 //       the AD file. It is generically handled within the ADLC.
6842 
6843 //----------Conditional Branch Operands----------------------------------------
6844 // Comparison Op  - This is the operation of the comparison, and is limited to
6845 //                  the following set of codes:
6846 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6847 //
6848 // Other attributes of the comparison, such as unsignedness, are specified
6849 // by the comparison instruction that sets a condition code flags register.
6850 // That result is represented by a flags operand whose subtype is appropriate
6851 // to the unsignedness (etc.) of the comparison.
6852 //
6853 // Later, the instruction which matches both the Comparison Op (a Bool) and
6854 // the flags (produced by the Cmp) specifies the coding of the comparison op
6855 // by matching a specific subtype of Bool operand below, such as cmpOpU.
6856 
6857 // used for signed integral comparisons and fp comparisons
6858 
6859 operand cmpOp()
6860 %{
6861   match(Bool);
6862 
6863   format %{ "" %}
6864   interface(COND_INTER) %{
6865     equal(0x0, "eq");
6866     not_equal(0x1, "ne");
6867     less(0xb, "lt");
6868     greater_equal(0xa, "ge");
6869     less_equal(0xd, "le");
6870     greater(0xc, "gt");
6871     overflow(0x6, "vs");
6872     no_overflow(0x7, "vc");
6873   %}
6874 %}
6875 
6876 // used for unsigned integral comparisons
6877 
6878 operand cmpOpU()
6879 %{
6880   match(Bool);
6881 
6882   format %{ "" %}
6883   interface(COND_INTER) %{
6884     equal(0x0, "eq");
6885     not_equal(0x1, "ne");
6886     less(0x3, "lo");
6887     greater_equal(0x2, "hs");
6888     less_equal(0x9, "ls");
6889     greater(0x8, "hi");
6890     overflow(0x6, "vs");
6891     no_overflow(0x7, "vc");
6892   %}
6893 %}
6894 
6895 // used for certain integral comparisons which can be
6896 // converted to cbxx or tbxx instructions
6897 
6898 operand cmpOpEqNe()
6899 %{
6900   match(Bool);
6901   match(CmpOp);
6902   op_cost(0);
6903   predicate(n->as_Bool()->_test._test == BoolTest::ne
6904             || n->as_Bool()->_test._test == BoolTest::eq);
6905 
6906   format %{ "" %}
6907   interface(COND_INTER) %{
6908     equal(0x0, "eq");
6909     not_equal(0x1, "ne");
6910     less(0xb, "lt");
6911     greater_equal(0xa, "ge");
6912     less_equal(0xd, "le");
6913     greater(0xc, "gt");
6914     overflow(0x6, "vs");
6915     no_overflow(0x7, "vc");
6916   %}
6917 %}
6918 
6919 // used for certain integral comparisons which can be
6920 // converted to cbxx or tbxx instructions
6921 
6922 operand cmpOpLtGe()
6923 %{
6924   match(Bool);
6925   match(CmpOp);
6926   op_cost(0);
6927 
6928   predicate(n->as_Bool()->_test._test == BoolTest::lt
6929             || n->as_Bool()->_test._test == BoolTest::ge);
6930 
6931   format %{ "" %}
6932   interface(COND_INTER) %{
6933     equal(0x0, "eq");
6934     not_equal(0x1, "ne");
6935     less(0xb, "lt");
6936     greater_equal(0xa, "ge");
6937     less_equal(0xd, "le");
6938     greater(0xc, "gt");
6939     overflow(0x6, "vs");
6940     no_overflow(0x7, "vc");
6941   %}
6942 %}
6943 
6944 // used for certain unsigned integral comparisons which can be
6945 // converted to cbxx or tbxx instructions
6946 
6947 operand cmpOpUEqNeLtGe()
6948 %{
6949   match(Bool);
6950   match(CmpOp);
6951   op_cost(0);
6952 
6953   predicate(n->as_Bool()->_test._test == BoolTest::eq
6954             || n->as_Bool()->_test._test == BoolTest::ne
6955             || n->as_Bool()->_test._test == BoolTest::lt
6956             || n->as_Bool()->_test._test == BoolTest::ge);
6957 
6958   format %{ "" %}
6959   interface(COND_INTER) %{
6960     equal(0x0, "eq");
6961     not_equal(0x1, "ne");
6962     less(0xb, "lt");  // signed code; cmpOpU uses 0x3 "lo" -- NOTE(review): confirm intended for unsigned
6963     greater_equal(0xa, "ge");  // signed code; cmpOpU uses 0x2 "hs" -- NOTE(review): confirm intended for unsigned
6964     less_equal(0xd, "le");  // not selectable: the predicate above admits only eq, ne, lt, ge
6965     greater(0xc, "gt");  // not selectable: the predicate above admits only eq, ne, lt, ge
6966     overflow(0x6, "vs");
6967     no_overflow(0x7, "vc");
6968   %}
6969 %}
6970 
6971 // Special operand allowing long args to int ops to be truncated for free
6972 
6973 operand iRegL2I(iRegL reg) %{
6974 
6975   op_cost(0);
6976 
6977   match(ConvL2I reg);
6978 
6979   format %{ "l2i($reg)" %}
6980 
6981   interface(REG_INTER)  // NOTE(review): no trailing ';' here, unlike the other REG_INTER operands nearby -- confirm intended
6982 %}
6983 
6984 opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
6985 opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
6986 opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
6987 
6988 //----------OPERAND CLASSES----------------------------------------------------
6989 // Operand Classes are groups of operands that are used to simplify
6990 // instruction definitions by not requiring the AD writer to specify
6991 // separate instructions for every form of operand when the
6992 // instruction accepts multiple operand types with the same basic
6993 // encoding and format. The classic case of this is memory operands.
6994 
6995 // memory is used to define read/write location for load/store
6996 // instruction defs. we can turn a memory op into an Address
6997 
6998 opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
6999                indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
7000 
7001 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
7002 // operations. it allows the src to be either an iRegI or a (ConvL2I
7003 // iRegL). in the latter case the l2i normally planted for a ConvL2I
7004 // can be elided because the 32-bit instruction will just employ the
7005 // lower 32 bits anyway.
7006 //
7007 // n.b. this does not elide all L2I conversions. if the truncated
7008 // value is consumed by more than one operation then the ConvL2I
7009 // cannot be bundled into the consuming nodes so an l2i gets planted
7010 // (actually a movw $dst $src) and the downstream instructions consume
7011 // the result of the l2i as an iRegI input. That's a shame since the
7012 // movw is actually redundant but it's not too costly.
7013 
7014 opclass iRegIorL2I(iRegI, iRegL2I);
7015 
7016 //----------PIPELINE-----------------------------------------------------------
7017 // Rules which define the behavior of the target architectures pipeline.
7018 
7019 // For specific pipelines, eg A53, define the stages of that pipeline
7020 //pipe_desc(ISS, EX1, EX2, WR);
7021 #define ISS S0
7022 #define EX1 S1
7023 #define EX2 S2
7024 #define WR  S3
7025 
7026 // Pipeline model: attributes, resources, stages and pipeline classes
7027 pipeline %{
7028 
7029 attributes %{
7030   // ARM instructions are of fixed length
7031   fixed_size_instructions;        // Fixed size instructions TODO does
7032   max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
7033   // ARM instructions come in 32-bit word units
7034   instruction_unit_size = 4;         // An instruction is 4 bytes long
7035   instruction_fetch_unit_size = 64;  // The processor fetches one line
7036   instruction_fetch_units = 1;       // of 64 bytes
7037 
7038   // List of nop instructions
7039   nops( MachNop );
7040 %}
7041 
7042 // We don't use an actual pipeline model so don't care about resources
7043 // or description. we do use pipeline classes to introduce fixed
7044 // latencies
7045 
7046 //----------RESOURCES----------------------------------------------------------
7047 // Resources are the functional units available to the machine
7048 
7049 resources( INS0, INS1, INS01 = INS0 | INS1,
7050            ALU0, ALU1, ALU = ALU0 | ALU1,
7051            MAC,
7052            DIV,
7053            BRANCH,
7054            LDST,
7055            NEON_FP);
7056 
7057 //----------PIPELINE DESCRIPTION-----------------------------------------------
7058 // Pipeline Description specifies the stages in the machine's pipeline
7059 
7060 // Define the pipeline as a generic 6 stage pipeline
7061 pipe_desc(S0, S1, S2, S3, S4, S5);
7062 
7063 //----------PIPELINE CLASSES---------------------------------------------------
7064 // Pipeline Classes describe the stages in which input and output are
7065 // referenced by the hardware pipeline.
7066 
7067 pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
7068 %{
7069   single_instruction;
7070   src1   : S1(read);
7071   src2   : S2(read);
7072   dst    : S5(write);
7073   INS01  : ISS;
7074   NEON_FP : S5;
7075 %}
7076 
7077 pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
7078 %{
7079   single_instruction;
7080   src1   : S1(read);
7081   src2   : S2(read);
7082   dst    : S5(write);
7083   INS01  : ISS;
7084   NEON_FP : S5;
7085 %}
7086 
7087 pipe_class fp_uop_s(vRegF dst, vRegF src)
7088 %{
7089   single_instruction;
7090   src    : S1(read);
7091   dst    : S5(write);
7092   INS01  : ISS;
7093   NEON_FP : S5;
7094 %}
7095 
7096 pipe_class fp_uop_d(vRegD dst, vRegD src)
7097 %{
7098   single_instruction;
7099   src    : S1(read);
7100   dst    : S5(write);
7101   INS01  : ISS;
7102   NEON_FP : S5;
7103 %}
7104 
7105 pipe_class fp_d2f(vRegF dst, vRegD src)
7106 %{
7107   single_instruction;
7108   src    : S1(read);
7109   dst    : S5(write);
7110   INS01  : ISS;
7111   NEON_FP : S5;
7112 %}
7113 
7114 pipe_class fp_f2d(vRegD dst, vRegF src)
7115 %{
7116   single_instruction;
7117   src    : S1(read);
7118   dst    : S5(write);
7119   INS01  : ISS;
7120   NEON_FP : S5;
7121 %}
7122 
7123 pipe_class fp_f2i(iRegINoSp dst, vRegF src)
7124 %{
7125   single_instruction;
7126   src    : S1(read);
7127   dst    : S5(write);
7128   INS01  : ISS;
7129   NEON_FP : S5;
7130 %}
7131 
7132 pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
7133 %{
7134   single_instruction;
7135   src    : S1(read);
7136   dst    : S5(write);
7137   INS01  : ISS;
7138   NEON_FP : S5;
7139 %}
7140 
7141 pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
7142 %{
7143   single_instruction;
7144   src    : S1(read);
7145   dst    : S5(write);
7146   INS01  : ISS;
7147   NEON_FP : S5;
7148 %}
7149 
7150 pipe_class fp_l2f(vRegF dst, iRegL src)
7151 %{
7152   single_instruction;
7153   src    : S1(read);
7154   dst    : S5(write);
7155   INS01  : ISS;
7156   NEON_FP : S5;
7157 %}
7158 
7159 pipe_class fp_d2i(iRegINoSp dst, vRegD src)
7160 %{
7161   single_instruction;
7162   src    : S1(read);
7163   dst    : S5(write);
7164   INS01  : ISS;
7165   NEON_FP : S5;
7166 %}
7167 
7168 pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
7169 %{
7170   single_instruction;
7171   src    : S1(read);
7172   dst    : S5(write);
7173   INS01  : ISS;
7174   NEON_FP : S5;
7175 %}
7176 
7177 pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
7178 %{
7179   single_instruction;
7180   src    : S1(read);
7181   dst    : S5(write);
7182   INS01  : ISS;
7183   NEON_FP : S5;
7184 %}
7185 
7186 pipe_class fp_l2d(vRegD dst, iRegL src)  // long -> double: src is a long register, matching fp_l2f
7187 %{
7188   single_instruction;
7189   src    : S1(read);    // source operand is read in stage S1
7190   dst    : S5(write);   // result is written in stage S5
7191   INS01  : ISS;         // issues through INS01 (INS0 | INS1) in the ISS stage
7192   NEON_FP : S5;         // NEON_FP resource is used in stage S5
7193 %}
7194 
7195 pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
7196 %{
7197   single_instruction;
7198   src1   : S1(read);
7199   src2   : S2(read);
7200   dst    : S5(write);
7201   INS0   : ISS;
7202   NEON_FP : S5;
7203 %}
7204 
7205 pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
7206 %{
7207   single_instruction;
7208   src1   : S1(read);
7209   src2   : S2(read);
7210   dst    : S5(write);
7211   INS0   : ISS;
7212   NEON_FP : S5;
7213 %}
7214 
7215 pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
7216 %{
7217   single_instruction;
7218   cr     : S1(read);
7219   src1   : S1(read);
7220   src2   : S1(read);
7221   dst    : S3(write);
7222   INS01  : ISS;
7223   NEON_FP : S3;
7224 %}
7225 
7226 pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
7227 %{
7228   single_instruction;
7229   cr     : S1(read);
7230   src1   : S1(read);
7231   src2   : S1(read);
7232   dst    : S3(write);
7233   INS01  : ISS;
7234   NEON_FP : S3;
7235 %}
7236 
7237 pipe_class fp_imm_s(vRegF dst)
7238 %{
7239   single_instruction;
7240   dst    : S3(write);
7241   INS01  : ISS;
7242   NEON_FP : S3;
7243 %}
7244 
7245 pipe_class fp_imm_d(vRegD dst)
7246 %{
7247   single_instruction;
7248   dst    : S3(write);
7249   INS01  : ISS;
7250   NEON_FP : S3;
7251 %}
7252 
7253 pipe_class fp_load_constant_s(vRegF dst)
7254 %{
7255   single_instruction;
7256   dst    : S4(write);
7257   INS01  : ISS;
7258   NEON_FP : S4;
7259 %}
7260 
7261 pipe_class fp_load_constant_d(vRegD dst)
7262 %{
7263   single_instruction;
7264   dst    : S4(write);
7265   INS01  : ISS;
7266   NEON_FP : S4;
7267 %}
7268 
7269 pipe_class vmul64(vecD dst, vecD src1, vecD src2)
7270 %{
7271   single_instruction;
7272   dst    : S5(write);
7273   src1   : S1(read);
7274   src2   : S1(read);
7275   INS01  : ISS;
7276   NEON_FP : S5;
7277 %}
7278 
7279 pipe_class vmul128(vecX dst, vecX src1, vecX src2)
7280 %{
7281   single_instruction;
7282   dst    : S5(write);
7283   src1   : S1(read);
7284   src2   : S1(read);
7285   INS0   : ISS;
7286   NEON_FP : S5;
7287 %}
7288 
7289 pipe_class vmla64(vecD dst, vecD src1, vecD src2)
7290 %{
7291   single_instruction;
7292   dst    : S5(write);
7293   src1   : S1(read);
7294   src2   : S1(read);
7295   dst    : S1(read);
7296   INS01  : ISS;
7297   NEON_FP : S5;
7298 %}
7299 
7300 pipe_class vmla128(vecX dst, vecX src1, vecX src2)
7301 %{
7302   single_instruction;
7303   dst    : S5(write);
7304   src1   : S1(read);
7305   src2   : S1(read);
7306   dst    : S1(read);
7307   INS0   : ISS;
7308   NEON_FP : S5;
7309 %}
7310 
7311 pipe_class vdop64(vecD dst, vecD src1, vecD src2)
7312 %{
7313   single_instruction;
7314   dst    : S4(write);
7315   src1   : S2(read);
7316   src2   : S2(read);
7317   INS01  : ISS;
7318   NEON_FP : S4;
7319 %}
7320 
7321 pipe_class vdop128(vecX dst, vecX src1, vecX src2)
7322 %{
7323   single_instruction;
7324   dst    : S4(write);
7325   src1   : S2(read);
7326   src2   : S2(read);
7327   INS0   : ISS;
7328   NEON_FP : S4;
7329 %}
7330 
7331 pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
7332 %{
7333   single_instruction;
7334   dst    : S3(write);
7335   src1   : S2(read);
7336   src2   : S2(read);
7337   INS01  : ISS;
7338   NEON_FP : S3;
7339 %}
7340 
7341 pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
7342 %{
7343   single_instruction;
7344   dst    : S3(write);
7345   src1   : S2(read);
7346   src2   : S2(read);
7347   INS0   : ISS;
7348   NEON_FP : S3;
7349 %}
7350 
7351 pipe_class vshift64(vecD dst, vecD src, vecX shift)
7352 %{
7353   single_instruction;
7354   dst    : S3(write);
7355   src    : S1(read);
7356   shift  : S1(read);
7357   INS01  : ISS;
7358   NEON_FP : S3;
7359 %}
7360 
7361 pipe_class vshift128(vecX dst, vecX src, vecX shift)
7362 %{
7363   single_instruction;
7364   dst    : S3(write);
7365   src    : S1(read);
7366   shift  : S1(read);
7367   INS0   : ISS;
7368   NEON_FP : S3;
7369 %}
7370 
7371 pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
7372 %{
7373   single_instruction;
7374   dst    : S3(write);
7375   src    : S1(read); // the immediate shift operand has no read entry
7376   INS01  : ISS; // Dual issue as instruction 0 or 1
7377   NEON_FP : S3;
7378 %}
7379 
7380 pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
7381 %{
7382   single_instruction;
7383   dst    : S3(write);
7384   src    : S1(read); // the immediate shift operand has no read entry
7385   INS0   : ISS; // Can only dual issue as instruction 0
7386   NEON_FP : S3;
7387 %}
7388 
7389 pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
7390 %{
7391   single_instruction;
7392   dst    : S5(write); // result written in S5
7393   src1   : S1(read); // both sources read in S1
7394   src2   : S1(read);
7395   INS01  : ISS; // Dual issue as instruction 0 or 1
7396   NEON_FP : S5;
7397 %}
7398 
7399 pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
7400 %{
7401   single_instruction;
7402   dst    : S5(write); // result written in S5
7403   src1   : S1(read); // both sources read in S1
7404   src2   : S1(read);
7405   INS0   : ISS; // Can only dual issue as instruction 0
7406   NEON_FP : S5;
7407 %}
7408 
7409 pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
7410 %{
7411   single_instruction;
7412   dst    : S5(write);
7413   src1   : S1(read);
7414   src2   : S1(read);
7415   INS0   : ISS; // Can only dual issue as instruction 0 (other 64-bit vector classes here use INS01)
7416   NEON_FP : S5;
7417 %}
7418 
7419 pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
7420 %{
7421   single_instruction;
7422   dst    : S5(write); // result written in S5
7423   src1   : S1(read); // both sources read in S1
7424   src2   : S1(read);
7425   INS0   : ISS; // Can only dual issue as instruction 0
7426   NEON_FP : S5;
7427 %}
7428 
7429 pipe_class vsqrt_fp128(vecX dst, vecX src)
7430 %{
7431   single_instruction;
7432   dst    : S5(write); // result written in S5
7433   src    : S1(read); // single source read in S1
7434   INS0   : ISS; // Can only dual issue as instruction 0
7435   NEON_FP : S5;
7436 %}
7437 
7438 pipe_class vunop_fp64(vecD dst, vecD src)
7439 %{
7440   single_instruction;
7441   dst    : S5(write); // result written in S5
7442   src    : S1(read); // single source read in S1
7443   INS01  : ISS; // Dual issue as instruction 0 or 1
7444   NEON_FP : S5;
7445 %}
7446 
7447 pipe_class vunop_fp128(vecX dst, vecX src)
7448 %{
7449   single_instruction;
7450   dst    : S5(write); // result written in S5
7451   src    : S1(read); // single source read in S1
7452   INS0   : ISS; // Can only dual issue as instruction 0
7453   NEON_FP : S5;
7454 %}
7455 
7456 pipe_class vdup_reg_reg64(vecD dst, iRegI src)
7457 %{
7458   single_instruction;
7459   dst    : S3(write);
7460   src    : S1(read); // source is a general-purpose register operand (iRegI)
7461   INS01  : ISS; // Dual issue as instruction 0 or 1
7462   NEON_FP : S3;
7463 %}
7464 
7465 pipe_class vdup_reg_reg128(vecX dst, iRegI src)
7466 %{
7467   single_instruction;
7468   dst    : S3(write);
7469   src    : S1(read); // source is a general-purpose register operand (iRegI)
7470   INS01  : ISS; // Dual issue as instruction 0 or 1
7471   NEON_FP : S3;
7472 %}
7473 
7474 pipe_class vdup_reg_freg64(vecD dst, vRegF src)
7475 %{
7476   single_instruction;
7477   dst    : S3(write);
7478   src    : S1(read); // source is a float register operand (vRegF)
7479   INS01  : ISS; // Dual issue as instruction 0 or 1
7480   NEON_FP : S3;
7481 %}
7482 
7483 pipe_class vdup_reg_freg128(vecX dst, vRegF src)
7484 %{
7485   single_instruction;
7486   dst    : S3(write);
7487   src    : S1(read); // source is a float register operand (vRegF)
7488   INS01  : ISS; // Dual issue as instruction 0 or 1
7489   NEON_FP : S3;
7490 %}
7491 
7492 pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
7493 %{
7494   single_instruction;
7495   dst    : S3(write);
7496   src    : S1(read); // source is a double register operand (vRegD)
7497   INS01  : ISS; // Dual issue as instruction 0 or 1
7498   NEON_FP : S3;
7499 %}
7500 
7501 pipe_class vmovi_reg_imm64(vecD dst)
7502 %{
7503   single_instruction;
7504   dst    : S3(write); // no register sources; only the destination is listed
7505   INS01  : ISS; // Dual issue as instruction 0 or 1
7506   NEON_FP : S3;
7507 %}
7508 
7509 pipe_class vmovi_reg_imm128(vecX dst)
7510 %{
7511   single_instruction;
7512   dst    : S3(write); // no register sources; only the destination is listed
7513   INS0   : ISS; // Can only dual issue as instruction 0
7514   NEON_FP : S3;
7515 %}
7516 
7517 pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
7518 %{
7519   single_instruction;
7520   dst    : S5(write); // loaded value written in S5
7521   mem    : ISS(read); // address operand read at issue
7522   INS01  : ISS; // Dual issue as instruction 0 or 1
7523   NEON_FP : S3;
7524 %}
7525 
7526 pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
7527 %{
7528   single_instruction;
7529   dst    : S5(write); // loaded value written in S5
7530   mem    : ISS(read); // address operand read at issue
7531   INS01  : ISS; // Dual issue as instruction 0 or 1
7532   NEON_FP : S3;
7533 %}
7534 
7535 pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
7536 %{
7537   single_instruction;
7538   mem    : ISS(read); // address operand read at issue
7539   src    : S2(read); // stored value read in S2; no register is written
7540   INS01  : ISS; // Dual issue as instruction 0 or 1
7541   NEON_FP : S3;
7542 %}
7543 
7544 pipe_class vstore_reg_mem128(vecX src, vmem16 mem)
7545 %{
7546   single_instruction;
7547   mem    : ISS(read); // address operand read at issue
7548   src    : S2(read); // stored 128-bit value read in S2; no register is written
7549   INS01  : ISS; // Dual issue as instruction 0 or 1
7550   NEON_FP : S3;
7551 %}
7552 
7553 //------- Integer ALU operations --------------------------
7554 
7555 // Integer ALU reg-reg operation
7556 // Operands needed in EX1, result generated in EX2
7557 // Eg.  ADD     x0, x1, x2
7558 pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7559 %{
7560   single_instruction;
7561   dst    : EX2(write);
7562   src1   : EX1(read);
7563   src2   : EX1(read);
7564   INS01  : ISS; // Dual issue as instruction 0 or 1
7565   ALU    : EX2; // uses the ALU resource
7566 %}
7567 
7568 // Integer ALU reg-reg operation with constant shift
7569 // Shifted register must be available in LATE_ISS instead of EX1
7570 // Eg.  ADD     x0, x1, x2, LSL #2
7571 pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
7572 %{
7573   single_instruction;
7574   dst    : EX2(write);
7575   src1   : EX1(read);
7576   src2   : ISS(read); // NOTE(review): header says LATE_ISS but the declared stage is ISS - confirm
7577   INS01  : ISS; // Dual issue as instruction 0 or 1
7578   ALU    : EX2;
7579 %}
7580 
7581 // Integer ALU reg operation with constant shift
7582 // Eg.  LSL     x0, x1, #shift
7583 pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
7584 %{
7585   single_instruction;
7586   dst    : EX2(write);
7587   src1   : ISS(read); // source needed at issue rather than EX1
7588   INS01  : ISS; // Dual issue as instruction 0 or 1
7589   ALU    : EX2;
7590 %}
7591 
7592 // Integer ALU reg-reg operation with variable shift
7593 // Both operands must be available in LATE_ISS instead of EX1
7594 // Result is available in EX1 instead of EX2
7595 // Eg.  LSLV    x0, x1, x2
7596 pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
7597 %{
7598   single_instruction;
7599   dst    : EX1(write);
7600   src1   : ISS(read); // NOTE(review): header says LATE_ISS but the declared stage is ISS - confirm
7601   src2   : ISS(read);
7602   INS01  : ISS; // Dual issue as instruction 0 or 1
7603   ALU    : EX1;
7604 %}
7605 
7606 // Integer ALU reg-reg operation with extract
7607 // As for _vshift above, but result generated in EX2
7608 // Eg.  EXTR    x0, x1, x2, #N
7609 pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
7610 %{
7611   single_instruction;
7612   dst    : EX2(write);
7613   src1   : ISS(read);
7614   src2   : ISS(read);
7615   INS1   : ISS; // Can only dual issue as Instruction 1
7616   ALU    : EX1; // NOTE(review): dst is written in EX2 but ALU is listed at EX1 - confirm intended
7617 %}
7618 
7619 // Integer ALU reg operation (single register source)
7620 // Eg.  NEG     x0, x1
7621 pipe_class ialu_reg(iRegI dst, iRegI src)
7622 %{
7623   single_instruction;
7624   dst    : EX2(write);
7625   src    : EX1(read);
7626   INS01  : ISS; // Dual issue as instruction 0 or 1
7627   ALU    : EX2;
7628 %}
7629 
7630 // Integer ALU reg-immediate operation
7631 // Eg.  ADD     x0, x1, #N
7632 pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
7633 %{
7634   single_instruction;
7635   dst    : EX2(write);
7636   src1   : EX1(read); // the immediate has no pipeline entry
7637   INS01  : ISS; // Dual issue as instruction 0 or 1
7638   ALU    : EX2;
7639 %}
7640 
7641 // Integer ALU immediate operation (no source operands)
7642 // Eg.  MOV     x0, #N
7643 pipe_class ialu_imm(iRegI dst)
7644 %{
7645   single_instruction;
7646   dst    : EX1(write); // result written in EX1
7647   INS01  : ISS; // Dual issue as instruction 0 or 1
7648   ALU    : EX1;
7649 %}
7650 
7651 //------- Compare operation -------------------------------
7652 
7653 // Compare reg-reg: the flags register is the only result
7654 // Eg.  CMP     x0, x1
7655 pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
7656 %{
7657   single_instruction;
7658 //  fixed_latency(16);
7659   cr     : EX2(write);
7660   op1    : EX1(read);
7661   op2    : EX1(read);
7662   INS01  : ISS; // Dual issue as instruction 0 or 1
7663   ALU    : EX2;
7664 %}
7665 
7666 // Compare reg-imm: the flags register is the only result
7667 // Eg.  CMP     x0, #N
7668 pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
7669 %{
7670   single_instruction;
7671 //  fixed_latency(16);
7672   cr     : EX2(write);
7673   op1    : EX1(read);
7674   INS01  : ISS; // Dual issue as instruction 0 or 1
7675   ALU    : EX2;
7676 %}
7677 
7678 //------- Conditional instructions ------------------------
7679 
7680 // Conditional no operands: only the flags are read
7681 // Eg.  CSINC   x0, zr, zr, <cond>
7682 pipe_class icond_none(iRegI dst, rFlagsReg cr)
7683 %{
7684   single_instruction;
7685   cr     : EX1(read);
7686   dst    : EX2(write);
7687   INS01  : ISS; // Dual issue as instruction 0 or 1
7688   ALU    : EX2;
7689 %}
7690 
7691 // Conditional 2 operand: flags and both register sources read in EX1
7692 // EG.  CSEL    X0, X1, X2, <cond>
7693 pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
7694 %{
7695   single_instruction;
7696   cr     : EX1(read);
7697   src1   : EX1(read);
7698   src2   : EX1(read);
7699   dst    : EX2(write);
7700   INS01  : ISS; // Dual issue as instruction 0 or 1
7701   ALU    : EX2;
7702 %}
7703 
7704 // Conditional 1 operand: flags and a single register source read in EX1
7705 // EG.  CSEL    X0, X1, zr, <cond>
7706 pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
7707 %{
7708   single_instruction;
7709   cr     : EX1(read);
7710   src    : EX1(read);
7711   dst    : EX2(write);
7712   INS01  : ISS; // Dual issue as instruction 0 or 1
7713   ALU    : EX2;
7714 %}
7715 
7716 //------- Multiply pipeline operations --------------------
7717 
7718 // Multiply reg-reg (no fixed_latency override, unlike lmul_reg_reg)
7719 // Eg.  MUL     w0, w1, w2
7720 pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7721 %{
7722   single_instruction;
7723   dst    : WR(write);
7724   src1   : ISS(read);
7725   src2   : ISS(read);
7726   INS01  : ISS; // Dual issue as instruction 0 or 1
7727   MAC    : WR; // uses the MAC resource
7728 %}
7729 
7730 // Multiply accumulate: three register sources, all read at issue
7731 // Eg.  MADD    w0, w1, w2, w3
7732 pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
7733 %{
7734   single_instruction;
7735   dst    : WR(write);
7736   src1   : ISS(read);
7737   src2   : ISS(read);
7738   src3   : ISS(read);
7739   INS01  : ISS; // Dual issue as instruction 0 or 1
7740   MAC    : WR; // uses the MAC resource
7741 %}
7742 
7743 // Eg.  MUL     x0, x1, x2   (64 bit multiply reg-reg)
7744 pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7745 %{
7746   single_instruction;
7747   fixed_latency(3); // Maximum latency for 64 bit mul
7748   dst    : WR(write);
7749   src1   : ISS(read);
7750   src2   : ISS(read);
7751   INS01  : ISS; // Dual issue as instruction 0 or 1
7752   MAC    : WR; // uses the MAC resource
7753 %}
7754 
7755 // Multiply accumulate (64 bit)
7756 // Eg.  MADD    x0, x1, x2, x3
7757 pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
7758 %{
7759   single_instruction;
7760   fixed_latency(3); // Maximum latency for 64 bit mul
7761   dst    : WR(write);
7762   src1   : ISS(read);
7763   src2   : ISS(read);
7764   src3   : ISS(read);
7765   INS01  : ISS; // Dual issue as instruction 0 or 1
7766   MAC    : WR; // uses the MAC resource
7767 %}
7768 
7769 //------- Divide pipeline operations --------------------
7770 
7771 // Eg.  SDIV    w0, w1, w2   (32 bit divide)
7772 pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7773 %{
7774   single_instruction;
7775   fixed_latency(8); // Maximum latency for 32 bit divide
7776   dst    : WR(write);
7777   src1   : ISS(read);
7778   src2   : ISS(read);
7779   INS0   : ISS; // Can only dual issue as instruction 0
7780   DIV    : WR; // uses the DIV resource
7781 %}
7782 
7783 // Eg.  SDIV    x0, x1, x2   (64 bit divide)
7784 pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7785 %{
7786   single_instruction;
7787   fixed_latency(16); // Maximum latency for 64 bit divide
7788   dst    : WR(write);
7789   src1   : ISS(read);
7790   src2   : ISS(read);
7791   INS0   : ISS; // Can only dual issue as instruction 0
7792   DIV    : WR; // uses the DIV resource
7793 %}
7794 
7795 //------- Load pipeline operations ------------------------
7796 
7797 // Load - prefetch (no destination register)
7798 // Eg.  PRFM    <mem>
7799 pipe_class iload_prefetch(memory mem)
7800 %{
7801   single_instruction;
7802   mem    : ISS(read);
7803   INS01  : ISS; // Dual issue as instruction 0 or 1
7804   LDST   : WR; // uses the LDST resource
7805 %}
7806 
7807 // Load - reg, mem: address read at issue, result written in WR
7808 // Eg.  LDR     x0, <mem>
7809 pipe_class iload_reg_mem(iRegI dst, memory mem)
7810 %{
7811   single_instruction;
7812   dst    : WR(write);
7813   mem    : ISS(read);
7814   INS01  : ISS; // Dual issue as instruction 0 or 1
7815   LDST   : WR; // uses the LDST resource
7816 %}
7817 
7818 // Load - reg, reg: index register read at issue, result written in WR
7819 // Eg.  LDR     x0, [sp, x1]
7820 pipe_class iload_reg_reg(iRegI dst, iRegI src)
7821 %{
7822   single_instruction;
7823   dst    : WR(write);
7824   src    : ISS(read);
7825   INS01  : ISS; // Dual issue as instruction 0 or 1
7826   LDST   : WR; // uses the LDST resource
7827 %}
7828 
7829 //------- Store pipeline operations -----------------------
7830 
7831 // Store - zr, mem: no register source, only the address is read
7832 // Eg.  STR     zr, <mem>
7833 pipe_class istore_mem(memory mem)
7834 %{
7835   single_instruction;
7836   mem    : ISS(read);
7837   INS01  : ISS; // Dual issue as instruction 0 or 1
7838   LDST   : WR; // uses the LDST resource
7839 %}
7840 
7841 // Store - reg, mem: address read at issue, stored value read in EX2
7842 // Eg.  STR     x0, <mem>
7843 pipe_class istore_reg_mem(iRegI src, memory mem)
7844 %{
7845   single_instruction;
7846   mem    : ISS(read);
7847   src    : EX2(read);
7848   INS01  : ISS; // Dual issue as instruction 0 or 1
7849   LDST   : WR; // uses the LDST resource
7850 %}
7851 
7852 // Store - reg, reg
7853 // Eg. STR      x0, [sp, x1]
7854 pipe_class istore_reg_reg(iRegI dst, iRegI src)
7855 %{
7856   single_instruction;
7857   dst    : ISS(read); // despite its name, dst is only read (at issue)
7858   src    : EX2(read); // src read in EX2
7859   INS01  : ISS; // Dual issue as instruction 0 or 1
7860   LDST   : WR; // uses the LDST resource
7861 %}
7862 
7863 //------- Branch pipeline operations ----------------------
7864 
7865 // Branch (unconditional: no operands are read)
7866 pipe_class pipe_branch()
7867 %{
7868   single_instruction;
7869   INS01  : ISS; // Dual issue as instruction 0 or 1
7870   BRANCH : EX1; // uses the BRANCH resource
7871 %}
7872 
7873 // Conditional branch: flags read in EX1
7874 pipe_class pipe_branch_cond(rFlagsReg cr)
7875 %{
7876   single_instruction;
7877   cr     : EX1(read);
7878   INS01  : ISS; // Dual issue as instruction 0 or 1
7879   BRANCH : EX1; // uses the BRANCH resource
7880 %}
7881 
7882 // Compare & Branch: tests a register directly, no flags operand
7883 // EG.  CBZ/CBNZ
7884 pipe_class pipe_cmp_branch(iRegI op1)
7885 %{
7886   single_instruction;
7887   op1    : EX1(read);
7888   INS01  : ISS; // Dual issue as instruction 0 or 1
7889   BRANCH : EX1; // uses the BRANCH resource
7890 %}
7891 
7892 //------- Synchronisation operations ----------------------
7893 
7894 // Any operation requiring serialization.
7895 // EG.  DMB/Atomic Ops/Load Acquire/Str Release
7896 pipe_class pipe_serial()
7897 %{
7898   single_instruction;
7899   force_serialization;
7900   fixed_latency(16); // latency fixed at 16, not derived from operand stages
7901   INS01  : ISS(2); // Cannot dual issue with any other instruction
7902   LDST   : WR; // uses the LDST resource
7903 %}
7904 
7905 // Generic big/slow expanded idiom - also serialized
7906 pipe_class pipe_slow()
7907 %{
7908   instruction_count(10); // declared as 10 instructions, not single_instruction
7909   multiple_bundles;
7910   force_serialization;
7911   fixed_latency(16); // same fixed latency as pipe_serial
7912   INS01  : ISS(2); // Cannot dual issue with any other instruction
7913   LDST   : WR; // uses the LDST resource
7914 %}
7915 
7916 // Empty pipeline class: zero fixed latency and no resource usage
7917 pipe_class pipe_class_empty()
7918 %{
7919   single_instruction;
7920   fixed_latency(0);
7921 %}
7922 
7923 // Default pipeline class: fixed latency of 2, no resource usage
7924 pipe_class pipe_class_default()
7925 %{
7926   single_instruction;
7927   fixed_latency(2);
7928 %}
7929 
7930 // Pipeline class for compares: fixed latency of 16, no resource usage
7931 pipe_class pipe_class_compare()
7932 %{
7933   single_instruction;
7934   fixed_latency(16);
7935 %}
7936 
7937 // Pipeline class for memory operations: fixed latency of 16, no resource usage
7938 pipe_class pipe_class_memory()
7939 %{
7940   single_instruction;
7941   fixed_latency(16);
7942 %}
7943 
7944 // Pipeline class for call: fixed latency of 100, no resource usage
7945 pipe_class pipe_class_call()
7946 %{
7947   single_instruction;
7948   fixed_latency(100);
7949 %}
7950 
7951 // Define the class for the Nop node: MachNop uses pipe_class_empty.
7952 define %{
7953    MachNop = pipe_class_empty;
7954 %}
7955 
7956 %}
7957 //----------INSTRUCTIONS-------------------------------------------------------
7958 //
7959 // match      -- States which machine-independent subtree may be replaced
7960 //               by this instruction.
7961 // ins_cost   -- The estimated cost of this instruction is used by instruction
7962 //               selection to identify a minimum cost tree of machine
7963 //               instructions that matches a tree of machine-independent
7964 //               instructions.
7965 // format     -- A string providing the disassembly for this instruction.
7966 //               The value of an instruction's operand may be inserted
7967 //               by referring to it with a '$' prefix.
7968 // opcode     -- Three instruction opcodes may be provided.  These are referred
7969 //               to within an encode class as $primary, $secondary, and $tertiary
7970 //               respectively.  The primary opcode is commonly used to
7971 //               indicate the type of machine instruction, while secondary
7972 //               and tertiary are often used for prefix options or addressing
7973 //               modes.
7974 // ins_encode -- A list of encode classes with parameters. The encode class
7975 //               name must have been defined in an 'enc_class' specification
7976 //               in the encode section of the architecture description.
7977 
7978 // ============================================================================
7979 // Memory (Load/Store) Instructions
7980 
7981 // Load Instructions
7982 
7983 // Load Byte (8 bit signed); matched only when needs_acquiring_load(n) is false
7984 instruct loadB(iRegINoSp dst, memory mem)
7985 %{
7986   match(Set dst (LoadB mem));
7987   predicate(!needs_acquiring_load(n));
7988 
7989   ins_cost(4 * INSN_COST);
7990   format %{ "ldrsbw  $dst, $mem\t# byte" %}
7991 
7992   ins_encode(aarch64_enc_ldrsbw(dst, mem));
7993 
7994   ins_pipe(iload_reg_mem);
7995 %}
7996 
7997 // Load Byte (8 bit signed) into long; folds ConvI2L into the load, predicate checks the LoadB at n->in(1)
7998 instruct loadB2L(iRegLNoSp dst, memory mem)
7999 %{
8000   match(Set dst (ConvI2L (LoadB mem)));
8001   predicate(!needs_acquiring_load(n->in(1)));
8002 
8003   ins_cost(4 * INSN_COST);
8004   format %{ "ldrsb  $dst, $mem\t# byte" %}
8005 
8006   ins_encode(aarch64_enc_ldrsb(dst, mem));
8007 
8008   ins_pipe(iload_reg_mem);
8009 %}
8010 
8011 // Load Byte (8 bit unsigned); matched only when needs_acquiring_load(n) is false
8012 instruct loadUB(iRegINoSp dst, memory mem)
8013 %{
8014   match(Set dst (LoadUB mem));
8015   predicate(!needs_acquiring_load(n));
8016 
8017   ins_cost(4 * INSN_COST);
8018   format %{ "ldrbw  $dst, $mem\t# byte" %}
8019 
8020   ins_encode(aarch64_enc_ldrb(dst, mem));
8021 
8022   ins_pipe(iload_reg_mem);
8023 %}
8024 
8025 // Load Byte (8 bit unsigned) into long; same ldrb encoding as loadUB, predicate checks the LoadUB at n->in(1)
8026 instruct loadUB2L(iRegLNoSp dst, memory mem)
8027 %{
8028   match(Set dst (ConvI2L (LoadUB mem)));
8029   predicate(!needs_acquiring_load(n->in(1)));
8030 
8031   ins_cost(4 * INSN_COST);
8032   format %{ "ldrb  $dst, $mem\t# byte" %}
8033 
8034   ins_encode(aarch64_enc_ldrb(dst, mem));
8035 
8036   ins_pipe(iload_reg_mem);
8037 %}
8038 
8039 // Load Short (16 bit signed); matched only when needs_acquiring_load(n) is false
8040 instruct loadS(iRegINoSp dst, memory mem)
8041 %{
8042   match(Set dst (LoadS mem));
8043   predicate(!needs_acquiring_load(n));
8044 
8045   ins_cost(4 * INSN_COST);
8046   format %{ "ldrshw  $dst, $mem\t# short" %}
8047 
8048   ins_encode(aarch64_enc_ldrshw(dst, mem));
8049 
8050   ins_pipe(iload_reg_mem);
8051 %}
8052 
8053 // Load Short (16 bit signed) into long; folds ConvI2L into the load, predicate checks the LoadS at n->in(1)
8054 instruct loadS2L(iRegLNoSp dst, memory mem)
8055 %{
8056   match(Set dst (ConvI2L (LoadS mem)));
8057   predicate(!needs_acquiring_load(n->in(1)));
8058 
8059   ins_cost(4 * INSN_COST);
8060   format %{ "ldrsh  $dst, $mem\t# short" %}
8061 
8062   ins_encode(aarch64_enc_ldrsh(dst, mem));
8063 
8064   ins_pipe(iload_reg_mem);
8065 %}
8066 
8067 // Load Char (16 bit unsigned); matched only when needs_acquiring_load(n) is false
8068 instruct loadUS(iRegINoSp dst, memory mem)
8069 %{
8070   match(Set dst (LoadUS mem));
8071   predicate(!needs_acquiring_load(n));
8072 
8073   ins_cost(4 * INSN_COST);
8074   format %{ "ldrh  $dst, $mem\t# short" %}
8075 
8076   ins_encode(aarch64_enc_ldrh(dst, mem));
8077 
8078   ins_pipe(iload_reg_mem);
8079 %}
8080 
8081 // Load Short/Char (16 bit unsigned) into long; same ldrh encoding as loadUS, predicate checks the LoadUS at n->in(1)
8082 instruct loadUS2L(iRegLNoSp dst, memory mem)
8083 %{
8084   match(Set dst (ConvI2L (LoadUS mem)));
8085   predicate(!needs_acquiring_load(n->in(1)));
8086 
8087   ins_cost(4 * INSN_COST);
8088   format %{ "ldrh  $dst, $mem\t# short" %}
8089 
8090   ins_encode(aarch64_enc_ldrh(dst, mem));
8091 
8092   ins_pipe(iload_reg_mem);
8093 %}
8094 
8095 // Load Integer (32 bit signed); matched only when needs_acquiring_load(n) is false
8096 instruct loadI(iRegINoSp dst, memory mem)
8097 %{
8098   match(Set dst (LoadI mem));
8099   predicate(!needs_acquiring_load(n));
8100 
8101   ins_cost(4 * INSN_COST);
8102   format %{ "ldrw  $dst, $mem\t# int" %}
8103 
8104   ins_encode(aarch64_enc_ldrw(dst, mem));
8105 
8106   ins_pipe(iload_reg_mem);
8107 %}
8108 
8109 // Load Integer (32 bit signed) into long; folds ConvI2L into an ldrsw, predicate checks the LoadI at n->in(1)
8110 instruct loadI2L(iRegLNoSp dst, memory mem)
8111 %{
8112   match(Set dst (ConvI2L (LoadI mem)));
8113   predicate(!needs_acquiring_load(n->in(1)));
8114 
8115   ins_cost(4 * INSN_COST);
8116   format %{ "ldrsw  $dst, $mem\t# int" %}
8117 
8118   ins_encode(aarch64_enc_ldrsw(dst, mem));
8119 
8120   ins_pipe(iload_reg_mem);
8121 %}
8122 
8123 // Load Integer (32 bit unsigned) into long; folds AndL(ConvI2L(LoadI), mask) into an ldrw, predicate checks the LoadI two inputs down
8124 instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
8125 %{
8126   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
8127   predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));
8128 
8129   ins_cost(4 * INSN_COST);
8130   format %{ "ldrw  $dst, $mem\t# int" %}
8131 
8132   ins_encode(aarch64_enc_ldrw(dst, mem));
8133 
8134   ins_pipe(iload_reg_mem);
8135 %}
8136 
8137 // Load Long (64 bit signed); matched only when needs_acquiring_load(n) is false
8138 instruct loadL(iRegLNoSp dst, memory mem)
8139 %{
8140   match(Set dst (LoadL mem));
8141   predicate(!needs_acquiring_load(n));
8142 
8143   ins_cost(4 * INSN_COST);
8144   format %{ "ldr  $dst, $mem\t# long" %}
8145 
8146   ins_encode(aarch64_enc_ldr(dst, mem));
8147 
8148   ins_pipe(iload_reg_mem);
8149 %}
8150 
8151 // Load Range; 32 bit ldrw, and unlike the other loads here it has no acquiring-load predicate
8152 instruct loadRange(iRegINoSp dst, memory mem)
8153 %{
8154   match(Set dst (LoadRange mem));
8155 
8156   ins_cost(4 * INSN_COST);
8157   format %{ "ldrw  $dst, $mem\t# range" %}
8158 
8159   ins_encode(aarch64_enc_ldrw(dst, mem));
8160 
8161   ins_pipe(iload_reg_mem);
8162 %}
8163 
8164 // Load Pointer; matched only when needs_acquiring_load(n) is false
8165 instruct loadP(iRegPNoSp dst, memory mem)
8166 %{
8167   match(Set dst (LoadP mem));
8168   predicate(!needs_acquiring_load(n));
8169 
8170   ins_cost(4 * INSN_COST);
8171   format %{ "ldr  $dst, $mem\t# ptr" %}
8172 
8173   ins_encode(aarch64_enc_ldr(dst, mem));
8174 
8175   ins_pipe(iload_reg_mem);
8176 %}
8177 
8178 // Load Compressed Pointer (32 bit ldrw); matched only when needs_acquiring_load(n) is false
8179 instruct loadN(iRegNNoSp dst, memory mem)
8180 %{
8181   match(Set dst (LoadN mem));
8182   predicate(!needs_acquiring_load(n));
8183 
8184   ins_cost(4 * INSN_COST);
8185   format %{ "ldrw  $dst, $mem\t# compressed ptr" %}
8186 
8187   ins_encode(aarch64_enc_ldrw(dst, mem));
8188 
8189   ins_pipe(iload_reg_mem);
8190 %}
8191 
8192 // Load Klass Pointer; matched only when needs_acquiring_load(n) is false
8193 instruct loadKlass(iRegPNoSp dst, memory mem)
8194 %{
8195   match(Set dst (LoadKlass mem));
8196   predicate(!needs_acquiring_load(n));
8197 
8198   ins_cost(4 * INSN_COST);
8199   format %{ "ldr  $dst, $mem\t# class" %}
8200 
8201   ins_encode(aarch64_enc_ldr(dst, mem));
8202 
8203   ins_pipe(iload_reg_mem);
8204 %}
8205 
8206 // Load Narrow Klass Pointer (32 bit ldrw); matched only when needs_acquiring_load(n) is false
8207 instruct loadNKlass(iRegNNoSp dst, memory mem)
8208 %{
8209   match(Set dst (LoadNKlass mem));
8210   predicate(!needs_acquiring_load(n));
8211 
8212   ins_cost(4 * INSN_COST);
8213   format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}
8214 
8215   ins_encode(aarch64_enc_ldrw(dst, mem));
8216 
8217   ins_pipe(iload_reg_mem);
8218 %}
8219 
8220 // Load Float into an FP register; uses the generic pipe_class_memory rather than iload_reg_mem
8221 instruct loadF(vRegF dst, memory mem)
8222 %{
8223   match(Set dst (LoadF mem));
8224   predicate(!needs_acquiring_load(n));
8225 
8226   ins_cost(4 * INSN_COST);
8227   format %{ "ldrs  $dst, $mem\t# float" %}
8228 
8229   ins_encode( aarch64_enc_ldrs(dst, mem) );
8230 
8231   ins_pipe(pipe_class_memory);
8232 %}
8233 
8234 // Load Double into an FP register; uses the generic pipe_class_memory rather than iload_reg_mem
8235 instruct loadD(vRegD dst, memory mem)
8236 %{
8237   match(Set dst (LoadD mem));
8238   predicate(!needs_acquiring_load(n));
8239 
8240   ins_cost(4 * INSN_COST);
8241   format %{ "ldrd  $dst, $mem\t# double" %}
8242 
8243   ins_encode( aarch64_enc_ldrd(dst, mem) );
8244 
8245   ins_pipe(pipe_class_memory);
8246 %}
8247 
8248 
8249 // Load Int Constant; any immI, encoded by aarch64_enc_movw_imm
8250 instruct loadConI(iRegINoSp dst, immI src)
8251 %{
8252   match(Set dst src);
8253 
8254   ins_cost(INSN_COST);
8255   format %{ "mov $dst, $src\t# int" %}
8256 
8257   ins_encode( aarch64_enc_movw_imm(dst, src) );
8258 
8259   ins_pipe(ialu_imm);
8260 %}
8261 
8262 // Load Long Constant; any immL, encoded by aarch64_enc_mov_imm
8263 instruct loadConL(iRegLNoSp dst, immL src)
8264 %{
8265   match(Set dst src);
8266 
8267   ins_cost(INSN_COST);
8268   format %{ "mov $dst, $src\t# long" %}
8269 
8270   ins_encode( aarch64_enc_mov_imm(dst, src) );
8271 
8272   ins_pipe(ialu_imm);
8273 %}
8274 
8275 // Load Pointer Constant; general immP case, costed at 4 * INSN_COST
8276 
8277 instruct loadConP(iRegPNoSp dst, immP con)
8278 %{
8279   match(Set dst con);
8280 
8281   ins_cost(INSN_COST * 4);
8282   format %{
8283     "mov  $dst, $con\t# ptr\n\t"
8284   %}
8285 
8286   ins_encode(aarch64_enc_mov_p(dst, con));
8287 
8288   ins_pipe(ialu_imm);
8289 %}
8290 
8291 // Load Null Pointer Constant; immP0 case, cheaper (INSN_COST) than the general loadConP
8292 
8293 instruct loadConP0(iRegPNoSp dst, immP0 con)
8294 %{
8295   match(Set dst con);
8296 
8297   ins_cost(INSN_COST);
8298   format %{ "mov  $dst, $con\t# NULL ptr" %}
8299 
8300   ins_encode(aarch64_enc_mov_p0(dst, con));
8301 
8302   ins_pipe(ialu_imm);
8303 %}
8304 
8305 // Load Pointer Constant One; immP_1 case, cheaper (INSN_COST) than the general loadConP
8306 
8307 instruct loadConP1(iRegPNoSp dst, immP_1 con)
8308 %{
8309   match(Set dst con);
8310 
8311   ins_cost(INSN_COST);
8312   format %{ "mov  $dst, $con\t# ptr 1" %}
8313 
8314   ins_encode(aarch64_enc_mov_p1(dst, con));
8315 
8316   ins_pipe(ialu_imm);
8317 %}
8318 
8319 // Load Poll Page Constant; immPollPage operand, encoded by aarch64_enc_mov_poll_page
8320 
8321 instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
8322 %{
8323   match(Set dst con);
8324 
8325   ins_cost(INSN_COST);
8326   format %{ "adr  $dst, $con\t# Poll Page Ptr" %}
8327 
8328   ins_encode(aarch64_enc_mov_poll_page(dst, con));
8329 
8330   ins_pipe(ialu_imm);
8331 %}
8332 
8333 // Load Byte Map Base Constant; immByteMapBase operand, encoded by aarch64_enc_mov_byte_map_base
8334 
8335 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
8336 %{
8337   match(Set dst con);
8338 
8339   ins_cost(INSN_COST);
8340   format %{ "adr  $dst, $con\t# Byte Map Base" %}
8341 
8342   ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
8343 
8344   ins_pipe(ialu_imm);
8345 %}
8346 
8347 // Load Narrow Pointer Constant; general immN case, costed at 4 * INSN_COST
8348 
8349 instruct loadConN(iRegNNoSp dst, immN con)
8350 %{
8351   match(Set dst con);
8352 
8353   ins_cost(INSN_COST * 4);
8354   format %{ "mov  $dst, $con\t# compressed ptr" %}
8355 
8356   ins_encode(aarch64_enc_mov_n(dst, con));
8357 
8358   ins_pipe(ialu_imm);
8359 %}
8360 
8361 // Load Narrow Null Pointer Constant; immN0 case, cheaper (INSN_COST) than the general loadConN
8362 
8363 instruct loadConN0(iRegNNoSp dst, immN0 con)
8364 %{
8365   match(Set dst con);
8366 
8367   ins_cost(INSN_COST);
8368   format %{ "mov  $dst, $con\t# compressed NULL ptr" %}
8369 
8370   ins_encode(aarch64_enc_mov_n0(dst, con));
8371 
8372   ins_pipe(ialu_imm);
8373 %}
8374 
8375 // Load Narrow Klass Constant; immNKlass operand, encoded by aarch64_enc_mov_nk
8376 
8377 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
8378 %{
8379   match(Set dst con);
8380 
8381   ins_cost(INSN_COST);
8382   format %{ "mov  $dst, $con\t# compressed klass ptr" %}
8383 
8384   ins_encode(aarch64_enc_mov_nk(dst, con));
8385 
8386   ins_pipe(ialu_imm);
8387 %}
8388 
8389 // Load Packed Float Constant; immFPacked operand emitted as an fmovs immediate (constant passed as double)
8390 
8391 instruct loadConF_packed(vRegF dst, immFPacked con) %{
8392   match(Set dst con);
8393   ins_cost(INSN_COST * 4);
8394   format %{ "fmovs  $dst, $con"%}
8395   ins_encode %{
8396     __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
8397   %}
8398 
8399   ins_pipe(fp_imm_s);
8400 %}
8401 
8402 // Load Float Constant; general immF case, loaded with ldrs from $constantaddress($con)
8403 
8404 instruct loadConF(vRegF dst, immF con) %{
8405   match(Set dst con);
8406 
8407   ins_cost(INSN_COST * 4);
8408 
8409   format %{
8410     "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
8411   %}
8412 
8413   ins_encode %{
8414     __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
8415   %}
8416 
8417   ins_pipe(fp_load_constant_s);
8418 %}
8419 
8420 // Load Packed Double Constant; immDPacked operand emitted as an fmovd immediate
8421 
8422 instruct loadConD_packed(vRegD dst, immDPacked con) %{
8423   match(Set dst con);
8424   ins_cost(INSN_COST);
8425   format %{ "fmovd  $dst, $con"%}
8426   ins_encode %{
8427     __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
8428   %}
8429 
8430   ins_pipe(fp_imm_d);
8431 %}
8432 
8433 // Load Double Constant; general immD case, loaded with ldrd from $constantaddress($con)
8434 
8435 instruct loadConD(vRegD dst, immD con) %{
8436   match(Set dst con);
8437 
8438   ins_cost(INSN_COST * 5);
8439   format %{
8440     "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
8441   %}
8442 
8443   ins_encode %{
8444     __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
8445   %}
8446 
8447   ins_pipe(fp_load_constant_d);
8448 %}
8449 
8450 // Store Instructions
8451 
8452 // Store CMS card-mark Immediate; plain zero-byte store, matched only when unnecessary_storestore(n) holds
8453 instruct storeimmCM0(immI0 zero, memory mem)
8454 %{
8455   match(Set mem (StoreCM mem zero));
8456   predicate(unnecessary_storestore(n));
8457 
8458   ins_cost(INSN_COST);
8459   format %{ "strb zr, $mem\t# byte" %}
8460 
8461   ins_encode(aarch64_enc_strb0(mem));
8462 
8463   ins_pipe(istore_mem);
8464 %}
8465 
8466 // Store CMS card-mark Immediate with intervening StoreStore
8467 // needed when using CMS with no conditional card marking; no predicate, costed at 2 * INSN_COST
8468 instruct storeimmCM0_ordered(immI0 zero, memory mem)
8469 %{
8470   match(Set mem (StoreCM mem zero));
8471 
8472   ins_cost(INSN_COST * 2);
8473   format %{ "dmb ishst"
8474       "\n\tstrb zr, $mem\t# byte" %}
8475 
8476   ins_encode(aarch64_enc_strb0_ordered(mem));
8477 
8478   ins_pipe(istore_mem);
8479 %}
8480 
8481 // Store Byte; matched only when needs_releasing_store(n) is false
8482 instruct storeB(iRegIorL2I src, memory mem)
8483 %{
8484   match(Set mem (StoreB mem src));
8485   predicate(!needs_releasing_store(n));
8486 
8487   ins_cost(INSN_COST);
8488   format %{ "strb  $src, $mem\t# byte" %}
8489 
8490   ins_encode(aarch64_enc_strb(src, mem));
8491 
8492   ins_pipe(istore_reg_mem);
8493 %}
8494 
8495 
8496 instruct storeimmB0(immI0 zero, memory mem)
8497 %{
8498   match(Set mem (StoreB mem zero));
8499   predicate(!needs_releasing_store(n));
8500 
8501   ins_cost(INSN_COST); // Store zero Byte; same aarch64_enc_strb0 encoding as storeimmCM0
8502   format %{ "strb zr, $mem\t# byte" %}
8503 
8504   ins_encode(aarch64_enc_strb0(mem));
8505 
8506   ins_pipe(istore_mem);
8507 %}
8508 
8509 // Store Char/Short; matched only when needs_releasing_store(n) is false
8510 instruct storeC(iRegIorL2I src, memory mem)
8511 %{
8512   match(Set mem (StoreC mem src));
8513   predicate(!needs_releasing_store(n));
8514 
8515   ins_cost(INSN_COST);
8516   format %{ "strh  $src, $mem\t# short" %}
8517 
8518   ins_encode(aarch64_enc_strh(src, mem));
8519 
8520   ins_pipe(istore_reg_mem);
8521 %}
8522 
8523 instruct storeimmC0(immI0 zero, memory mem)
8524 %{
8525   match(Set mem (StoreC mem zero));
8526   predicate(!needs_releasing_store(n));
8527 
8528   ins_cost(INSN_COST); // Store zero Char/Short; no source register, zr is stored
8529   format %{ "strh  zr, $mem\t# short" %}
8530 
8531   ins_encode(aarch64_enc_strh0(mem));
8532 
8533   ins_pipe(istore_mem);
8534 %}
8535 
8536 // Store Integer; matched only when needs_releasing_store(n) is false
8537 
8538 instruct storeI(iRegIorL2I src, memory mem)
8539 %{
8540   match(Set mem(StoreI mem src));
8541   predicate(!needs_releasing_store(n));
8542 
8543   ins_cost(INSN_COST);
8544   format %{ "strw  $src, $mem\t# int" %}
8545 
8546   ins_encode(aarch64_enc_strw(src, mem));
8547 
8548   ins_pipe(istore_reg_mem);
8549 %}
8550 
8551 instruct storeimmI0(immI0 zero, memory mem)
8552 %{
8553   match(Set mem(StoreI mem zero));
8554   predicate(!needs_releasing_store(n));
8555 
8556   ins_cost(INSN_COST); // Store zero Integer; no source register, zr is stored
8557   format %{ "strw  zr, $mem\t# int" %}
8558 
8559   ins_encode(aarch64_enc_strw0(mem));
8560 
8561   ins_pipe(istore_mem);
8562 %}
8563 
8564 // Store Long (64 bit signed); matched only when needs_releasing_store(n) is false
8565 instruct storeL(iRegL src, memory mem)
8566 %{
8567   match(Set mem (StoreL mem src));
8568   predicate(!needs_releasing_store(n));
8569 
8570   ins_cost(INSN_COST);
8571   format %{ "str  $src, $mem\t# long" %}
8572 
8573   ins_encode(aarch64_enc_str(src, mem));
8574 
8575   ins_pipe(istore_reg_mem);
8576 %}
8577 
8578 // Store zero Long (64 bit); no source register, zr is stored
8579 instruct storeimmL0(immL0 zero, memory mem)
8580 %{
8581   match(Set mem (StoreL mem zero));
8582   predicate(!needs_releasing_store(n));
8583 
8584   ins_cost(INSN_COST);
8585   format %{ "str  zr, $mem\t# long" %}
8586 
8587   ins_encode(aarch64_enc_str0(mem));
8588 
8589   ins_pipe(istore_mem);
8590 %}
8591 
8592 // Store Pointer; matched only when needs_releasing_store(n) is false
8593 instruct storeP(iRegP src, memory mem)
8594 %{
8595   match(Set mem (StoreP mem src));
8596   predicate(!needs_releasing_store(n));
8597 
8598   ins_cost(INSN_COST);
8599   format %{ "str  $src, $mem\t# ptr" %}
8600 
8601   ins_encode(aarch64_enc_str(src, mem));
8602 
8603   ins_pipe(istore_reg_mem);
8604 %}
8605 
8606 // Store null Pointer; no source register, zr is stored via aarch64_enc_str0
8607 instruct storeimmP0(immP0 zero, memory mem)
8608 %{
8609   match(Set mem (StoreP mem zero));
8610   predicate(!needs_releasing_store(n));
8611 
8612   ins_cost(INSN_COST);
8613   format %{ "str zr, $mem\t# ptr" %}
8614 
8615   ins_encode(aarch64_enc_str0(mem));
8616 
8617   ins_pipe(istore_mem);
8618 %}
8619 
8620 // Store Compressed Pointer (32 bit strw); matched only when needs_releasing_store(n) is false
8621 instruct storeN(iRegN src, memory mem)
8622 %{
8623   match(Set mem (StoreN mem src));
8624   predicate(!needs_releasing_store(n));
8625 
8626   ins_cost(INSN_COST);
8627   format %{ "strw  $src, $mem\t# compressed ptr" %}
8628 
8629   ins_encode(aarch64_enc_strw(src, mem));
8630 
8631   ins_pipe(istore_reg_mem);
8632 %}
8633 
// Store null Compressed Pointer
// Stores the heapbase register itself; the predicate restricts this rule
// to the case where both narrow bases are NULL (format: rheapbase==0).
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}
  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_cost(INSN_COST);
  ins_pipe(istore_reg_mem);
%}
8648 
// Store Float
// Plain (non-releasing) store of a float register to memory.
instruct storeF(vRegF src, memory mem)
%{
  // Only applies when needs_releasing_store(n) is false.
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreF mem src));

  format %{ "strs  $src, $mem\t# float" %}
  ins_encode(aarch64_enc_strs(src, mem));

  ins_cost(INSN_COST);
  ins_pipe(pipe_class_memory);
%}
8662 
8663 // TODO
8664 // implement storeImmF0 and storeFImmPacked
8665 
// Store Double
// Plain (non-releasing) store of a double register to memory.
instruct storeD(vRegD src, memory mem)
%{
  // Only applies when needs_releasing_store(n) is false.
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreD mem src));

  format %{ "strd  $src, $mem\t# double" %}
  ins_encode(aarch64_enc_strd(src, mem));

  ins_cost(INSN_COST);
  ins_pipe(pipe_class_memory);
%}
8679 
// Store Compressed Klass Pointer
// Uses the same strw encoding class as the compressed oop store.
instruct storeNKlass(iRegN src, memory mem)
%{
  match(Set mem (StoreNKlass mem src));
  // Only applies when needs_releasing_store(n) is false.
  predicate(!needs_releasing_store(n));

  format %{ "strw  $src, $mem\t# compressed klass ptr" %}
  ins_encode(aarch64_enc_strw(src, mem));

  ins_cost(INSN_COST);
  ins_pipe(istore_reg_mem);
%}
8693 
8694 // TODO
8695 // implement storeImmD0 and storeDImmPacked
8696 
8697 // prefetch instructions
8698 // Must be safe to execute with invalid address (cannot fault).
8699 
// Allocation prefetch: matches PrefetchAllocation and is printed as a
// prfm with the PSTL1KEEP hint.
instruct prefetchalloc( memory mem ) %{
  match(PrefetchAllocation mem);

  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}
  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_cost(INSN_COST);
  ins_pipe(iload_prefetch);
%}
8710 
8711 //  ---------------- volatile loads and stores ----------------
8712 
// Load Byte (8 bit signed), volatile form
// No predicate; costed at VOLATILE_REF_COST and restricted to the
// "indirect" memory operand.
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  format %{ "ldarsb  $dst, $mem\t# byte" %}
  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_serial);
%}
8725 
// Load Byte (8 bit signed) into long, volatile form
// Folds the ConvI2L into the load; reuses the loadB_volatile encoding class.
// NOTE(review): assumes aarch64_enc_ldarsb sign-extends to the full 64 bits
// -- confirm against the enc_class definition.
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  format %{ "ldarsb  $dst, $mem\t# byte" %}
  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_serial);
%}
8738 
// Load Byte (8 bit unsigned), volatile form
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  format %{ "ldarb  $dst, $mem\t# byte" %}
  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_serial);
%}
8751 
// Load Byte (8 bit unsigned) into long, volatile form
// Folds the ConvI2L into the load; reuses the loadUB_volatile encoding class.
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  format %{ "ldarb  $dst, $mem\t# byte" %}
  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_serial);
%}
8764 
// Load Short (16 bit signed), volatile form
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  format %{ "ldarshw  $dst, $mem\t# short" %}
  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_serial);
%}
8777 
// Load Short/Char (16 bit unsigned), volatile form
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  format %{ "ldarhw  $dst, $mem\t# short" %}
  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_serial);
%}
8789 
// Load Short/Char (16 bit unsigned) into long, volatile form
// Folds the ConvI2L into the load.
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  format %{ "ldarh  $dst, $mem\t# short" %}
  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_serial);
%}
8802 
// Load Short (16 bit signed) into long, volatile form
// Folds the ConvI2L into the load.
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  // Mnemonic mirrors the signed encoding class used below (ldarsh), so the
  // listing is distinguishable from the unsigned loadUS2L_volatile rule.
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
8815 
// Load Integer (32 bit signed), volatile form
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  format %{ "ldarw  $dst, $mem\t# int" %}
  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_serial);
%}
8828 
// Load Integer (32 bit unsigned) into long, volatile form
// Matches the whole (AndL (ConvI2L (LoadI mem)) mask) tree with one load;
// the mask operand is constrained to immL_32bits.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  format %{ "ldarw  $dst, $mem\t# int" %}
  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_serial);
%}
8841 
// Load Long (64 bit signed), volatile form
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  // The debug tag is "long": this rule matches LoadL, not LoadI.
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
8854 
// Load Pointer, volatile form
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  format %{ "ldar  $dst, $mem\t# ptr" %}
  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_serial);
%}
8867 
// Load Compressed Pointer, volatile form
// Uses the same ldarw encoding class as the 32-bit int load.
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}
  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_serial);
%}
8880 
// Load Float, volatile form
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  format %{ "ldars  $dst, $mem\t# float" %}
  ins_encode(aarch64_enc_fldars(dst, mem));

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_serial);
%}
8893 
// Load Double, volatile form
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  format %{ "ldard  $dst, $mem\t# double" %}
  ins_encode(aarch64_enc_fldard(dst, mem));

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_serial);
%}
8906 
// Store Byte, volatile form
// No predicate; costed at VOLATILE_REF_COST and restricted to the
// "indirect" memory operand.
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  format %{ "stlrb  $src, $mem\t# byte" %}
  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_class_memory);
%}
8919 
// Store Char/Short, volatile form
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  format %{ "stlrh  $src, $mem\t# short" %}
  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_class_memory);
%}
8932 
// Store Integer, volatile form
instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreI mem src));

  format %{ "stlrw  $src, $mem\t# int" %}
  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_class_memory);
%}
8946 
// Store Long (64 bit signed), volatile form
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  // The debug tag is "long": this rule matches StoreL, not StoreI.
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
8959 
// Store Pointer, volatile form
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  format %{ "stlr  $src, $mem\t# ptr" %}
  ins_encode(aarch64_enc_stlr(src, mem));

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_class_memory);
%}
8972 
// Store Compressed Pointer, volatile form
// Uses the same stlrw encoding class as the 32-bit int store.
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  format %{ "stlrw  $src, $mem\t# compressed ptr" %}
  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_class_memory);
%}
8985 
// Store Float, volatile form
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  format %{ "stlrs  $src, $mem\t# float" %}
  ins_encode(aarch64_enc_fstlrs(src, mem));

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_class_memory);
%}
8998 
8999 // TODO
9000 // implement storeImmF0 and storeFImmPacked
9001 
// Store Double, volatile form
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  format %{ "stlrd  $src, $mem\t# double" %}
  ins_encode(aarch64_enc_fstlrd(src, mem));

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_class_memory);
%}
9014 
9015 //  ---------------- end of volatile loads and stores ----------------
9016 
9017 // ============================================================================
9018 // BSWAP Instructions
9019 
// ReverseBytesI: a single revw from src into dst.
instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesI src));

  format %{ "revw  $dst, $src" %}
  ins_encode %{
    __ revw($dst$$Register, $src$$Register);
  %}

  ins_cost(INSN_COST);
  ins_pipe(ialu_reg);
%}
9032 
// ReverseBytesL: a single rev from src into dst.
instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
  match(Set dst (ReverseBytesL src));

  format %{ "rev  $dst, $src" %}
  ins_encode %{
    __ rev($dst$$Register, $src$$Register);
  %}

  ins_cost(INSN_COST);
  ins_pipe(ialu_reg);
%}
9045 
// ReverseBytesUS: a single rev16w from src into dst.
instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesUS src));

  format %{ "rev16w  $dst, $src" %}
  ins_encode %{
    __ rev16w($dst$$Register, $src$$Register);
  %}

  ins_cost(INSN_COST);
  ins_pipe(ialu_reg);
%}
9058 
// ReverseBytesS: rev16w into dst, then sbfmw on bits 0..15 of dst to
// produce the signed short result.
instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesS src));

  format %{ "rev16w  $dst, $src\n\t"
            "sbfmw $dst, $dst, #0, #15" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ rev16w(result, $src$$Register);
    __ sbfmw(result, result, 0U, 15U);
  %}

  ins_cost(INSN_COST);
  ins_pipe(ialu_reg);
%}
9073 
9074 // ============================================================================
9075 // Zero Count Instructions
9076 
// CountLeadingZerosI: a single clzw.
instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountLeadingZerosI src));

  format %{ "clzw  $dst, $src" %}
  ins_encode %{
    __ clzw($dst$$Register, $src$$Register);
  %}

  ins_cost(INSN_COST);
  ins_pipe(ialu_reg);
%}
9088 
// CountLeadingZerosL: a single clz; the result lands in an int register.
instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountLeadingZerosL src));

  format %{ "clz   $dst, $src" %}
  ins_encode %{
    __ clz($dst$$Register, $src$$Register);
  %}

  ins_cost(INSN_COST);
  ins_pipe(ialu_reg);
%}
9100 
// CountTrailingZerosI: bit-reverse with rbitw, then count leading zeros
// with clzw (two instructions, hence the doubled cost).
instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountTrailingZerosI src));

  format %{ "rbitw  $dst, $src\n\t"
            "clzw   $dst, $dst" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ rbitw(result, $src$$Register);
    __ clzw(result, result);
  %}

  ins_cost(INSN_COST * 2);
  ins_pipe(ialu_reg);
%}
9114 
// CountTrailingZerosL: bit-reverse with rbit, then count leading zeros
// with clz (two instructions, hence the doubled cost).
instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountTrailingZerosL src));

  format %{ "rbit   $dst, $src\n\t"
            "clz    $dst, $dst" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ rbit(result, $src$$Register);
    __ clz(result, result);
  %}

  ins_cost(INSN_COST * 2);
  ins_pipe(ialu_reg);
%}
9128 
9129 //---------- Population Count Instructions -------------------------------------
9130 //
9131 
// PopCountI via the vector unit: zero-extend the 32-bit input, move it to
// a vector register, count bits per byte (cnt), sum the bytes (addv), and
// move the result back to a general register.
//
// The zero-extension is written to $dst rather than back into $src.
// $src is an input operand (iRegIorL2I, so it may be the register of a
// long seen through ConvL2I); overwriting it would clear the upper half of
// a value that may still be live.  $dst is overwritten by this rule
// anyway, so it is safe to use as the staging register.
instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $dst, $src\n\t"
            "mov    $tmp, $dst\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register); // ensure top 32 bits 0, src untouched
    __ mov($tmp$$FloatRegister, __ T1D, 0, $dst$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
9153 
// PopCountI of a value loaded from memory: load directly into the vector
// temp with ldrs, then cnt/addv and move the result to dst.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  match(Set dst (PopCountI (LoadI mem)));
  predicate(UsePopCountInstruction);
  effect(TEMP tmp);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister vtmp = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, vtmp, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt(vtmp, __ T8B, vtmp);
    __ addv(vtmp, __ T8B, vtmp);
    __ mov($dst$$Register, vtmp, __ T1D, 0);
  %}

  ins_cost(INSN_COST * 13);
  ins_pipe(pipe_class_default);
%}
9175 
// PopCountL via the vector unit: move src to the vector temp, cnt/addv,
// and move the result back.
// Note: Long.bitCount(long) returns an int, hence the iRegINoSp dst.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  match(Set dst (PopCountL src));
  predicate(UsePopCountInstruction);
  effect(TEMP tmp);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister vtmp = $tmp$$FloatRegister;
    __ mov(vtmp, __ T1D, 0, $src$$Register);
    __ cnt(vtmp, __ T8B, vtmp);
    __ addv(vtmp, __ T8B, vtmp);
    __ mov($dst$$Register, vtmp, __ T1D, 0);
  %}

  ins_cost(INSN_COST * 13);
  ins_pipe(pipe_class_default);
%}
9196 
// PopCountL of a value loaded from memory: load directly into the vector
// temp with ldrd, then cnt/addv and move the result to dst.
instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  match(Set dst (PopCountL (LoadL mem)));
  predicate(UsePopCountInstruction);
  effect(TEMP tmp);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister vtmp = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, vtmp, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt(vtmp, __ T8B, vtmp);
    __ addv(vtmp, __ T8B, vtmp);
    __ mov($dst$$Register, vtmp, __ T1D, 0);
  %}

  ins_cost(INSN_COST * 13);
  ins_pipe(pipe_class_default);
%}
9218 
9219 // ============================================================================
9220 // MemBar Instruction
9221 
// LoadFence: emits a LoadLoad|LoadStore membar.
instruct load_fence() %{
  match(LoadFence);

  format %{ "load_fence" %}
  ins_encode %{
    __ membar(Assembler::LoadLoad | Assembler::LoadStore);
  %}

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_serial);
%}
9233 
// MemBarAcquire that unnecessary_acquire(n) reports as redundant: costs
// nothing and emits only a block comment.
instruct unnecessary_membar_acquire() %{
  match(MemBarAcquire);
  predicate(unnecessary_acquire(n));

  format %{ "membar_acquire (elided)" %}
  ins_encode %{
    __ block_comment("membar_acquire (elided)");
  %}

  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
9247 
// MemBarAcquire (general case): emits a LoadLoad|LoadStore membar.
instruct membar_acquire() %{
  match(MemBarAcquire);

  format %{ "membar_acquire" %}
  ins_encode %{
    __ block_comment("membar_acquire");
    __ membar(Assembler::LoadLoad | Assembler::LoadStore);
  %}

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_serial);
%}
9261 
9262 
// MemBarAcquireLock: always elided; only a block comment is emitted.
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);

  format %{ "membar_acquire_lock (elided)" %}
  ins_encode %{
    __ block_comment("membar_acquire_lock (elided)");
  %}

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_serial);
%}
9275 
// StoreFence: emits a LoadStore|StoreStore membar.
instruct store_fence() %{
  match(StoreFence);

  format %{ "store_fence" %}
  ins_encode %{
    __ membar(Assembler::LoadStore | Assembler::StoreStore);
  %}

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_serial);
%}
9287 
// MemBarRelease that unnecessary_release(n) reports as redundant: costs
// nothing and emits only a block comment.
instruct unnecessary_membar_release() %{
  match(MemBarRelease);
  predicate(unnecessary_release(n));

  format %{ "membar_release (elided)" %}
  ins_encode %{
    __ block_comment("membar_release (elided)");
  %}

  ins_cost(0);
  ins_pipe(pipe_serial);
%}
9300 
// MemBarRelease (general case): emits a LoadStore|StoreStore membar.
instruct membar_release() %{
  match(MemBarRelease);

  format %{ "membar_release" %}
  ins_encode %{
    __ block_comment("membar_release");
    __ membar(Assembler::LoadStore | Assembler::StoreStore);
  %}

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_serial);
%}
9313 
// MemBarStoreStore: emits a StoreStore membar.
instruct membar_storestore() %{
  match(MemBarStoreStore);

  format %{ "MEMBAR-store-store" %}
  ins_encode %{
    __ membar(Assembler::StoreStore);
  %}

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_serial);
%}
9325 
// MemBarReleaseLock: always elided; only a block comment is emitted.
instruct membar_release_lock() %{
  match(MemBarReleaseLock);

  format %{ "membar_release_lock (elided)" %}
  ins_encode %{
    __ block_comment("membar_release_lock (elided)");
  %}

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_serial);
%}
9338 
// MemBarVolatile that unnecessary_volatile(n) reports as redundant: costs
// nothing and emits only a block comment.
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(unnecessary_volatile(n));

  format %{ "membar_volatile (elided)" %}
  ins_encode %{
    __ block_comment("membar_volatile (elided)");
  %}

  ins_cost(0);
  ins_pipe(pipe_serial);
%}
9352 
// MemBarVolatile (general case): emits a StoreLoad membar.  Costed at
// 100x VOLATILE_REF_COST, far above the elided variant.
instruct membar_volatile() %{
  match(MemBarVolatile);

  format %{ "membar_volatile" %}
  ins_encode %{
    __ block_comment("membar_volatile");
    __ membar(Assembler::StoreLoad);
  %}

  ins_cost(VOLATILE_REF_COST*100);
  ins_pipe(pipe_serial);
%}
9366 
9367 // ============================================================================
9368 // Cast/Convert Instructions
9369 
// CastX2P (long -> ptr): a register move, skipped entirely when the
// allocator placed dst and src in the same register.
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  format %{ "mov $dst, $src\t# long -> ptr" %}
  ins_encode %{
    const int dst_enc = $dst$$reg;
    const int src_enc = $src$$reg;
    if (dst_enc != src_enc) {
      __ mov(as_Register(dst_enc), as_Register(src_enc));
    }
  %}

  ins_cost(INSN_COST);
  ins_pipe(ialu_reg);
%}
9384 
// CastP2X (ptr -> long): a register move, skipped entirely when the
// allocator placed dst and src in the same register.
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  format %{ "mov $dst, $src\t# ptr -> long" %}
  ins_encode %{
    const int dst_enc = $dst$$reg;
    const int src_enc = $src$$reg;
    if (dst_enc != src_enc) {
      __ mov(as_Register(dst_enc), as_Register(src_enc));
    }
  %}

  ins_cost(INSN_COST);
  ins_pipe(ialu_reg);
%}
9399 
// Convert oop into int for vectors alignment masking.
// Matches ConvL2I(CastP2X src) as one 32-bit register move (movw).
instruct convP2I(iRegINoSp dst, iRegP src) %{
  match(Set dst (ConvL2I (CastP2X src)));

  format %{ "movw $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_cost(INSN_COST);
  ins_pipe(ialu_reg);
%}
9412 
// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
// With a zero narrow-oop shift (see predicate) the whole
// ConvL2I(CastP2X(DecodeN src)) tree is matched as one 32-bit move.
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  // Format now names the real operand ($dst) and the instruction that is
  // actually emitted below (movw).
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
9428 
9429 
// Convert oop pointer into compressed form.
// Selected when the node's pointer type is not NotNull; declares the
// flags register as killed.
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  match(Set dst (EncodeP src));
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  effect(KILL cr);

  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop($dst$$Register, $src$$Register);
  %}

  ins_cost(INSN_COST * 3);
  ins_pipe(ialu_reg);
%}
9444 
// Convert a known non-null oop pointer into compressed form.
// NOTE(review): cr is declared as an operand but, unlike encodeHeapOop,
// no KILL effect is stated here -- confirm this is intended.
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  match(Set dst (EncodeP src));
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);

  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register from = $src$$Register;
    Register to   = $dst$$Register;
    __ encode_heap_oop_not_null(to, from);
  %}

  ins_cost(INSN_COST * 3);
  ins_pipe(ialu_reg);
%}
9455 
// Decode a compressed oop whose pointer type is neither NotNull nor
// Constant.
// NOTE(review): cr is declared as an operand but no KILL effect is
// stated -- confirm this is intended.
instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  match(Set dst (DecodeN src));
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);

  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    __ decode_heap_oop($dst$$Register, $src$$Register);
  %}

  ins_cost(INSN_COST * 3);
  ins_pipe(ialu_reg);
%}
9469 
// Decode a compressed oop whose pointer type is NotNull or Constant.
// NOTE(review): cr is declared as an operand but no KILL effect is
// stated -- confirm this is intended.
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  match(Set dst (DecodeN src));
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);

  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ decode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}

  ins_cost(INSN_COST * 3);
  ins_pipe(ialu_reg);
%}
9483 
9484 // n.b. AArch64 implementations of encode_klass_not_null and
9485 // decode_klass_not_null do not modify the flags register so, unlike
9486 // Intel, we don't kill CR as a side effect here
9487 
// EncodePKlass: compress a klass pointer via encode_klass_not_null.
// No flags operand is declared for this rule.
instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  format %{ "encode_klass_not_null $dst,$src" %}
  ins_encode %{
    __ encode_klass_not_null($dst$$Register, $src$$Register);
  %}

  ins_cost(INSN_COST * 3);
  ins_pipe(ialu_reg);
%}
9502 
// DecodeNKlass: expand a compressed klass pointer.  When dst and src share
// a register the one-argument (in-place) decode_klass_not_null is used;
// otherwise the two-argument form.
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  format %{ "decode_klass_not_null $dst,$src" %}
  ins_encode %{
    Register to   = $dst$$Register;
    Register from = $src$$Register;
    if (to == from) {
      __ decode_klass_not_null(to);
    } else {
      __ decode_klass_not_null(to, from);
    }
  %}

  ins_cost(INSN_COST * 3);
  ins_pipe(ialu_reg);
%}
9521 
// CheckCastPP: zero-size rule with an empty encoding; dst is both the
// input and the result.
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  size(0);
  ins_pipe(pipe_class_empty);
%}
9531 
// CastPP: zero-size rule with an empty encoding; dst is both the input
// and the result.
instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  size(0);
  ins_pipe(pipe_class_empty);
%}
9541 
// CastII: zero-size, zero-cost rule with an empty encoding; dst is both
// the input and the result.
instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  size(0);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
9552 
9553 // ============================================================================
9554 // Atomic operation instructions
9555 //
9556 // Intel and SPARC both implement Ideal Node LoadPLocked and
9557 // Store{PIL}Conditional instructions using a normal load for the
9558 // LoadPLocked and a CAS for the Store{PIL}Conditional.
9559 //
// The ideal code appears only to use LoadPLocked/StorePConditional as
// a pair to lock object allocations from Eden space when not using
// TLABs.
9563 //
9564 // There does not appear to be a Load{IL}Locked Ideal Node and the
9565 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
9566 // and to use StoreIConditional only for 32-bit and StoreLConditional
9567 // only for 64-bit.
9568 //
// We implement LoadPLocked and StorePConditional using, respectively,
// the AArch64 hw load-exclusive and store-conditional instructions,
// whereas each of Store{IL}Conditional must be implemented using a
// CAS, which employs a pair of instructions comprising a
// load-exclusive followed by a store-conditional.
9575 
9576 
// Locked-load (linked load) of the current heap-top,
// used when updating the eden heap top.
// Implemented using ldaxr on AArch64.

instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}
  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_cost(VOLATILE_REF_COST);
  ins_pipe(pipe_serial);
%}
9593 
// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flag (EQ) on success.
// implemented using stlxr on AArch64.

instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  // The two listing lines are separated by "\n\t" so they print as
  // separate instructions instead of running together.
  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release\n\t"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
9618 
9619 
// storeLConditional is used by PhaseMacroExpand::expand_lock_node
// when attempting to rebias a lock towards the current thread.  We
// must use the acquire form of cmpxchg in order to guarantee acquire
// semantics in this case.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  // The two listing lines are separated by "\n\t" so they print as
  // separate instructions instead of running together.
  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval\n\t"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
9639 
// storeIConditional also has acquire semantics, for no better reason
// than matching storeLConditional.  At the time of writing this
// comment storeIConditional was not used anywhere by AArch64.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  // The two listing lines are separated by "\n\t" so they print as
  // separate instructions instead of running together.
  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval\n\t"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
9658 
9659 // standard CompareAndSwapX when we are using barriers
9660 // these have higher priority than the rules selected by a predicate
9661 
9662 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
9663 // can't match them
9664 
// CompareAndSwapB: byte-sized cmpxchg followed by cset, so $res is 1 when
// the flags report EQ and 0 otherwise.  The flags register is killed.
instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  // The two listing lines are separated by "\n\t" so they print as
  // separate instructions instead of running together.
  format %{
    "cmpxchgb $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgb(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9682 
// CompareAndSwapS: short-sized cmpxchg followed by cset, so $res is 1 when
// the flags report EQ and 0 otherwise.  The flags register is killed.
instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  // The two listing lines are separated by "\n\t" so they print as
  // separate instructions instead of running together.
  format %{
    "cmpxchgs $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgs(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9700 
// Matches CompareAndSwapI: int compare-and-swap on [$mem] using the
// aarch64_enc_cmpxchgw encoding class. $res is then set from the EQ flag
// (1 if EQ, else 0); the condition flags are clobbered (KILL cr).
9701 instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
9702 
9703   match(Set res (CompareAndSwapI mem (Binary oldval newval)));
     // Twice the cost of the predicated compareAndSwapIAcq rule.
9704   ins_cost(2 * VOLATILE_REF_COST);
9705 
9706   effect(KILL cr);
9707 
9708  format %{
9709     "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
9710     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9711  %}
9712 
9713  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
9714             aarch64_enc_cset_eq(res));
9715 
9716   ins_pipe(pipe_slow);
9717 %}
9718 
// Matches CompareAndSwapL: long compare-and-swap on [$mem] using the
// aarch64_enc_cmpxchg encoding class. The result $res is an int register
// set from the EQ flag (1 if EQ, else 0); flags are clobbered (KILL cr).
9719 instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
9720 
9721   match(Set res (CompareAndSwapL mem (Binary oldval newval)));
     // Twice the cost of the predicated compareAndSwapLAcq rule.
9722   ins_cost(2 * VOLATILE_REF_COST);
9723 
9724   effect(KILL cr);
9725 
9726  format %{
9727     "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
9728     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9729  %}
9730 
9731  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
9732             aarch64_enc_cset_eq(res));
9733 
9734   ins_pipe(pipe_slow);
9735 %}
9736 
// Matches CompareAndSwapP: pointer compare-and-swap on [$mem] using the
// same aarch64_enc_cmpxchg encoding class as the long rule. $res is an int
// register set from the EQ flag (1 if EQ, else 0); flags are clobbered.
9737 instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
9738 
9739   match(Set res (CompareAndSwapP mem (Binary oldval newval)));
     // Twice the cost of the predicated compareAndSwapPAcq rule.
9740   ins_cost(2 * VOLATILE_REF_COST);
9741 
9742   effect(KILL cr);
9743 
9744  format %{
9745     "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
9746     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9747  %}
9748 
9749  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
9750             aarch64_enc_cset_eq(res));
9751 
9752   ins_pipe(pipe_slow);
9753 %}
9754 
// Matches CompareAndSwapN: narrow-oop compare-and-swap on [$mem] using the
// same aarch64_enc_cmpxchgw encoding class as the int rule. $res is set
// from the EQ flag (1 if EQ, else 0); flags are clobbered (KILL cr).
9755 instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
9756 
9757   match(Set res (CompareAndSwapN mem (Binary oldval newval)));
     // Twice the cost of the predicated compareAndSwapNAcq rule.
9758   ins_cost(2 * VOLATILE_REF_COST);
9759 
9760   effect(KILL cr);
9761 
9762  format %{
9763     "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
9764     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9765  %}
9766 
9767  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
9768             aarch64_enc_cset_eq(res));
9769 
9770   ins_pipe(pipe_slow);
9771 %}
9772 
9773 // alternative CompareAndSwapX when we are eliding barriers
9774 
// Alternative rule for CompareAndSwapI, applicable only when
// needs_acquiring_load_exclusive(n) holds. It uses the _acq encoding class
// and is costed at VOLATILE_REF_COST, half of the unpredicated rule's cost.
// $res is set from the EQ flag (1 if EQ, else 0); flags are clobbered.
9775 instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
9776 
9777   predicate(needs_acquiring_load_exclusive(n));
9778   match(Set res (CompareAndSwapI mem (Binary oldval newval)));
9779   ins_cost(VOLATILE_REF_COST);
9780 
9781   effect(KILL cr);
9782 
9783  format %{
9784     "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
9785     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9786  %}
9787 
9788  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
9789             aarch64_enc_cset_eq(res));
9790 
9791   ins_pipe(pipe_slow);
9792 %}
9793 
// Alternative rule for CompareAndSwapL, applicable only when
// needs_acquiring_load_exclusive(n) holds. It uses the _acq encoding class
// and is costed at VOLATILE_REF_COST, half of the unpredicated rule's cost.
// $res is set from the EQ flag (1 if EQ, else 0); flags are clobbered.
9794 instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
9795 
9796   predicate(needs_acquiring_load_exclusive(n));
9797   match(Set res (CompareAndSwapL mem (Binary oldval newval)));
9798   ins_cost(VOLATILE_REF_COST);
9799 
9800   effect(KILL cr);
9801 
9802  format %{
9803     "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
9804     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9805  %}
9806 
9807  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
9808             aarch64_enc_cset_eq(res));
9809 
9810   ins_pipe(pipe_slow);
9811 %}
9812 
// Alternative rule for CompareAndSwapP, applicable only when
// needs_acquiring_load_exclusive(n) holds. It uses the _acq encoding class
// and is costed at VOLATILE_REF_COST, half of the unpredicated rule's cost.
// $res is set from the EQ flag (1 if EQ, else 0); flags are clobbered.
9813 instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
9814 
9815   predicate(needs_acquiring_load_exclusive(n));
9816   match(Set res (CompareAndSwapP mem (Binary oldval newval)));
9817   ins_cost(VOLATILE_REF_COST);
9818 
9819   effect(KILL cr);
9820 
9821  format %{
9822     "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
9823     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9824  %}
9825 
9826  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
9827             aarch64_enc_cset_eq(res));
9828 
9829   ins_pipe(pipe_slow);
9830 %}
9831 
// Alternative rule for CompareAndSwapN, applicable only when
// needs_acquiring_load_exclusive(n) holds. It uses the _acq encoding class
// and is costed at VOLATILE_REF_COST, half of the unpredicated rule's cost.
// $res is set from the EQ flag (1 if EQ, else 0); flags are clobbered.
9832 instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
9833 
9834   predicate(needs_acquiring_load_exclusive(n));
9835   match(Set res (CompareAndSwapN mem (Binary oldval newval)));
9836   ins_cost(VOLATILE_REF_COST);
9837 
9838   effect(KILL cr);
9839 
9840  format %{
9841     "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
9842     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9843  %}
9844 
9845  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
9846             aarch64_enc_cset_eq(res));
9847 
9848   ins_pipe(pipe_slow);
9849 %}
9850 
9851 
9852 // ---------------------------------------------------------------------
9853 
9854 
9855 // BEGIN This section of the file is automatically generated. Do not edit --------------
9856 
9857 // Sundry CAS operations.  Note that release is always true,
9858 // regardless of the memory ordering of the CAS.  This is because we
9859 // need the volatile case to be sequentially consistent but there is
9860 // no trailing StoreLoad barrier emitted by C2.  Unfortunately we
9861 // can't check the type of memory ordering here, so we always emit a
9862 // STLXR.
9863 
9864 // This section is generated from aarch64_ad_cas.m4
9865 
9866 
9867 
// Matches CompareAndExchangeB: byte compare-and-exchange on [$mem] whose
// result register $res is handed to cmpxchg and then sign-extended from a
// byte. $res is TEMP_DEF and the condition flags are clobbered (KILL cr).
// NOTE(review): the format text says "weak" but the encoding passes
// /*weak*/ false; any correction belongs in aarch64_ad_cas.m4.
9868 instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
9869   match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
9870   ins_cost(2 * VOLATILE_REF_COST);
9871   effect(TEMP_DEF res, KILL cr);
9872   format %{
9873     "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
9874   %}
9875   ins_encode %{
       // Compare against the zero-extended low byte of $oldval, held in rscratch2.
9876     __ uxtbw(rscratch2, $oldval$$Register);
9877     __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
9878                Assembler::byte, /*acquire*/ false, /*release*/ true,
9879                /*weak*/ false, $res$$Register);
       // Sign-extend the byte left in $res to a full int.
9880     __ sxtbw($res$$Register, $res$$Register);
9881   %}
9882   ins_pipe(pipe_slow);
9883 %}
9884 
// Matches CompareAndExchangeS: halfword compare-and-exchange on [$mem] whose
// result register $res is handed to cmpxchg and then sign-extended from a
// halfword. $res is TEMP_DEF and the condition flags are clobbered.
// NOTE(review): the format text says "weak" but the encoding passes
// /*weak*/ false; any correction belongs in aarch64_ad_cas.m4.
9885 instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
9886   match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
9887   ins_cost(2 * VOLATILE_REF_COST);
9888   effect(TEMP_DEF res, KILL cr);
9889   format %{
9890     "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
9891   %}
9892   ins_encode %{
       // Compare against the zero-extended low halfword of $oldval, held in rscratch2.
9893     __ uxthw(rscratch2, $oldval$$Register);
9894     __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
9895                Assembler::halfword, /*acquire*/ false, /*release*/ true,
9896                /*weak*/ false, $res$$Register);
       // Sign-extend the halfword left in $res to a full int.
9897     __ sxthw($res$$Register, $res$$Register);
9898   %}
9899   ins_pipe(pipe_slow);
9900 %}
9901 
// Matches CompareAndExchangeI: word-sized compare-and-exchange on [$mem]
// with $res passed to cmpxchg as its result register. $res is TEMP_DEF and
// the condition flags are clobbered (KILL cr).
// NOTE(review): the format text says "weak" but the encoding passes
// /*weak*/ false; any correction belongs in aarch64_ad_cas.m4.
9902 instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
9903   match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
9904   ins_cost(2 * VOLATILE_REF_COST);
9905   effect(TEMP_DEF res, KILL cr);
9906   format %{
9907     "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
9908   %}
9909   ins_encode %{
9910     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
9911                Assembler::word, /*acquire*/ false, /*release*/ true,
9912                /*weak*/ false, $res$$Register);
9913   %}
9914   ins_pipe(pipe_slow);
9915 %}
9916 
// Matches CompareAndExchangeL: xword-sized compare-and-exchange on [$mem]
// with the long register $res passed to cmpxchg as its result register.
// $res is TEMP_DEF and the condition flags are clobbered (KILL cr).
// NOTE(review): the format text says "weak" but the encoding passes
// /*weak*/ false; any correction belongs in aarch64_ad_cas.m4.
9917 instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
9918   match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
9919   ins_cost(2 * VOLATILE_REF_COST);
9920   effect(TEMP_DEF res, KILL cr);
9921   format %{
9922     "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
9923   %}
9924   ins_encode %{
9925     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
9926                Assembler::xword, /*acquire*/ false, /*release*/ true,
9927                /*weak*/ false, $res$$Register);
9928   %}
9929   ins_pipe(pipe_slow);
9930 %}
9931 
// Matches CompareAndExchangeN: word-sized compare-and-exchange of a narrow
// oop on [$mem], with $res passed to cmpxchg as its result register.
// $res is TEMP_DEF and the condition flags are clobbered (KILL cr).
// NOTE(review): the format text says "weak" but the encoding passes
// /*weak*/ false; any correction belongs in aarch64_ad_cas.m4.
9932 instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
9933   match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
9934   ins_cost(2 * VOLATILE_REF_COST);
9935   effect(TEMP_DEF res, KILL cr);
9936   format %{
9937     "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
9938   %}
9939   ins_encode %{
9940     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
9941                Assembler::word, /*acquire*/ false, /*release*/ true,
9942                /*weak*/ false, $res$$Register);
9943   %}
9944   ins_pipe(pipe_slow);
9945 %}
9946 
// Matches CompareAndExchangeP: xword-sized compare-and-exchange of a pointer
// on [$mem], with $res passed to cmpxchg as its result register.
// $res is TEMP_DEF and the condition flags are clobbered (KILL cr).
// NOTE(review): the format text says "weak" but the encoding passes
// /*weak*/ false; any correction belongs in aarch64_ad_cas.m4.
9947 instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
9948   match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
9949   ins_cost(2 * VOLATILE_REF_COST);
9950   effect(TEMP_DEF res, KILL cr);
9951   format %{
9952     "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
9953   %}
9954   ins_encode %{
9955     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
9956                Assembler::xword, /*acquire*/ false, /*release*/ true,
9957                /*weak*/ false, $res$$Register);
9958   %}
9959   ins_pipe(pipe_slow);
9960 %}
9961 
// Matches WeakCompareAndSwapB: byte cmpxchg on [$mem] issued with
// /*weak*/ true and no result register (noreg). $res is then set from the
// EQ flag (1 if EQ, else 0); the condition flags are clobbered (KILL cr).
9962 instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
9963   match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
9964   ins_cost(2 * VOLATILE_REF_COST);
9965   effect(KILL cr);
9966   format %{
9967     "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
9968     "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9969   %}
9970   ins_encode %{
       // Compare against the zero-extended low byte of $oldval, held in rscratch2.
9971     __ uxtbw(rscratch2, $oldval$$Register);
9972     __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
9973                Assembler::byte, /*acquire*/ false, /*release*/ true,
9974                /*weak*/ true, noreg);
9975     __ csetw($res$$Register, Assembler::EQ);
9976   %}
9977   ins_pipe(pipe_slow);
9978 %}
9979 
// Matches WeakCompareAndSwapS: halfword cmpxchg on [$mem] issued with
// /*weak*/ true and no result register (noreg). $res is then set from the
// EQ flag (1 if EQ, else 0); the condition flags are clobbered (KILL cr).
9980 instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
9981   match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
9982   ins_cost(2 * VOLATILE_REF_COST);
9983   effect(KILL cr);
9984   format %{
9985     "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
9986     "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9987   %}
9988   ins_encode %{
       // Compare against the zero-extended low halfword of $oldval, held in rscratch2.
9989     __ uxthw(rscratch2, $oldval$$Register);
9990     __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
9991                Assembler::halfword, /*acquire*/ false, /*release*/ true,
9992                /*weak*/ true, noreg);
9993     __ csetw($res$$Register, Assembler::EQ);
9994   %}
9995   ins_pipe(pipe_slow);
9996 %}
9997 
// Matches WeakCompareAndSwapI: word-sized cmpxchg on [$mem] issued with
// /*weak*/ true and no result register (noreg). $res is then set from the
// EQ flag (1 if EQ, else 0); the condition flags are clobbered (KILL cr).
9998 instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
9999   match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
10000   ins_cost(2 * VOLATILE_REF_COST);
10001   effect(KILL cr);
10002   format %{
10003     "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
10004     "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
10005   %}
10006   ins_encode %{
10007     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
10008                Assembler::word, /*acquire*/ false, /*release*/ true,
10009                /*weak*/ true, noreg);
10010     __ csetw($res$$Register, Assembler::EQ);
10011   %}
10012   ins_pipe(pipe_slow);
10013 %}
10014 
// Matches WeakCompareAndSwapL: xword-sized cmpxchg on [$mem] issued with
// /*weak*/ true and no result register (noreg). The int register $res is
// then set from the EQ flag (1 if EQ, else 0); flags are clobbered.
10015 instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
10016   match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
10017   ins_cost(2 * VOLATILE_REF_COST);
10018   effect(KILL cr);
10019   format %{
10020     "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
10021     "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
10022   %}
10023   ins_encode %{
10024     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
10025                Assembler::xword, /*acquire*/ false, /*release*/ true,
10026                /*weak*/ true, noreg);
10027     __ csetw($res$$Register, Assembler::EQ);
10028   %}
10029   ins_pipe(pipe_slow);
10030 %}
10031 
// Matches WeakCompareAndSwapN: word-sized cmpxchg of a narrow oop on [$mem]
// issued with /*weak*/ true and no result register (noreg). $res is then
// set from the EQ flag (1 if EQ, else 0); flags are clobbered (KILL cr).
10032 instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
10033   match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
10034   ins_cost(2 * VOLATILE_REF_COST);
10035   effect(KILL cr);
10036   format %{
10037     "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
10038     "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
10039   %}
10040   ins_encode %{
10041     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
10042                Assembler::word, /*acquire*/ false, /*release*/ true,
10043                /*weak*/ true, noreg);
10044     __ csetw($res$$Register, Assembler::EQ);
10045   %}
10046   ins_pipe(pipe_slow);
10047 %}
10048 
// Matches WeakCompareAndSwapP: xword-sized cmpxchg of a pointer on [$mem]
// issued with /*weak*/ true and no result register (noreg). The int
// register $res is then set from the EQ flag (1 if EQ, else 0).
10049 instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
10050   match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
10051   ins_cost(2 * VOLATILE_REF_COST);
10052   effect(KILL cr);
10053   format %{
10054     "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
10055     "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
10056   %}
10057   ins_encode %{
10058     __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
10059                Assembler::xword, /*acquire*/ false, /*release*/ true,
10060                /*weak*/ true, noreg);
10061     __ csetw($res$$Register, Assembler::EQ);
10062   %}
10063   ins_pipe(pipe_slow);
10064 %}
10065 
10066 // END This section of the file is automatically generated. Do not edit --------------
10067 // ---------------------------------------------------------------------
10068 
// Matches GetAndSetI: emits atomic_xchgw with $prev as the destination,
// $newv as the value to store and the base register of $mem as the address.
10069 instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
10070   match(Set prev (GetAndSetI mem newv));
10071   format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
10072   ins_encode %{
10073     __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
10074   %}
10075   ins_pipe(pipe_serial);
10076 %}
10077 
// Matches GetAndSetL: emits atomic_xchg with $prev as the destination,
// $newv as the value to store and the base register of $mem as the address.
10078 instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
10079   match(Set prev (GetAndSetL mem newv));
10080   format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
10081   ins_encode %{
10082     __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
10083   %}
10084   ins_pipe(pipe_serial);
10085 %}
10086 
// Matches GetAndSetN: emits the 32-bit atomic_xchgw (same helper as the int
// rule) with $prev as the destination and the base register of $mem as the
// address.
// NOTE(review): $prev is declared iRegINoSp although $newv is iRegN --
// confirm this operand class is intended for the narrow-oop result.
10087 instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
10088   match(Set prev (GetAndSetN mem newv));
10089   format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
10090   ins_encode %{
10091     __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
10092   %}
10093   ins_pipe(pipe_serial);
10094 %}
10095 
// Matches GetAndSetP: emits atomic_xchg (same helper as the long rule) with
// $prev as the destination and the base register of $mem as the address.
10096 instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
10097   match(Set prev (GetAndSetP mem newv));
10098   format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
10099   ins_encode %{
10100     __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
10101   %}
10102   ins_pipe(pipe_serial);
10103 %}
10104 
10105 
// Matches GetAndAddL with a register increment: emits atomic_add with
// $newval as the result register and the base register of $mem as address.
// NOTE(review): the result operand is named "newval"; which value the
// atomic_add helper leaves in it is defined outside this rule -- confirm.
10106 instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
10107   match(Set newval (GetAndAddL mem incr));
10108   ins_cost(INSN_COST * 10);
10109   format %{ "get_and_addL $newval, [$mem], $incr" %}
10110   ins_encode %{
10111     __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
10112   %}
10113   ins_pipe(pipe_serial);
10114 %}
10115 
// Variant of get_and_addL for nodes whose result is unused
// (result_not_used()): passes noreg as the result register and is costed
// one INSN_COST lower than the result-producing rule.
10116 instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
10117   predicate(n->as_LoadStore()->result_not_used());
10118   match(Set dummy (GetAndAddL mem incr));
10119   ins_cost(INSN_COST * 9);
10120   format %{ "get_and_addL [$mem], $incr" %}
10121   ins_encode %{
10122     __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
10123   %}
10124   ins_pipe(pipe_serial);
10125 %}
10126 
// Matches GetAndAddL with an immLAddSub constant increment: emits atomic_add
// with the constant passed directly and $newval as the result register.
// NOTE(review): the result operand is named "newval"; which value the
// atomic_add helper leaves in it is defined outside this rule -- confirm.
10127 instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
10128   match(Set newval (GetAndAddL mem incr));
10129   ins_cost(INSN_COST * 10);
10130   format %{ "get_and_addL $newval, [$mem], $incr" %}
10131   ins_encode %{
10132     __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
10133   %}
10134   ins_pipe(pipe_serial);
10135 %}
10136 
// Variant of get_and_addLi for nodes whose result is unused
// (result_not_used()): passes noreg as the result register and is costed
// one INSN_COST lower than the result-producing rule.
10137 instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
10138   predicate(n->as_LoadStore()->result_not_used());
10139   match(Set dummy (GetAndAddL mem incr));
10140   ins_cost(INSN_COST * 9);
10141   format %{ "get_and_addL [$mem], $incr" %}
10142   ins_encode %{
10143     __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
10144   %}
10145   ins_pipe(pipe_serial);
10146 %}
10147 
// Matches GetAndAddI with a register increment: emits the 32-bit atomic_addw
// with $newval as the result register and the base register of $mem as
// address.
// NOTE(review): the result operand is named "newval"; which value the
// atomic_addw helper leaves in it is defined outside this rule -- confirm.
10148 instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
10149   match(Set newval (GetAndAddI mem incr));
10150   ins_cost(INSN_COST * 10);
10151   format %{ "get_and_addI $newval, [$mem], $incr" %}
10152   ins_encode %{
10153     __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
10154   %}
10155   ins_pipe(pipe_serial);
10156 %}
10157 
// Variant of get_and_addI for nodes whose result is unused
// (result_not_used()): passes noreg as the result register and is costed
// one INSN_COST lower than the result-producing rule.
10158 instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
10159   predicate(n->as_LoadStore()->result_not_used());
10160   match(Set dummy (GetAndAddI mem incr));
10161   ins_cost(INSN_COST * 9);
10162   format %{ "get_and_addI [$mem], $incr" %}
10163   ins_encode %{
10164     __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
10165   %}
10166   ins_pipe(pipe_serial);
10167 %}
10168 
// Matches GetAndAddI with an immIAddSub constant increment: emits
// atomic_addw with the constant passed directly and $newval as the result
// register.
// NOTE(review): the result operand is named "newval"; which value the
// atomic_addw helper leaves in it is defined outside this rule -- confirm.
10169 instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
10170   match(Set newval (GetAndAddI mem incr));
10171   ins_cost(INSN_COST * 10);
10172   format %{ "get_and_addI $newval, [$mem], $incr" %}
10173   ins_encode %{
10174     __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
10175   %}
10176   ins_pipe(pipe_serial);
10177 %}
10178 
// Variant of get_and_addIi for nodes whose result is unused
// (result_not_used()): passes noreg as the result register and is costed
// one INSN_COST lower than the result-producing rule.
10179 instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
10180   predicate(n->as_LoadStore()->result_not_used());
10181   match(Set dummy (GetAndAddI mem incr));
10182   ins_cost(INSN_COST * 9);
10183   format %{ "get_and_addI [$mem], $incr" %}
10184   ins_encode %{
10185     __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
10186   %}
10187   ins_pipe(pipe_serial);
10188 %}
10189 
10190 // Manifest a CmpL result in an integer register.
10191 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
// Register/register form: compares two long registers and materialises the
// three-way result in the int register $dst. The flags are clobbered.
10192 instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
10193 %{
10194   match(Set dst (CmpL3 src1 src2));
10195   effect(KILL flags);
10196 
10197   ins_cost(INSN_COST * 6);
10198   format %{
10199       "cmp $src1, $src2"
10200       "csetw $dst, ne"
10201       "cnegw $dst, lt"
10202   %}
10203   // format %{ "CmpL3 $dst, $src1, $src2" %}
10204   ins_encode %{
10205     __ cmp($src1$$Register, $src2$$Register);
        // dst = (src1 != src2) ? 1 : 0
10206     __ csetw($dst$$Register, Assembler::NE);
        // Negate dst when src1 < src2, turning 1 into -1.
10207     __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
10208   %}
10209 
10210   ins_pipe(pipe_class_default);
10211 %}
10212 
// Three-way long compare of a register against an immLAddSub constant.
// Produces the same -1/0/1 result in $dst as cmpL3_reg_reg and clobbers
// the flags.
10213 instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
10214 %{
10215   match(Set dst (CmpL3 src1 src2));
10216   effect(KILL flags);
10217 
10218   ins_cost(INSN_COST * 6);
10219   format %{
10220       "cmp $src1, $src2"
10221       "csetw $dst, ne"
10222       "cnegw $dst, lt"
10223   %}
10224   ins_encode %{
        // The constant is narrowed to 32 bits here; this presumably relies on
        // immLAddSub only admitting values that fit -- confirm against the
        // operand definition.
10225     int32_t con = (int32_t)$src2$$constant;
        // A negative constant is compared by adding its magnitude; otherwise
        // it is subtracted. Either way the result is discarded into zr and
        // only the flags are kept.
10226      if (con < 0) {
10227       __ adds(zr, $src1$$Register, -con);
10228     } else {
10229       __ subs(zr, $src1$$Register, con);
10230     }
        // dst = (src1 != con) ? 1 : 0, then negated when src1 < con.
10231     __ csetw($dst$$Register, Assembler::NE);
10232     __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
10233   %}
10234 
10235   ins_pipe(pipe_class_default);
10236 %}
10237 
10238 // ============================================================================
10239 // Conditional Move Instructions
10240 
10241 // n.b. we have identical rules for both a signed compare op (cmpOp)
10242 // and an unsigned compare op (cmpOpU). it would be nice if we could
10243 // define an op class which merged both inputs and use it to type the
10244 // argument to a single rule. unfortunately this fails because the
10245 // opclass does not live up to the COND_INTER interface of its
10246 // component operands. When the generic code tries to negate the
10247 // operand it ends up running the generic MachOper::negate method
10248 // which throws a ShouldNotHappen. So, we have to provide two flavours
10249 // of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
10250 
// Int conditional move on a signed compare (cmpOp / rFlagsReg).
// Emits cselw with $src2 as the first source and $src1 as the second, so
// $dst receives $src2 when $cmp holds and $src1 otherwise.
10251 instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10252   match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));
10253 
10254   ins_cost(INSN_COST * 2);
10255   format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}
10256 
10257   ins_encode %{
10258     __ cselw(as_Register($dst$$reg),
10259              as_Register($src2$$reg),
10260              as_Register($src1$$reg),
10261              (Assembler::Condition)$cmp$$cmpcode);
10262   %}
10263 
10264   ins_pipe(icond_reg_reg);
10265 %}
10266 
// Int conditional move on an unsigned compare (cmpOpU / rFlagsRegU).
// Same encoding as cmovI_reg_reg: $dst receives $src2 when $cmp holds and
// $src1 otherwise.
10267 instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10268   match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));
10269 
10270   ins_cost(INSN_COST * 2);
10271   format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}
10272 
10273   ins_encode %{
10274     __ cselw(as_Register($dst$$reg),
10275              as_Register($src2$$reg),
10276              as_Register($src1$$reg),
10277              (Assembler::Condition)$cmp$$cmpcode);
10278   %}
10279 
10280   ins_pipe(icond_reg_reg);
10281 %}
10282 
10283 // special cases where one arg is zero
10284 
10285 // n.b. this is selected in preference to the rule above because it
10286 // avoids loading constant 0 into a source register
10287 
10288 // TODO
10289 // we ought only to be able to cull one of these variants as the ideal
10290 // transforms ought always to order the zero consistently (to left/right?)
10291 
// Int conditional move, signed compare, where the first CMoveI value is the
// constant zero. Uses zr in place of a loaded constant: $dst receives $src
// when $cmp holds and zero otherwise.
10292 instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
10293   match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));
10294 
10295   ins_cost(INSN_COST * 2);
10296   format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}
10297 
10298   ins_encode %{
10299     __ cselw(as_Register($dst$$reg),
10300              as_Register($src$$reg),
10301              zr,
10302              (Assembler::Condition)$cmp$$cmpcode);
10303   %}
10304 
10305   ins_pipe(icond_reg);
10306 %}
10307 
// Int conditional move, unsigned compare, where the first CMoveI value is
// the constant zero. Uses zr in place of a loaded constant: $dst receives
// $src when $cmp holds and zero otherwise.
10308 instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
10309   match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));
10310 
10311   ins_cost(INSN_COST * 2);
10312   format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}
10313 
10314   ins_encode %{
10315     __ cselw(as_Register($dst$$reg),
10316              as_Register($src$$reg),
10317              zr,
10318              (Assembler::Condition)$cmp$$cmpcode);
10319   %}
10320 
10321   ins_pipe(icond_reg);
10322 %}
10323 
// Int conditional move, signed compare, where the second CMoveI value is the
// constant zero. Uses zr in place of a loaded constant: $dst receives zero
// when $cmp holds and $src otherwise.
10324 instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
10325   match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));
10326 
10327   ins_cost(INSN_COST * 2);
10328   format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}
10329 
10330   ins_encode %{
10331     __ cselw(as_Register($dst$$reg),
10332              zr,
10333              as_Register($src$$reg),
10334              (Assembler::Condition)$cmp$$cmpcode);
10335   %}
10336 
10337   ins_pipe(icond_reg);
10338 %}
10339 
// Int conditional move, unsigned compare, where the second CMoveI value is
// the constant zero. Uses zr in place of a loaded constant: $dst receives
// zero when $cmp holds and $src otherwise.
10340 instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
10341   match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));
10342 
10343   ins_cost(INSN_COST * 2);
10344   format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}
10345 
10346   ins_encode %{
10347     __ cselw(as_Register($dst$$reg),
10348              zr,
10349              as_Register($src$$reg),
10350              (Assembler::Condition)$cmp$$cmpcode);
10351   %}
10352 
10353   ins_pipe(icond_reg);
10354 %}
10355 
10356 // special case for creating a boolean 0 or 1
10357 
10358 // n.b. this is selected in preference to the rule above because it
10359 // avoids loading constants 0 and 1 into a source register
10360 
// Int conditional move, signed compare, selecting between the constants one
// and zero. Emitted as csincw on zr/zr, so no constant is loaded: $dst
// receives 0 when $cmp holds and 1 (zr + 1) otherwise.
10361 instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
10362   match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));
10363 
10364   ins_cost(INSN_COST * 2);
10365   format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}
10366 
10367   ins_encode %{
10368     // equivalently
10369     // cset(as_Register($dst$$reg),
10370     //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
10371     __ csincw(as_Register($dst$$reg),
10372              zr,
10373              zr,
10374              (Assembler::Condition)$cmp$$cmpcode);
10375   %}
10376 
10377   ins_pipe(icond_none);
10378 %}
10379 
// Int conditional move, unsigned compare, selecting between the constants
// one and zero. Emitted as csincw on zr/zr, so no constant is loaded: $dst
// receives 0 when $cmp holds and 1 (zr + 1) otherwise.
10380 instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
10381   match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));
10382 
10383   ins_cost(INSN_COST * 2);
10384   format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}
10385 
10386   ins_encode %{
10387     // equivalently
10388     // cset(as_Register($dst$$reg),
10389     //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
10390     __ csincw(as_Register($dst$$reg),
10391              zr,
10392              zr,
10393              (Assembler::Condition)$cmp$$cmpcode);
10394   %}
10395 
10396   ins_pipe(icond_none);
10397 %}
10398 
// Long conditional move on a signed compare (cmpOp / rFlagsReg).
// Emits the 64-bit csel with $src2 as the first source and $src1 as the
// second, so $dst receives $src2 when $cmp holds and $src1 otherwise.
10399 instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
10400   match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));
10401 
10402   ins_cost(INSN_COST * 2);
10403   format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}
10404 
10405   ins_encode %{
10406     __ csel(as_Register($dst$$reg),
10407             as_Register($src2$$reg),
10408             as_Register($src1$$reg),
10409             (Assembler::Condition)$cmp$$cmpcode);
10410   %}
10411 
10412   ins_pipe(icond_reg_reg);
10413 %}
10414 
// Long conditional move on an unsigned compare (cmpOpU / rFlagsRegU).
// Same encoding as cmovL_reg_reg: $dst receives $src2 when $cmp holds and
// $src1 otherwise.
10415 instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
10416   match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));
10417 
10418   ins_cost(INSN_COST * 2);
10419   format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}
10420 
10421   ins_encode %{
10422     __ csel(as_Register($dst$$reg),
10423             as_Register($src2$$reg),
10424             as_Register($src1$$reg),
10425             (Assembler::Condition)$cmp$$cmpcode);
10426   %}
10427 
10428   ins_pipe(icond_reg_reg);
10429 %}
10430 
10431 // special cases where one arg is zero
10432 
// Long conditional move, signed compare, where the second CMoveL value is
// the constant zero. Uses zr in place of a loaded constant: $dst receives
// zero when $cmp holds and $src otherwise.
10433 instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
10434   match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));
10435 
10436   ins_cost(INSN_COST * 2);
10437   format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}
10438 
10439   ins_encode %{
10440     __ csel(as_Register($dst$$reg),
10441             zr,
10442             as_Register($src$$reg),
10443             (Assembler::Condition)$cmp$$cmpcode);
10444   %}
10445 
10446   ins_pipe(icond_reg);
10447 %}
10448 
// Long conditional move, unsigned compare, where the second CMoveL value is
// the constant zero. Uses zr in place of a loaded constant: $dst receives
// zero when $cmp holds and $src otherwise.
10449 instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
10450   match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));
10451 
10452   ins_cost(INSN_COST * 2);
10453   format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}
10454 
10455   ins_encode %{
10456     __ csel(as_Register($dst$$reg),
10457             zr,
10458             as_Register($src$$reg),
10459             (Assembler::Condition)$cmp$$cmpcode);
10460   %}
10461 
10462   ins_pipe(icond_reg);
10463 %}
10464 
// Long conditional move, signed compare, where the first CMoveL value is the
// constant zero. Uses zr in place of a loaded constant: $dst receives $src
// when $cmp holds and zero otherwise.
10465 instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
10466   match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));
10467 
10468   ins_cost(INSN_COST * 2);
10469   format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}
10470 
10471   ins_encode %{
10472     __ csel(as_Register($dst$$reg),
10473             as_Register($src$$reg),
10474             zr,
10475             (Assembler::Condition)$cmp$$cmpcode);
10476   %}
10477 
10478   ins_pipe(icond_reg);
10479 %}
10480 
// Long conditional move, unsigned compare, where the first CMoveL value is
// the constant zero. Uses zr in place of a loaded constant: $dst receives
// $src when $cmp holds and zero otherwise.
10481 instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
10482   match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));
10483 
10484   ins_cost(INSN_COST * 2);
10485   format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}
10486 
10487   ins_encode %{
10488     __ csel(as_Register($dst$$reg),
10489             as_Register($src$$reg),
10490             zr,
10491             (Assembler::Condition)$cmp$$cmpcode);
10492   %}
10493 
10494   ins_pipe(icond_reg);
10495 %}
10496 
// Pointer conditional move on a signed compare (cmpOp / rFlagsReg).
// Emits the 64-bit csel with $src2 as the first source and $src1 as the
// second, so $dst receives $src2 when $cmp holds and $src1 otherwise.
10497 instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
10498   match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));
10499 
10500   ins_cost(INSN_COST * 2);
10501   format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}
10502 
10503   ins_encode %{
10504     __ csel(as_Register($dst$$reg),
10505             as_Register($src2$$reg),
10506             as_Register($src1$$reg),
10507             (Assembler::Condition)$cmp$$cmpcode);
10508   %}
10509 
10510   ins_pipe(icond_reg_reg);
10511 %}
10512 
// Pointer conditional move on an unsigned compare (cmpOpU / rFlagsRegU).
// Same encoding as cmovP_reg_reg: $dst receives $src2 when $cmp holds and
// $src1 otherwise.
10513 instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
10514   match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));
10515 
10516   ins_cost(INSN_COST * 2);
10517   format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}
10518 
10519   ins_encode %{
10520     __ csel(as_Register($dst$$reg),
10521             as_Register($src2$$reg),
10522             as_Register($src1$$reg),
10523             (Assembler::Condition)$cmp$$cmpcode);
10524   %}
10525 
10526   ins_pipe(icond_reg_reg);
10527 %}
10528 
10529 // special cases where one arg is zero
10530 
// Pointer conditional move, signed compare, where the second CMoveP value is
// the null constant (immP0). Uses zr in place of a loaded constant: $dst
// receives zero when $cmp holds and $src otherwise.
10531 instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
10532   match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));
10533 
10534   ins_cost(INSN_COST * 2);
10535   format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}
10536 
10537   ins_encode %{
10538     __ csel(as_Register($dst$$reg),
10539             zr,
10540             as_Register($src$$reg),
10541             (Assembler::Condition)$cmp$$cmpcode);
10542   %}
10543 
10544   ins_pipe(icond_reg);
10545 %}
10546 
// Pointer conditional move (unsigned flags) where the second value operand is
// the immP0 constant: zr is passed to csel instead of a materialised zero.
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}
  ins_cost(INSN_COST * 2);

  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ csel($dst$$Register, zr, $src$$Register, cond);
  %}

  ins_pipe(icond_reg);
%}
10562 
// Pointer conditional move (signed flags) where the first value operand is
// the immP0 constant: zr is passed to csel instead of a materialised zero.
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}
  ins_cost(INSN_COST * 2);

  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ csel($dst$$Register, $src$$Register, zr, cond);
  %}

  ins_pipe(icond_reg);
%}
10578 
// Pointer conditional move (unsigned flags) where the first value operand is
// the immP0 constant: zr is passed to csel instead of a materialised zero.
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}
  ins_cost(INSN_COST * 2);

  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ csel($dst$$Register, $src$$Register, zr, cond);
  %}

  ins_pipe(icond_reg);
%}
10594 
// Compressed-pointer conditional move, signed condition flags.
// Uses the 32-bit cselw form with operand order ($dst, $src2, $src1, $cmp).
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}
  ins_cost(INSN_COST * 2);

  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ cselw($dst$$Register, $src2$$Register, $src1$$Register, cond);
  %}

  ins_pipe(icond_reg_reg);
%}
10610 
// Compressed-pointer conditional move, unsigned condition flags
// (cmpOpU / rFlagsRegU). Uses the 32-bit cselw form with operand order
// ($dst, $src2, $src1, $cmp).
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  // The listing must say "unsigned" here: this is the cmpOpU variant, and the
  // sibling cmovUN_* rules already label themselves that way.
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10626 
10627 // special cases where one arg is zero
10628 
// Compressed-pointer conditional move (signed flags) where the second value
// operand is the immN0 constant: zr stands in for the zero.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}
  ins_cost(INSN_COST * 2);

  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ cselw($dst$$Register, zr, $src$$Register, cond);
  %}

  ins_pipe(icond_reg);
%}
10644 
// Compressed-pointer conditional move (unsigned flags) where the second value
// operand is the immN0 constant: zr stands in for the zero.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}
  ins_cost(INSN_COST * 2);

  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ cselw($dst$$Register, zr, $src$$Register, cond);
  %}

  ins_pipe(icond_reg);
%}
10660 
// Compressed-pointer conditional move (signed flags) where the first value
// operand is the immN0 constant: zr stands in for the zero.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}
  ins_cost(INSN_COST * 2);

  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ cselw($dst$$Register, $src$$Register, zr, cond);
  %}

  ins_pipe(icond_reg);
%}
10676 
// Compressed-pointer conditional move (unsigned flags) where the first value
// operand is the immN0 constant: zr stands in for the zero.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}
  ins_cost(INSN_COST * 2);

  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ cselw($dst$$Register, $src$$Register, zr, cond);
  %}

  ins_pipe(icond_reg);
%}
10692 
// Single-precision float conditional move, signed condition flags.
// Emits fcsels with operand order ($dst, $src2, $src1, $cmp), mirroring the
// integer csel rules above.
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // The listing shows the operands in the order they are actually emitted
  // (src2 before src1) and has no dangling line continuation.
  format %{ "fcsels $dst, $src2, $src1, $cmp\t# signed cmove float" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
10710 
// Single-precision float conditional move, unsigned condition flags
// (cmpOpU / rFlagsRegU). Emits fcsels with operand order
// ($dst, $src2, $src1, $cmp).
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // The listing shows the operands in the order they are actually emitted
  // (src2 before src1) and has no dangling line continuation.
  format %{ "fcsels $dst, $src2, $src1, $cmp\t# unsigned cmove float" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
10728 
// Double-precision float conditional move, signed condition flags.
// Emits fcseld with operand order ($dst, $src2, $src1, $cmp).
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // The listing shows the operands in emission order (src2 before src1),
  // labels the value as a double, and has no dangling line continuation.
  format %{ "fcseld $dst, $src2, $src1, $cmp\t# signed cmove double" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10746 
// Double-precision float conditional move, unsigned condition flags
// (cmpOpU / rFlagsRegU). Emits fcseld with operand order
// ($dst, $src2, $src1, $cmp).
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // The listing shows the operands in emission order (src2 before src1),
  // labels the value as a double, and has no dangling line continuation.
  format %{ "fcseld $dst, $src2, $src1, $cmp\t# unsigned cmove double" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10764 
10765 // ============================================================================
10766 // Arithmetic Instructions
10767 //
10768 
10769 // Integer Addition
10770 
10771 // TODO
10772 // these currently employ operations which do not set CR and hence are
10773 // not flagged as killing CR but we would like to isolate the cases
10774 // where we want to set flags from those where we don't. need to work
10775 // out how to do that.
10776 
// 32-bit integer add of two register operands (addw).
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  format %{ "addw  $dst, $src1, $src2" %}
  ins_cost(INSN_COST);

  ins_encode %{
    __ addw($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}
10791 
// 32-bit integer add of a register and an immIAddSub immediate.
// Encoding is delegated to the shared add/sub immediate enc_class.
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  format %{ "addw $dst, $src1, $src2" %}
  ins_cost(INSN_COST);

  // opcode 0x0 tells the shared encoding this is an add, not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10805 
// 32-bit integer add of an immediate to a long register narrowed via ConvL2I.
// The match rule folds the ConvL2I into the rule itself.
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  format %{ "addw $dst, $src1, $src2" %}
  ins_cost(INSN_COST);

  // opcode 0x0 tells the shared encoding this is an add, not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10819 
10820 // Pointer Addition
// Adds a long register offset to a pointer register (64-bit add).
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  format %{ "add $dst, $src1, $src2\t# ptr" %}
  ins_cost(INSN_COST);

  ins_encode %{
    __ add($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}
10835 
// Adds an int offset to a pointer, folding the ConvI2L into the add by using
// the sxtw extended-register form.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}
  ins_cost(1.9 * INSN_COST);

  ins_encode %{
    __ add($dst$$Register, $src1$$Register, $src2$$Register, ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}
10850 
// Adds a left-shifted long offset to a pointer. The address computation is
// emitted through lea with an lsl-scaled register-offset Address.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}
  ins_cost(1.9 * INSN_COST);

  ins_encode %{
    // base + (index << scale)
    Address scaled_addr($src1$$Register, $src2$$Register,
                        Address::lsl($scale$$constant));
    __ lea($dst$$Register, scaled_addr);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10865 
// Adds a sign-extended, left-shifted int offset to a pointer. The address
// computation is emitted through lea with an sxtw-scaled Address.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}
  ins_cost(1.9 * INSN_COST);

  ins_encode %{
    // base + (sxtw(index) << scale)
    Address scaled_addr($src1$$Register, $src2$$Register,
                        Address::sxtw($scale$$constant));
    __ lea($dst$$Register, scaled_addr);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10880 
// Left shift of a sign-extended int (LShiftL (ConvI2L src) scale), emitted as
// one sbfiz. The lsb is scale & 63 and the field width is capped at 32.
// Note: cr is declared as an operand but is not referenced by the match rule
// or the encoding in this block.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}
  ins_cost(INSN_COST);

  ins_encode %{
    __ sbfiz($dst$$Register, $src$$Register,
             $scale$$constant & 63,
             MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
10895 
10896 // Pointer Immediate Addition
10897 // n.b. this needs to be more expensive than using an indirect memory
10898 // operand
// Adds an immLAddSub immediate to a pointer register.
// Encoding is delegated to the shared 64-bit add/sub immediate enc_class.
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  format %{ "add $dst, $src1, $src2\t# ptr" %}
  ins_cost(INSN_COST);

  // opcode 0x0 tells the shared encoding this is an add, not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10912 
10913 // Long Addition
// 64-bit add of two long register operands.
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (AddL src1 src2));

  format %{ "add  $dst, $src1, $src2" %}
  ins_cost(INSN_COST);

  ins_encode %{
    __ add($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}
10929 
// Long Immediate Addition. No constant pool entries required.
// 64-bit add of a long register and an immLAddSub immediate.
// Encoding is delegated to the shared add/sub immediate enc_class.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  format %{ "add $dst, $src1, $src2" %}
  ins_cost(INSN_COST);

  // opcode 0x0 tells the shared encoding this is an add, not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10944 
10945 // Integer Subtraction
// 32-bit integer subtract of two register operands (subw).
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  format %{ "subw  $dst, $src1, $src2" %}
  ins_cost(INSN_COST);

  ins_encode %{
    __ subw($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}
10960 
10961 // Immediate Subtraction
// 32-bit integer subtract of an immIAddSub immediate from a register.
// Encoding is delegated to the shared add/sub immediate enc_class.
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  format %{ "subw $dst, $src1, $src2" %}
  ins_cost(INSN_COST);

  // opcode 0x1 tells the shared encoding this is a sub, not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10975 
10976 // Long Subtraction
// 64-bit subtract of two long register operands.
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (SubL src1 src2));

  format %{ "sub  $dst, $src1, $src2" %}
  ins_cost(INSN_COST);

  ins_encode %{
    __ sub($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}
10992 
// Long Immediate Subtraction. No constant pool entries required.
// 64-bit subtract of an immLAddSub immediate from a long register.
// Encoding is delegated to the shared add/sub immediate enc_class.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  // A space separates the mnemonic from the first operand so the listing
  // does not render as "sub<reg>".
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
11007 
11008 // Integer Negation (special case for sub)
11009 
// 32-bit negation, matched as (SubI zero src) and emitted as negw.
// Note: cr is declared as an operand but is not referenced by the match rule
// or the encoding in this block.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  format %{ "negw $dst, $src\t# int" %}
  ins_cost(INSN_COST);

  ins_encode %{
    __ negw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
11023 
11024 // Long Negation
11025 
// 64-bit negation, matched as (SubL zero src) and emitted as neg.
// Note: cr is declared as an operand but is not referenced by the match rule
// or the encoding in this block.
instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  format %{ "neg $dst, $src\t# long" %}
  ins_cost(INSN_COST);

  ins_encode %{
    __ neg($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
11039 
11040 // Integer Multiply
11041 
// 32-bit integer multiply of two register operands (mulw).
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  format %{ "mulw  $dst, $src1, $src2" %}
  ins_cost(INSN_COST * 3);

  ins_encode %{
    __ mulw($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  ins_pipe(imul_reg_reg);
%}
11056 
// Long product of two ints: folds both ConvI2L nodes of
// (MulL (ConvI2L src1) (ConvI2L src2)) into a single smull.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  format %{ "smull  $dst, $src1, $src2" %}
  ins_cost(INSN_COST * 3);

  ins_encode %{
    __ smull($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  ins_pipe(imul_reg_reg);
%}
11071 
11072 // Long Multiply
11073 
// 64-bit multiply of two long register operands.
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  format %{ "mul  $dst, $src1, $src2" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    __ mul($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  ins_pipe(lmul_reg_reg);
%}
11088 
// High half of a long multiply (MulHiL), emitted as smulh.
// Note: cr is declared as an operand but is not referenced by the match rule
// or the encoding in this block.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  // No trailing comma after the last operand in the listing.
  format %{ "smulh   $dst, $src1, $src2\t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
11104 
11105 // Combined Integer Multiply & Add/Sub
11106 
// 32-bit multiply-add: matches (AddI src3 (MulI src1 src2)) and emits maddw
// with operands ($dst, $src1, $src2, $src3).
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // The listing names the 32-bit mnemonic that is actually emitted.
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
11122 
// 32-bit multiply-subtract: matches (SubI src3 (MulI src1 src2)) and emits
// msubw with operands ($dst, $src1, $src2, $src3).
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // The listing names the 32-bit mnemonic that is actually emitted.
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
11138 
11139 // Combined Long Multiply & Add/Sub
11140 
// 64-bit multiply-add: matches (AddL src3 (MulL src1 src2)) and emits madd
// with operands ($dst, $src1, $src2, $src3).
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  format %{ "madd  $dst, $src1, $src2, $src3" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    __ madd($dst$$Register, $src1$$Register, $src2$$Register, $src3$$Register);
  %}

  ins_pipe(lmac_reg_reg);
%}
11156 
// 64-bit multiply-subtract: matches (SubL src3 (MulL src1 src2)) and emits
// msub with operands ($dst, $src1, $src2, $src3).
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  format %{ "msub  $dst, $src1, $src2, $src3" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    __ msub($dst$$Register, $src1$$Register, $src2$$Register, $src3$$Register);
  %}

  ins_pipe(lmac_reg_reg);
%}
11172 
11173 // Integer Divide
11174 
// 32-bit signed integer divide. Code emission is delegated to the
// aarch64_enc_divw enc_class defined elsewhere in this file.
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  format %{ "sdivw  $dst, $src1, $src2" %}
  ins_cost(INSN_COST * 19);

  ins_encode(aarch64_enc_divw(dst, src1, src2));

  ins_pipe(idiv_reg_reg);
%}
11184 
// Collapses (URShiftI (RShiftI src1 31) 31) into a single logical shift
// right by 31 (lsrw).
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));

  format %{ "lsrw $dst, $src1, $div1" %}
  ins_cost(INSN_COST);

  ins_encode %{
    __ lsrw($dst$$Register, $src1$$Register, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
11194 
// Matches src + ((src >> 31) >>> 31) and emits it as one shifted-register
// add: addw dst, src, src, LSR #31.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  // The listing shows both source registers of the emitted instruction;
  // the second one is the operand that gets shifted.
  format %{ "addw $dst, $src, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
11208 
11209 // Long Divide
11210 
// 64-bit signed divide. Code emission is delegated to the aarch64_enc_div
// enc_class defined elsewhere in this file.
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  format %{ "sdiv   $dst, $src1, $src2" %}
  ins_cost(INSN_COST * 35);

  ins_encode(aarch64_enc_div(dst, src1, src2));

  ins_pipe(ldiv_reg_reg);
%}
11220 
// Collapses (URShiftL (RShiftL src1 63) 63) into a single logical shift
// right by 63 (lsr).
instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));

  format %{ "lsr $dst, $src1, $div1" %}
  ins_cost(INSN_COST);

  ins_encode %{
    __ lsr($dst$$Register, $src1$$Register, 63);
  %}

  ins_pipe(ialu_reg_shift);
%}
11230 
// Matches src + ((src >> 63) >>> 63) and emits it as one shifted-register
// add: add dst, src, src, LSR #63.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immI_63 div1, immI_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  // The listing shows both source registers and the shift kind, matching the
  // emitted instruction and the 32-bit div2Round rule.
  format %{ "add $dst, $src, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
11244 
11245 // Integer Remainder
11246 
// 32-bit signed remainder. Code emission is delegated to the
// aarch64_enc_modw enc_class defined elsewhere in this file; the listing
// below describes a divide into rscratch1 followed by a multiply-subtract
// (NOTE(review): confirm against aarch64_enc_modw).
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  // Second line is plain assembler syntax, without the unbalanced "(".
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
11257 
11258 // Long Remainder
11259 
// 64-bit signed remainder. Code emission is delegated to the aarch64_enc_mod
// enc_class defined elsewhere in this file; the listing below describes a
// divide into rscratch1 followed by a multiply-subtract
// (NOTE(review): confirm against aarch64_enc_mod).
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  // Second line is plain assembler syntax, without the unbalanced "(", and
  // the line break uses "\n\t" like the 32-bit modI rule.
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
11270 
11271 // Integer Shifts
11272 
11273 // Shift Left Register
// 32-bit left shift by a register amount (lslvw).
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  format %{ "lslvw  $dst, $src1, $src2" %}
  ins_cost(INSN_COST * 2);

  ins_encode %{
    __ lslvw($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11288 
11289 // Shift Left Immediate
// 32-bit left shift by a constant; only the low five bits of the constant
// are used as the shift amount.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}
  ins_cost(INSN_COST);

  ins_encode %{
    __ lslw($dst$$Register, $src1$$Register, $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11304 
11305 // Shift Right Logical Register
// 32-bit logical right shift by a register amount (lsrvw).
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  format %{ "lsrvw  $dst, $src1, $src2" %}
  ins_cost(INSN_COST * 2);

  ins_encode %{
    __ lsrvw($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11320 
11321 // Shift Right Logical Immediate
// 32-bit logical right shift by a constant; only the low five bits of the
// constant are used as the shift amount.
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}
  ins_cost(INSN_COST);

  ins_encode %{
    __ lsrw($dst$$Register, $src1$$Register, $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11336 
11337 // Shift Right Arithmetic Register
// 32-bit arithmetic right shift by a register amount (asrvw).
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  format %{ "asrvw  $dst, $src1, $src2" %}
  ins_cost(INSN_COST * 2);

  ins_encode %{
    __ asrvw($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11352 
11353 // Shift Right Arithmetic Immediate
// 32-bit arithmetic right shift by a constant; only the low five bits of the
// constant are used as the shift amount.
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}
  ins_cost(INSN_COST);

  ins_encode %{
    __ asrw($dst$$Register, $src1$$Register, $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11368 
11369 // Combined Int Mask and Right Shift (using UBFM)
11370 // TODO
11371 
11372 // Long Shifts
11373 
11374 // Shift Left Register
// 64-bit left shift by a register amount (lslv).
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  format %{ "lslv  $dst, $src1, $src2" %}
  ins_cost(INSN_COST * 2);

  ins_encode %{
    __ lslv($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11389 
11390 // Shift Left Immediate
// 64-bit left shift by a constant; only the low six bits of the constant are
// used as the shift amount.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}
  ins_cost(INSN_COST);

  ins_encode %{
    __ lsl($dst$$Register, $src1$$Register, $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11405 
11406 // Shift Right Logical Register
// 64-bit logical right shift by a register amount (lsrv).
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  format %{ "lsrv  $dst, $src1, $src2" %}
  ins_cost(INSN_COST * 2);

  ins_encode %{
    __ lsrv($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11421 
11422 // Shift Right Logical Immediate
// 64-bit logical right shift by a constant; only the low six bits of the
// constant are used as the shift amount.
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}
  ins_cost(INSN_COST);

  ins_encode %{
    __ lsr($dst$$Register, $src1$$Register, $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11437 
11438 // A special-case pattern for card table stores.
// Logical right shift of a pointer reinterpreted as a long: the CastP2X is
// folded into the rule so the pointer register is shifted directly.
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}
  ins_cost(INSN_COST);

  ins_encode %{
    __ lsr($dst$$Register, $src1$$Register, $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11453 
11454 // Shift Right Arithmetic Register
// 64-bit arithmetic right shift by a register amount (asrv).
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  format %{ "asrv  $dst, $src1, $src2" %}
  ins_cost(INSN_COST * 2);

  ins_encode %{
    __ asrv($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11469 
11470 // Shift Right Arithmetic Immediate
// 64-bit arithmetic right shift by a constant; only the low six bits of the
// constant are used as the shift amount.
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}
  ins_cost(INSN_COST);

  ins_encode %{
    __ asr($dst$$Register, $src1$$Register, $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11485 
11486 // BEGIN This section of the file is automatically generated. Do not edit --------------
11487 
// Long bitwise NOT: matches (XorL src1 m1), where m1 is an immL_M1 operand
// (presumably the constant -1), and emits eon against zr with no shift.
// cr is declared but not referenced by the match rule or encoding here.
instruct regL_not_reg(iRegLNoSp dst,
                         iRegL src1, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
// Int bitwise NOT: matches (XorI src1 m1), where m1 is an immI_M1 operand
// (presumably the constant -1), and emits eonw against zr with no shift.
// cr is declared but not referenced by the match rule or encoding here.
instruct regI_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
11520 
// Int AND with an inverted operand: matches (AndI src1 (XorI src2 m1)) and
// folds the XorI-with-m1 into a single bicw (shift LSL #0).
// cr is declared but not referenced by the match rule or encoding here.
instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11537 
// Long AND with an inverted operand: matches (AndL src1 (XorL src2 m1)) and
// folds the XorL-with-m1 into a single bic (shift LSL #0).
// cr is declared but not referenced by the match rule or encoding here.
instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11554 
// Int OR with an inverted operand: matches (OrI src1 (XorI src2 m1)) and
// folds the XorI-with-m1 into a single ornw (shift LSL #0).
// cr is declared but not referenced by the match rule or encoding here.
instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11571 
// Matches src1 | (src2 ^ m1) on longs, with m1 limited to the immL_M1 operand
// class, and emits one unshifted orn. cr is not referenced by the rule.
instruct OrL_reg_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                         immL_M1 m1, rFlagsReg cr)
%{
  match(Set dst (OrL src1 (XorL src2 m1)));

  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // LSL by 0 leaves the second source unshifted.
    __ orn(dst_reg, src1_reg, src2_reg, Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11588 
// Matches m1 ^ (src2 ^ src1) on ints, with m1 limited to the immI_M1 operand
// class, and emits one unshifted eonw. cr is not referenced by the rule.
instruct XorI_reg_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                          immI_M1 m1, rFlagsReg cr)
%{
  match(Set dst (XorI m1 (XorI src2 src1)));

  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // LSL by 0 leaves the second source unshifted.
    __ eonw(dst_reg, src1_reg, src2_reg, Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11605 
// Matches m1 ^ (src2 ^ src1) on longs, with m1 limited to the immL_M1 operand
// class, and emits one unshifted eon. cr is not referenced by the rule.
instruct XorL_reg_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                          immL_M1 m1, rFlagsReg cr)
%{
  match(Set dst (XorL m1 (XorL src2 src1)));

  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // LSL by 0 leaves the second source unshifted.
    __ eon(dst_reg, src1_reg, src2_reg, Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11622 
// Matches src1 & ((src2 >>> src3) ^ src4) on ints, where src4 is an immI_M1
// operand, and emits one bicw whose second source is shifted with LSR.
// cr is not referenced by the rule.
instruct AndI_reg_URShift_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                                  immI src3, immI_M1 src4, rFlagsReg cr)
%{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ bicw(dst_reg, src1_reg, src2_reg, Assembler::LSR, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11640 
// Matches src1 & ((src2 >>> src3) ^ src4) on longs, where src4 is an immL_M1
// operand, and emits one bic whose second source is shifted with LSR.
// cr is not referenced by the rule.
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                  immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ bic(dst_reg, src1_reg, src2_reg, Assembler::LSR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11658 
// Matches src1 & ((src2 >> src3) ^ src4) on ints, where src4 is an immI_M1
// operand, and emits one bicw whose second source is shifted with ASR.
// cr is not referenced by the rule.
instruct AndI_reg_RShift_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                                 immI src3, immI_M1 src4, rFlagsReg cr)
%{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ bicw(dst_reg, src1_reg, src2_reg, Assembler::ASR, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11676 
// Matches src1 & ((src2 >> src3) ^ src4) on longs, where src4 is an immL_M1
// operand, and emits one bic whose second source is shifted with ASR.
// cr is not referenced by the rule.
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                 immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ bic(dst_reg, src1_reg, src2_reg, Assembler::ASR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11694 
// Matches src1 & ((src2 << src3) ^ src4) on ints, where src4 is an immI_M1
// operand, and emits one bicw whose second source is shifted with LSL.
// cr is not referenced by the rule.
instruct AndI_reg_LShift_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                                 immI src3, immI_M1 src4, rFlagsReg cr)
%{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ bicw(dst_reg, src1_reg, src2_reg, Assembler::LSL, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11712 
// Matches src1 & ((src2 << src3) ^ src4) on longs, where src4 is an immL_M1
// operand, and emits one bic whose second source is shifted with LSL.
// cr is not referenced by the rule.
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                 immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ bic(dst_reg, src1_reg, src2_reg, Assembler::LSL, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11730 
// Matches src4 ^ ((src2 >>> src3) ^ src1) on ints, where src4 is an immI_M1
// operand, and emits one eonw whose second source is shifted with LSR.
// cr is not referenced by the rule.
instruct XorI_reg_URShift_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                                  immI src3, immI_M1 src4, rFlagsReg cr)
%{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ eonw(dst_reg, src1_reg, src2_reg, Assembler::LSR, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11748 
// Matches src4 ^ ((src2 >>> src3) ^ src1) on longs, where src4 is an immL_M1
// operand, and emits one eon whose second source is shifted with LSR.
// cr is not referenced by the rule.
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                  immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ eon(dst_reg, src1_reg, src2_reg, Assembler::LSR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11766 
// Matches src4 ^ ((src2 >> src3) ^ src1) on ints, where src4 is an immI_M1
// operand, and emits one eonw whose second source is shifted with ASR.
// cr is not referenced by the rule.
instruct XorI_reg_RShift_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                                 immI src3, immI_M1 src4, rFlagsReg cr)
%{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ eonw(dst_reg, src1_reg, src2_reg, Assembler::ASR, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11784 
// Matches src4 ^ ((src2 >> src3) ^ src1) on longs, where src4 is an immL_M1
// operand, and emits one eon whose second source is shifted with ASR.
// cr is not referenced by the rule.
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                 immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ eon(dst_reg, src1_reg, src2_reg, Assembler::ASR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11802 
// Matches src4 ^ ((src2 << src3) ^ src1) on ints, where src4 is an immI_M1
// operand, and emits one eonw whose second source is shifted with LSL.
// cr is not referenced by the rule.
instruct XorI_reg_LShift_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                                 immI src3, immI_M1 src4, rFlagsReg cr)
%{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ eonw(dst_reg, src1_reg, src2_reg, Assembler::LSL, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11820 
// Matches src4 ^ ((src2 << src3) ^ src1) on longs, where src4 is an immL_M1
// operand, and emits one eon whose second source is shifted with LSL.
// cr is not referenced by the rule.
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                 immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ eon(dst_reg, src1_reg, src2_reg, Assembler::LSL, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11838 
// Matches src1 | ((src2 >>> src3) ^ src4) on ints, where src4 is an immI_M1
// operand, and emits one ornw whose second source is shifted with LSR.
// cr is not referenced by the rule.
instruct OrI_reg_URShift_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                                 immI src3, immI_M1 src4, rFlagsReg cr)
%{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ ornw(dst_reg, src1_reg, src2_reg, Assembler::LSR, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11856 
// Matches src1 | ((src2 >>> src3) ^ src4) on longs, where src4 is an immL_M1
// operand, and emits one orn whose second source is shifted with LSR.
// cr is not referenced by the rule.
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                 immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ orn(dst_reg, src1_reg, src2_reg, Assembler::LSR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11874 
// Matches src1 | ((src2 >> src3) ^ src4) on ints, where src4 is an immI_M1
// operand, and emits one ornw whose second source is shifted with ASR.
// cr is not referenced by the rule.
instruct OrI_reg_RShift_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                                immI src3, immI_M1 src4, rFlagsReg cr)
%{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ ornw(dst_reg, src1_reg, src2_reg, Assembler::ASR, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11892 
// Matches src1 | ((src2 >> src3) ^ src4) on longs, where src4 is an immL_M1
// operand, and emits one orn whose second source is shifted with ASR.
// cr is not referenced by the rule.
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ orn(dst_reg, src1_reg, src2_reg, Assembler::ASR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11910 
// Matches src1 | ((src2 << src3) ^ src4) on ints, where src4 is an immI_M1
// operand, and emits one ornw whose second source is shifted with LSL.
// cr is not referenced by the rule.
instruct OrI_reg_LShift_not_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                                immI src3, immI_M1 src4, rFlagsReg cr)
%{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ ornw(dst_reg, src1_reg, src2_reg, Assembler::LSL, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11928 
// Matches src1 | ((src2 << src3) ^ src4) on longs, where src4 is an immL_M1
// operand, and emits one orn whose second source is shifted with LSL.
// cr is not referenced by the rule.
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                                immI src3, immL_M1 src4, rFlagsReg cr)
%{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ orn(dst_reg, src1_reg, src2_reg, Assembler::LSL, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11946 
// Matches src1 & (src2 >>> src3) on ints with a constant count and folds the
// shift into a single andw (LSR form). cr is not referenced by the rule.
instruct AndI_reg_URShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                              immI src3, rFlagsReg cr)
%{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ andw(dst_reg, src1_reg, src2_reg, Assembler::LSR, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11965 
// Matches src1 & (src2 >>> src3) on longs with a constant count and folds the
// shift into a single andr (LSR form). cr is not referenced by the rule.
instruct AndL_reg_URShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                              immI src3, rFlagsReg cr)
%{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ andr(dst_reg, src1_reg, src2_reg, Assembler::LSR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11984 
// Matches src1 & (src2 >> src3) on ints with a constant count and folds the
// shift into a single andw (ASR form). cr is not referenced by the rule.
instruct AndI_reg_RShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ andw(dst_reg, src1_reg, src2_reg, Assembler::ASR, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12003 
// Matches src1 & (src2 >> src3) on longs with a constant count and folds the
// shift into a single andr (ASR form). cr is not referenced by the rule.
instruct AndL_reg_RShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ andr(dst_reg, src1_reg, src2_reg, Assembler::ASR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12022 
// Matches src1 & (src2 << src3) on ints with a constant count and folds the
// shift into a single andw (LSL form). cr is not referenced by the rule.
instruct AndI_reg_LShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ andw(dst_reg, src1_reg, src2_reg, Assembler::LSL, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12041 
// Matches src1 & (src2 << src3) on longs with a constant count and folds the
// shift into a single andr (LSL form). cr is not referenced by the rule.
instruct AndL_reg_LShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ andr(dst_reg, src1_reg, src2_reg, Assembler::LSL, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12060 
// Matches src1 ^ (src2 >>> src3) on ints with a constant count and folds the
// shift into a single eorw (LSR form). cr is not referenced by the rule.
instruct XorI_reg_URShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                              immI src3, rFlagsReg cr)
%{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ eorw(dst_reg, src1_reg, src2_reg, Assembler::LSR, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12079 
// Matches src1 ^ (src2 >>> src3) on longs with a constant count and folds the
// shift into a single eor (LSR form). cr is not referenced by the rule.
instruct XorL_reg_URShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                              immI src3, rFlagsReg cr)
%{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ eor(dst_reg, src1_reg, src2_reg, Assembler::LSR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12098 
// Matches src1 ^ (src2 >> src3) on ints with a constant count and folds the
// shift into a single eorw (ASR form). cr is not referenced by the rule.
instruct XorI_reg_RShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ eorw(dst_reg, src1_reg, src2_reg, Assembler::ASR, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12117 
// Matches src1 ^ (src2 >> src3) on longs with a constant count and folds the
// shift into a single eor (ASR form). cr is not referenced by the rule.
instruct XorL_reg_RShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ eor(dst_reg, src1_reg, src2_reg, Assembler::ASR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12136 
// Matches src1 ^ (src2 << src3) on ints with a constant count and folds the
// shift into a single eorw (LSL form). cr is not referenced by the rule.
instruct XorI_reg_LShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ eorw(dst_reg, src1_reg, src2_reg, Assembler::LSL, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12155 
// Matches src1 ^ (src2 << src3) on longs with a constant count and folds the
// shift into a single eor (LSL form). cr is not referenced by the rule.
instruct XorL_reg_LShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ eor(dst_reg, src1_reg, src2_reg, Assembler::LSL, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12174 
// Matches src1 | (src2 >>> src3) on ints with a constant count and folds the
// shift into a single orrw (LSR form). cr is not referenced by the rule.
instruct OrI_reg_URShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ orrw(dst_reg, src1_reg, src2_reg, Assembler::LSR, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12193 
// Matches src1 | (src2 >>> src3) on longs with a constant count and folds the
// shift into a single orr (LSR form). cr is not referenced by the rule.
instruct OrL_reg_URShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ orr(dst_reg, src1_reg, src2_reg, Assembler::LSR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12212 
// Matches src1 | (src2 >> src3) on ints with a constant count and folds the
// shift into a single orrw (ASR form). cr is not referenced by the rule.
instruct OrI_reg_RShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                            immI src3, rFlagsReg cr)
%{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ orrw(dst_reg, src1_reg, src2_reg, Assembler::ASR, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12231 
// Matches src1 | (src2 >> src3) on longs with a constant count and folds the
// shift into a single orr (ASR form). cr is not referenced by the rule.
instruct OrL_reg_RShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                            immI src3, rFlagsReg cr)
%{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ orr(dst_reg, src1_reg, src2_reg, Assembler::ASR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12250 
// Matches src1 | (src2 << src3) on ints with a constant count and folds the
// shift into a single orrw (LSL form). cr is not referenced by the rule.
instruct OrI_reg_LShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                            immI src3, rFlagsReg cr)
%{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ orrw(dst_reg, src1_reg, src2_reg, Assembler::LSL, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12269 
// Matches src1 | (src2 << src3) on longs with a constant count and folds the
// shift into a single orr (LSL form). cr is not referenced by the rule.
instruct OrL_reg_LShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                            immI src3, rFlagsReg cr)
%{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ orr(dst_reg, src1_reg, src2_reg, Assembler::LSL, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12288 
// Matches src1 + (src2 >>> src3) on ints with a constant count and folds the
// shift into a single addw (LSR form). cr is not referenced by the rule.
instruct AddI_reg_URShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                              immI src3, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ addw(dst_reg, src1_reg, src2_reg, Assembler::LSR, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12307 
// Matches src1 + (src2 >>> src3) on longs with a constant count and folds the
// shift into a single add (LSR form). cr is not referenced by the rule.
instruct AddL_reg_URShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                              immI src3, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ add(dst_reg, src1_reg, src2_reg, Assembler::LSR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12326 
// Matches src1 + (src2 >> src3) on ints with a constant count and folds the
// shift into a single addw (ASR form). cr is not referenced by the rule.
instruct AddI_reg_RShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ addw(dst_reg, src1_reg, src2_reg, Assembler::ASR, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12345 
// Matches src1 + (src2 >> src3) on longs with a constant count and folds the
// shift into a single add (ASR form). cr is not referenced by the rule.
instruct AddL_reg_RShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ add(dst_reg, src1_reg, src2_reg, Assembler::ASR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12364 
// Matches src1 + (src2 << src3) on ints with a constant count and folds the
// shift into a single addw (LSL form). cr is not referenced by the rule.
instruct AddI_reg_LShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ addw(dst_reg, src1_reg, src2_reg, Assembler::LSL, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12383 
// Matches src1 + (src2 << src3) on longs with a constant count and folds the
// shift into a single add (LSL form). cr is not referenced by the rule.
instruct AddL_reg_LShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ add(dst_reg, src1_reg, src2_reg, Assembler::LSL, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12402 
// Matches src1 - (src2 >>> src3) on ints with a constant count and folds the
// shift into a single subw (LSR form). cr is not referenced by the rule.
instruct SubI_reg_URShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                              immI src3, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ subw(dst_reg, src1_reg, src2_reg, Assembler::LSR, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12421 
// Matches src1 - (src2 >>> src3) on longs with a constant count and folds the
// shift into a single sub (LSR form). cr is not referenced by the rule.
instruct SubL_reg_URShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                              immI src3, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ sub(dst_reg, src1_reg, src2_reg, Assembler::LSR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12440 
// Matches src1 - (src2 >> src3) on ints with a constant count and folds the
// shift into a single subw (ASR form). cr is not referenced by the rule.
instruct SubI_reg_RShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ subw(dst_reg, src1_reg, src2_reg, Assembler::ASR, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12459 
// Matches src1 - (src2 >> src3) on longs with a constant count and folds the
// shift into a single sub (ASR form). cr is not referenced by the rule.
instruct SubL_reg_RShift_reg(iRegLNoSp dst, iRegL src1, iRegL src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 6 bits of the constant count are passed on.
    __ sub(dst_reg, src1_reg, src2_reg, Assembler::ASR, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12478 
// Matches src1 - (src2 << src3) on ints with a constant count and folds the
// shift into a single subw (LSL form). cr is not referenced by the rule.
instruct SubI_reg_LShift_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2,
                             immI src3, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low 5 bits of the constant count are passed on.
    __ subw(dst_reg, src1_reg, src2_reg, Assembler::LSL, $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12497 
// Long subtract of a left-shifted register:
// dst = src1 - (src2 << src3), emitted as one sub with an LSL operand.
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}
  ins_cost(1.9 * INSN_COST);

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // Only the low six bits of the constant are passed as the shift amount.
    __ sub(dst_reg, src1_reg, src2_reg, Assembler::LSL, $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12516 
12517 
12518 
// Long shift left followed by arithmetic shift right, folded into one sbfm.
// This idiom is used by the compiler for the i2b bytecode etc.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do:
  // both shift counts must lie in the range 0..63.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_cost(INSN_COST * 2);
  ins_encode %{
    const int left_count  = $lshift_count$$constant;
    const int right_count = $rshift_count$$constant;
    // Operands handed to sbfm: r = (rshift - lshift) mod 64, s = 63 - lshift.
    const int immr = (right_count - left_count) & 63;
    const int imms = 63 - left_count;
    __ sbfm(as_Register($dst$$reg), as_Register($src$$reg), immr, imms);
  %}

  ins_pipe(ialu_reg_shift);
%}
12541 
// Int shift left followed by arithmetic shift right, folded into one sbfmw.
// This idiom is used by the compiler for the i2b bytecode etc.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do:
  // both shift counts must lie in the range 0..31.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_cost(INSN_COST * 2);
  ins_encode %{
    const int left_count  = $lshift_count$$constant;
    const int right_count = $rshift_count$$constant;
    // Operands handed to sbfmw: r = (rshift - lshift) mod 32, s = 31 - lshift.
    const int immr = (right_count - left_count) & 31;
    const int imms = 31 - left_count;
    __ sbfmw(as_Register($dst$$reg), as_Register($src$$reg), immr, imms);
  %}

  ins_pipe(ialu_reg_shift);
%}
12564 
// Long shift left followed by logical shift right, folded into one ubfm.
// This idiom is used by the compiler for the i2b bytecode etc.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do:
  // both shift counts must lie in the range 0..63.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_cost(INSN_COST * 2);
  ins_encode %{
    const int left_count  = $lshift_count$$constant;
    const int right_count = $rshift_count$$constant;
    // Operands handed to ubfm: r = (rshift - lshift) mod 64, s = 63 - lshift.
    const int immr = (right_count - left_count) & 63;
    const int imms = 63 - left_count;
    __ ubfm(as_Register($dst$$reg), as_Register($src$$reg), immr, imms);
  %}

  ins_pipe(ialu_reg_shift);
%}
12587 
// Int shift left followed by logical shift right, folded into one ubfmw.
// This idiom is used by the compiler for the i2b bytecode etc.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do:
  // both shift counts must lie in the range 0..31.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_cost(INSN_COST * 2);
  ins_encode %{
    const int left_count  = $lshift_count$$constant;
    const int right_count = $rshift_count$$constant;
    // Operands handed to ubfmw: r = (rshift - lshift) mod 32, s = 31 - lshift.
    const int immr = (right_count - left_count) & 31;
    const int imms = 31 - left_count;
    __ ubfmw(as_Register($dst$$reg), as_Register($src$$reg), immr, imms);
  %}

  ins_pipe(ialu_reg_shift);
%}
12610 // Bitfield extract with shift & mask
12611 
// Int logical shift right followed by a mask with a contiguous low bitmask,
// dst = (src >>> rshift) & mask, emitted as one ubfxw.
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));
  // Make sure we are not going to exceed what ubfxw can do: the extracted
  // field (start bit plus width) has to fit inside the 32-bit source.
  // Without this check a pattern such as (x >>> 30) & 0xff would ask for a
  // field that runs past bit 31.
  predicate((exact_log2(n->in(2)->get_int() + 1) + (n->in(1)->in(2)->get_int() & 31)) <= (31 + 1));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    // An int shift only uses the low five bits of its count.
    int rshift = $rshift$$constant & 31;
    long mask = $mask$$constant;
    // The field width is log2(mask + 1); exact_log2 needs mask + 1 to be a
    // power of two, which immI_bitmask is assumed to guarantee.
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// Long logical shift right followed by a mask with a contiguous low bitmask,
// dst = (src >>> rshift) & mask, emitted as one ubfx.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));
  // Make sure we are not going to exceed what ubfx can do: the extracted
  // field (start bit plus width) has to fit inside the 64-bit source.
  predicate((exact_log2_long(n->in(2)->get_long() + 1) + (n->in(1)->in(2)->get_int() & 63)) <= (63 + 1));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    // A long shift only uses the low six bits of its count.
    int rshift = $rshift$$constant & 63;
    long mask = $mask$$constant;
    // The field width is log2(mask + 1); the long variant of exact_log2 is
    // used because the mask is a long constant.
    int width = exact_log2_long(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12642 
// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));
  // The source is an int, so the extracted field (start bit plus width) has
  // to stay within its low 32 bits; otherwise the 64-bit ubfx would pick up
  // bits above bit 31 of the source register.
  predicate((exact_log2(n->in(1)->in(2)->get_int() + 1) + (n->in(1)->in(1)->in(2)->get_int() & 31)) <= (31 + 1));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    // An int shift only uses the low five bits of its count.
    int rshift = $rshift$$constant & 31;
    long mask = $mask$$constant;
    // The field width is log2(mask + 1).
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12660 
// An int mask by a positive bitmask followed by a constant left shift,
// dst = (src & mask) << lshift, can be emitted as a single ubfizw.
// The mask is known to be positive because immI_bitmask guarantees it.
instruct ubfizwI(iRegINoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftI (AndI src mask) lshift));
  // Accept shift counts of at most 31 whose mask width plus shift count
  // does not exceed 32.
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2(n->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= (31+1));

  format %{ "ubfizw $dst, $src, $lshift, $mask" %}
  ins_cost(INSN_COST);
  ins_encode %{
    const int shift_count = $lshift$$constant;
    const long field_mask = $mask$$constant;
    // The field width is log2(mask + 1).
    const int field_width = exact_log2(field_mask + 1);
    __ ubfizw(as_Register($dst$$reg), as_Register($src$$reg),
              shift_count, field_width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// A long mask by a positive bitmask followed by a constant left shift,
// dst = (src & mask) << lshift, can be emitted as a single ubfiz.
// The mask is known to be positive because immL_bitmask guarantees it.
instruct ubfizL(iRegLNoSp dst, iRegL src, immI lshift, immL_bitmask mask)
%{
  match(Set dst (LShiftL (AndL src mask) lshift));
  // Accept shift counts of at most 63 whose mask width plus shift count
  // does not exceed 64.
  predicate((unsigned int)n->in(2)->get_int() <= 63 &&
    (exact_log2_long(n->in(1)->in(2)->get_long()+1) + (unsigned int)n->in(2)->get_int()) <= (63+1));

  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_cost(INSN_COST);
  ins_encode %{
    const int shift_count = $lshift$$constant;
    const long field_mask = $mask$$constant;
    // The field width is log2(mask + 1).
    const int field_width = exact_log2(field_mask + 1);
    __ ubfiz(as_Register($dst$$reg), as_Register($src$$reg),
             shift_count, field_width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12699 
// If there is a convert I to L node between an AndI and a LShiftL,
// dst = ((long) (src & mask)) << lshift, we can also match ubfiz.
instruct ubfizIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftL (ConvI2L(AndI src mask)) lshift));
  // Accept shift counts of at most 31 whose mask width plus shift count
  // does not exceed 32.
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2((unsigned int)n->in(1)->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= 32);

  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_cost(INSN_COST);
  ins_encode %{
    const int shift_count = $lshift$$constant;
    const long field_mask = $mask$$constant;
    // The field width is log2(mask + 1).
    const int field_width = exact_log2(field_mask + 1);
    __ ubfiz(as_Register($dst$$reg), as_Register($src$$reg),
             shift_count, field_width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12718 
12719 // Rotations
12720 
// Long (src1 << lshift) | (src2 >>> rshift) emitted as one extr.
instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  // Only applies when the two shift counts add up to a multiple of 64.
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  format %{ "extr $dst, $src1, $src2, #$rshift" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // The right shift count, reduced to six bits, is the extract position.
    __ extr(dst_reg, src1_reg, src2_reg, $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12735 
// Int (src1 << lshift) | (src2 >>> rshift) emitted as one extrw.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  // Only applies when the two shift counts add up to a multiple of 32.
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  // The listing names extrw, the instruction that is actually emitted.
  format %{ "extrw $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    // The right shift count, reduced to five bits, is the extract position.
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12750 
// Long (src1 << lshift) + (src2 >>> rshift) emitted as one extr.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  // Only applies when the two shift counts add up to a multiple of 64.
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  format %{ "extr $dst, $src1, $src2, #$rshift" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    // The right shift count, reduced to six bits, is the extract position.
    __ extr(dst_reg, src1_reg, src2_reg, $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12765 
// Int (src1 << lshift) + (src2 >>> rshift) emitted as one extrw.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  // Only applies when the two shift counts add up to a multiple of 32.
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  // The listing names extrw, the instruction that is actually emitted.
  format %{ "extrw $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    // The right shift count, reduced to five bits, is the extract position.
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12780 
12781 
// rol expander
//
// Long rotate left by a register count.  It has no match rule of its own;
// the rolL_rReg_Var_* rules expand to it.

instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  ins_cost(INSN_COST * 3);
  format %{ "rol    $dst, $src, $shift" %}
  ins_encode %{
    Register dst_reg   = as_Register($dst$$reg);
    Register src_reg   = as_Register($src$$reg);
    Register shift_reg = as_Register($shift$$reg);
    // rscratch1 = zr - shift, then rotate right by that amount.
    __ subw(rscratch1, zr, shift_reg);
    __ rorv(dst_reg, src_reg, rscratch1);
  %}
  ins_pipe(ialu_reg_reg_vshift);
%}
12797 
// rol expander
//
// Int rotate left by a register count.  It has no match rule of its own;
// the rolI_rReg_Var_* rules expand to it.

instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  ins_cost(INSN_COST * 3);
  format %{ "rol    $dst, $src, $shift" %}
  ins_encode %{
    Register dst_reg   = as_Register($dst$$reg);
    Register src_reg   = as_Register($src$$reg);
    Register shift_reg = as_Register($shift$$reg);
    // rscratch1 = zr - shift, then rotate right by that amount.
    __ subw(rscratch1, zr, shift_reg);
    __ rorvw(dst_reg, src_reg, rscratch1);
  %}
  ins_pipe(ialu_reg_reg_vshift);
%}
12813 
// Long rotate left written as (src << shift) | (src >>> (c_64 - shift)),
// with c_64 an immI_64 operand; expands to rolL_rReg.
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr) %{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}
12822 
// Long rotate left written as (src << shift) | (src >>> (c0 - shift)),
// with c0 an immI0 operand; expands to rolL_rReg.
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr) %{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}
12831 
// Int rotate left written as (src << shift) | (src >>> (c_32 - shift)),
// with c_32 an immI_32 operand; expands to rolI_rReg.
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr) %{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
12840 
// Int rotate left written as (src << shift) | (src >>> (c0 - shift)),
// with c0 an immI0 operand; expands to rolI_rReg.
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr) %{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
12849 
// ror expander
//
// Long rotate right by a register count.  It has no match rule of its own;
// the rorL_rReg_Var_* rules expand to it.

instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  ins_cost(INSN_COST);
  format %{ "ror    $dst, $src, $shift" %}
  ins_encode %{
    Register dst_reg   = as_Register($dst$$reg);
    Register src_reg   = as_Register($src$$reg);
    Register shift_reg = as_Register($shift$$reg);
    __ rorv(dst_reg, src_reg, shift_reg);
  %}
  ins_pipe(ialu_reg_reg_vshift);
%}
12864 
// ror expander
//
// Int rotate right by a register count.  It has no match rule of its own;
// the rorI_rReg_Var_* rules expand to it.

instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  ins_cost(INSN_COST);
  format %{ "ror    $dst, $src, $shift" %}
  ins_encode %{
    Register dst_reg   = as_Register($dst$$reg);
    Register src_reg   = as_Register($src$$reg);
    Register shift_reg = as_Register($shift$$reg);
    __ rorvw(dst_reg, src_reg, shift_reg);
  %}
  ins_pipe(ialu_reg_reg_vshift);
%}
12879 
// Long rotate right written as (src >>> shift) | (src << (c_64 - shift)),
// with c_64 an immI_64 operand; expands to rorL_rReg.
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr) %{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}
12888 
// Long rotate right written as (src >>> shift) | (src << (c0 - shift)),
// with c0 an immI0 operand; expands to rorL_rReg.
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr) %{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}
12897 
// Int rotate right written as (src >>> shift) | (src << (c_32 - shift)),
// with c_32 an immI_32 operand; expands to rorI_rReg.
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr) %{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
12906 
// Int rotate right written as (src >>> shift) | (src << (c0 - shift)),
// with c0 an immI0 operand; expands to rorI_rReg.
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr) %{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
12915 
12916 // Add/subtract (extended)
12917 
// Long add of an int converted to long, dst = src1 + (long) src2,
// emitted as one add with the sxtw extend.
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AddL src1 (ConvI2L src2)));

  format %{ "add  $dst, $src1, $src2, sxtw" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ add(dst_reg, src1_reg, src2_reg, ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%};
12930 
// Long subtract of an int converted to long, dst = src1 - (long) src2,
// emitted as one sub with the sxtw extend.
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (SubL src1 (ConvI2L src2)));

  format %{ "sub  $dst, $src1, $src2, sxtw" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ sub(dst_reg, src1_reg, src2_reg, ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%};
12943 
12944 
// Int add whose second operand is src2 shifted left then arithmetically
// right by the immI_16 operands, emitted as one add with the sxth extend.
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));

  format %{ "add  $dst, $src1, $src2, sxth" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ add(dst_reg, src1_reg, src2_reg, ext::sxth);
  %}

  ins_pipe(ialu_reg_reg);
%}
12957 
// Int add whose second operand is src2 shifted left then arithmetically
// right by the immI_24 operands, emitted as one add with the sxtb extend.
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));

  format %{ "add  $dst, $src1, $src2, sxtb" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ add(dst_reg, src1_reg, src2_reg, ext::sxtb);
  %}

  ins_pipe(ialu_reg_reg);
%}
12970 
// Int add whose second operand is src2 shifted left then logically
// right by the immI_24 operands, emitted as one add with the uxtb extend.
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));

  format %{ "add  $dst, $src1, $src2, uxtb" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ add(dst_reg, src1_reg, src2_reg, ext::uxtb);
  %}

  ins_pipe(ialu_reg_reg);
%}
12983 
// Long add whose second operand is src2 shifted left then arithmetically
// right by the immI_48 operands, emitted as one add with the sxth extend.
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));

  format %{ "add  $dst, $src1, $src2, sxth" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ add(dst_reg, src1_reg, src2_reg, ext::sxth);
  %}

  ins_pipe(ialu_reg_reg);
%}
12996 
// Long add whose second operand is src2 shifted left then arithmetically
// right by the immI_32 operands, emitted as one add with the sxtw extend.
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));

  format %{ "add  $dst, $src1, $src2, sxtw" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ add(dst_reg, src1_reg, src2_reg, ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}
13009 
// Long add whose second operand is src2 shifted left then arithmetically
// right by the immI_56 operands, emitted as one add with the sxtb extend.
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));

  format %{ "add  $dst, $src1, $src2, sxtb" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ add(dst_reg, src1_reg, src2_reg, ext::sxtb);
  %}

  ins_pipe(ialu_reg_reg);
%}
13022 
// Long add whose second operand is src2 shifted left then logically
// right by the immI_56 operands, emitted as one add with the uxtb extend.
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));

  format %{ "add  $dst, $src1, $src2, uxtb" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ add(dst_reg, src1_reg, src2_reg, ext::uxtb);
  %}

  ins_pipe(ialu_reg_reg);
%}
13035 
13036 
// Int add of a value masked by the immI_255 operand,
// dst = src1 + (src2 & mask), emitted as one addw with the uxtb extend.
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr) %{
  match(Set dst (AddI src1 (AndI src2 mask)));

  format %{ "addw  $dst, $src1, $src2, uxtb" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ addw(dst_reg, src1_reg, src2_reg, ext::uxtb);
  %}

  ins_pipe(ialu_reg_reg);
%}
13049 
// Int add of a value masked by the immI_65535 operand,
// dst = src1 + (src2 & mask), emitted as one addw with the uxth extend.
instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr) %{
  match(Set dst (AddI src1 (AndI src2 mask)));

  format %{ "addw  $dst, $src1, $src2, uxth" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ addw(dst_reg, src1_reg, src2_reg, ext::uxth);
  %}

  ins_pipe(ialu_reg_reg);
%}
13062 
// Long add of a value masked by the immL_255 operand,
// dst = src1 + (src2 & mask), emitted as one add with the uxtb extend.
instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr) %{
  match(Set dst (AddL src1 (AndL src2 mask)));

  format %{ "add  $dst, $src1, $src2, uxtb" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ add(dst_reg, src1_reg, src2_reg, ext::uxtb);
  %}

  ins_pipe(ialu_reg_reg);
%}
13075 
// Long add of a value masked by the immL_65535 operand,
// dst = src1 + (src2 & mask), emitted as one add with the uxth extend.
instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr) %{
  match(Set dst (AddL src1 (AndL src2 mask)));

  format %{ "add  $dst, $src1, $src2, uxth" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ add(dst_reg, src1_reg, src2_reg, ext::uxth);
  %}

  ins_pipe(ialu_reg_reg);
%}
13088 
// Long add of a value masked by the immL_4294967295 operand,
// dst = src1 + (src2 & mask), emitted as one add with the uxtw extend.
instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr) %{
  match(Set dst (AddL src1 (AndL src2 mask)));

  format %{ "add  $dst, $src1, $src2, uxtw" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ add(dst_reg, src1_reg, src2_reg, ext::uxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}
13101 
// Int subtract of a value masked by the immI_255 operand,
// dst = src1 - (src2 & mask), emitted as one subw with the uxtb extend.
instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr) %{
  match(Set dst (SubI src1 (AndI src2 mask)));

  format %{ "subw  $dst, $src1, $src2, uxtb" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ subw(dst_reg, src1_reg, src2_reg, ext::uxtb);
  %}

  ins_pipe(ialu_reg_reg);
%}
13114 
// Int subtract of a value masked by the immI_65535 operand,
// dst = src1 - (src2 & mask), emitted as one subw with the uxth extend.
instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr) %{
  match(Set dst (SubI src1 (AndI src2 mask)));

  format %{ "subw  $dst, $src1, $src2, uxth" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ subw(dst_reg, src1_reg, src2_reg, ext::uxth);
  %}

  ins_pipe(ialu_reg_reg);
%}
13127 
// Long subtract of a value masked by the immL_255 operand,
// dst = src1 - (src2 & mask), emitted as one sub with the uxtb extend.
instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr) %{
  match(Set dst (SubL src1 (AndL src2 mask)));

  format %{ "sub  $dst, $src1, $src2, uxtb" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ sub(dst_reg, src1_reg, src2_reg, ext::uxtb);
  %}

  ins_pipe(ialu_reg_reg);
%}
13140 
// Long subtract of a value masked by the immL_65535 operand,
// dst = src1 - (src2 & mask), emitted as one sub with the uxth extend.
instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr) %{
  match(Set dst (SubL src1 (AndL src2 mask)));

  format %{ "sub  $dst, $src1, $src2, uxth" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ sub(dst_reg, src1_reg, src2_reg, ext::uxth);
  %}

  ins_pipe(ialu_reg_reg);
%}
13153 
// Long subtract of a value masked by the immL_4294967295 operand,
// dst = src1 - (src2 & mask), emitted as one sub with the uxtw extend.
instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr) %{
  match(Set dst (SubL src1 (AndL src2 mask)));

  format %{ "sub  $dst, $src1, $src2, uxtw" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ sub(dst_reg, src1_reg, src2_reg, ext::uxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}
13166 
13167 
// Long add whose second operand is src2 shifted left then arithmetically
// right by the immI_56 operands and then shifted left by lshift2,
// emitted as one add with the sxtb extend and a shift.
instruct AddExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  // The listing prints the lshift2 operand rather than the literal text.
  format %{ "add  $dst, $src1, $src2, sxtb #$lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13180 
// Long add whose second operand is src2 shifted left then arithmetically
// right by the immI_48 operands and then shifted left by lshift2,
// emitted as one add with the sxth extend and a shift.
instruct AddExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  // The listing prints the lshift2 operand rather than the literal text.
  format %{ "add  $dst, $src1, $src2, sxth #$lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13193 
// Long add whose second operand is src2 shifted left then arithmetically
// right by the immI_32 operands and then shifted left by lshift2,
// emitted as one add with the sxtw extend and a shift.
instruct AddExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  // The listing prints the lshift2 operand rather than the literal text.
  format %{ "add  $dst, $src1, $src2, sxtw #$lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13206 
// Long subtract whose second operand is src2 shifted left then arithmetically
// right by the immI_56 operands and then shifted left by lshift2,
// emitted as one sub with the sxtb extend and a shift.
instruct SubExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  // The listing prints the lshift2 operand rather than the literal text.
  format %{ "sub  $dst, $src1, $src2, sxtb #$lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13219 
// Long subtract whose second operand is src2 shifted left then arithmetically
// right by the immI_48 operands and then shifted left by lshift2,
// emitted as one sub with the sxth extend and a shift.
instruct SubExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  // The listing prints the lshift2 operand rather than the literal text.
  format %{ "sub  $dst, $src1, $src2, sxth #$lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13232 
// Long subtract whose second operand is src2 shifted left then arithmetically
// right by the immI_32 operands and then shifted left by lshift2,
// emitted as one sub with the sxtw extend and a shift.
instruct SubExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  // The listing prints the lshift2 operand rather than the literal text.
  format %{ "sub  $dst, $src1, $src2, sxtw #$lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13245 
// Int add whose second operand is src2 shifted left then arithmetically
// right by the immI_24 operands and then shifted left by lshift2,
// emitted as one addw with the sxtb extend and a shift.
instruct AddExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  // The listing prints the lshift2 operand rather than the literal text.
  format %{ "addw  $dst, $src1, $src2, sxtb #$lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13258 
// Int add whose second operand is src2 shifted left then arithmetically
// right by the immI_16 operands and then shifted left by lshift2,
// emitted as one addw with the sxth extend and a shift.
instruct AddExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  // The listing prints the lshift2 operand rather than the literal text.
  format %{ "addw  $dst, $src1, $src2, sxth #$lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13271 
// Int subtract whose second operand is src2 shifted left then arithmetically
// right by the immI_24 operands and then shifted left by lshift2,
// emitted as one subw with the sxtb extend and a shift.
instruct SubExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  // The listing prints the lshift2 operand rather than the literal text.
  format %{ "subw  $dst, $src1, $src2, sxtb #$lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13284 
// Int subtract whose second operand is src2 shifted left then arithmetically
// right by the immI_16 operands and then shifted left by lshift2,
// emitted as one subw with the sxth extend and a shift.
instruct SubExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  // The listing prints the lshift2 operand rather than the literal text.
  format %{ "subw  $dst, $src1, $src2, sxth #$lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13297 
13298 
// Long add of an int converted to long and then shifted left,
// dst = src1 + (((long) src2) << lshift), emitted as one add with the
// sxtw extend and a shift.
instruct AddExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  // The listing prints the lshift operand rather than the literal text.
  format %{ "add  $dst, $src1, $src2, sxtw #$lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};
13311 
// Long subtract of an int converted to long and then shifted left,
// dst = src1 - (((long) src2) << lshift), emitted as one sub with the
// sxtw extend and a shift.
instruct SubExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  // The listing prints the lshift operand rather than the literal text.
  format %{ "sub  $dst, $src1, $src2, sxtw #$lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};
13324 
13325 
// Long add of a value masked by the immL_255 operand and then shifted left,
// dst = src1 + ((src2 & mask) << lshift), emitted as one add with the
// uxtb extend and a shift.
instruct AddExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  // The listing prints the lshift operand rather than the literal text.
  format %{ "add  $dst, $src1, $src2, uxtb #$lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13338 
// Folds AddL src1 ((src2 & mask) << lshift), with mask an immL_65535 operand,
// into a single add whose second operand is uxth-extended and shifted.
// The mask operand is consumed by matching only; the extension replaces it.
// NOTE(review): the format text prints the literal "lshift" rather than the
// operand value - confirm that is intended.
instruct AddExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));

  format %{ "add  $dst, $src1, $src2, uxth #lshift" %}
  ins_cost(1.9 * INSN_COST);

  ins_encode %{
    __ add($dst$$Register, $src1$$Register, $src2$$Register,
           ext::uxth, $lshift$$constant);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
13351 
// Folds AddL src1 ((src2 & mask) << lshift), with mask an immL_4294967295
// operand, into a single add whose second operand is uxtw-extended and
// shifted. The mask operand is consumed by matching only.
// NOTE(review): the format text prints the literal "lshift" rather than the
// operand value - confirm that is intended.
instruct AddExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));

  format %{ "add  $dst, $src1, $src2, uxtw #lshift" %}
  ins_cost(1.9 * INSN_COST);

  ins_encode %{
    __ add($dst$$Register, $src1$$Register, $src2$$Register,
           ext::uxtw, $lshift$$constant);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
13364 
// Folds SubL src1 ((src2 & mask) << lshift), with mask an immL_255 operand,
// into a single sub whose second operand is uxtb-extended and shifted.
// The mask operand is consumed by matching only; the extension replaces it.
// NOTE(review): the format text prints the literal "lshift" rather than the
// operand value - confirm that is intended.
instruct SubExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));

  format %{ "sub  $dst, $src1, $src2, uxtb #lshift" %}
  ins_cost(1.9 * INSN_COST);

  ins_encode %{
    __ sub($dst$$Register, $src1$$Register, $src2$$Register,
           ext::uxtb, $lshift$$constant);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
13377 
// Folds SubL src1 ((src2 & mask) << lshift), with mask an immL_65535 operand,
// into a single sub whose second operand is uxth-extended and shifted.
// The mask operand is consumed by matching only; the extension replaces it.
// NOTE(review): the format text prints the literal "lshift" rather than the
// operand value - confirm that is intended.
instruct SubExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));

  format %{ "sub  $dst, $src1, $src2, uxth #lshift" %}
  ins_cost(1.9 * INSN_COST);

  ins_encode %{
    __ sub($dst$$Register, $src1$$Register, $src2$$Register,
           ext::uxth, $lshift$$constant);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
13390 
// Folds SubL src1 ((src2 & mask) << lshift), with mask an immL_4294967295
// operand, into a single sub whose second operand is uxtw-extended and
// shifted. The mask operand is consumed by matching only.
// NOTE(review): the format text prints the literal "lshift" rather than the
// operand value - confirm that is intended.
instruct SubExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));

  format %{ "sub  $dst, $src1, $src2, uxtw #lshift" %}
  ins_cost(1.9 * INSN_COST);

  ins_encode %{
    __ sub($dst$$Register, $src1$$Register, $src2$$Register,
           ext::uxtw, $lshift$$constant);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
13403 
// Folds AddI src1 ((src2 & mask) << lshift), with mask an immI_255 operand,
// into a single addw whose second operand is uxtb-extended and shifted.
// The mask operand is consumed by matching only; the extension replaces it.
// NOTE(review): the format text prints the literal "lshift" rather than the
// operand value - confirm that is intended.
instruct AddExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));

  format %{ "addw  $dst, $src1, $src2, uxtb #lshift" %}
  ins_cost(1.9 * INSN_COST);

  ins_encode %{
    __ addw($dst$$Register, $src1$$Register, $src2$$Register,
            ext::uxtb, $lshift$$constant);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
13416 
// Folds AddI src1 ((src2 & mask) << lshift), with mask an immI_65535 operand,
// into a single addw whose second operand is uxth-extended and shifted.
// The mask operand is consumed by matching only; the extension replaces it.
// NOTE(review): the format text prints the literal "lshift" rather than the
// operand value - confirm that is intended.
instruct AddExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));

  format %{ "addw  $dst, $src1, $src2, uxth #lshift" %}
  ins_cost(1.9 * INSN_COST);

  ins_encode %{
    __ addw($dst$$Register, $src1$$Register, $src2$$Register,
            ext::uxth, $lshift$$constant);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
13429 
// Folds SubI src1 ((src2 & mask) << lshift), with mask an immI_255 operand,
// into a single subw whose second operand is uxtb-extended and shifted.
// The mask operand is consumed by matching only; the extension replaces it.
// NOTE(review): the format text prints the literal "lshift" rather than the
// operand value - confirm that is intended.
instruct SubExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));

  format %{ "subw  $dst, $src1, $src2, uxtb #lshift" %}
  ins_cost(1.9 * INSN_COST);

  ins_encode %{
    __ subw($dst$$Register, $src1$$Register, $src2$$Register,
            ext::uxtb, $lshift$$constant);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
13442 
// Folds SubI src1 ((src2 & mask) << lshift), with mask an immI_65535 operand,
// into a single subw whose second operand is uxth-extended and shifted.
// The mask operand is consumed by matching only; the extension replaces it.
// NOTE(review): the format text prints the literal "lshift" rather than the
// operand value - confirm that is intended.
instruct SubExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));

  format %{ "subw  $dst, $src1, $src2, uxth #lshift" %}
  ins_cost(1.9 * INSN_COST);

  ins_encode %{
    __ subw($dst$$Register, $src1$$Register, $src2$$Register,
            ext::uxth, $lshift$$constant);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
13455 // END This section of the file is automatically generated. Do not edit --------------
13456 
13457 // ============================================================================
13458 // Floating Point Arithmetic Instructions
13459 
// Matches AddF on two vRegF inputs and emits a single fadds into dst.
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  format %{ "fadds   $dst, $src1, $src2" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    FloatRegister sum = as_FloatRegister($dst$$reg);
    FloatRegister lhs = as_FloatRegister($src1$$reg);
    FloatRegister rhs = as_FloatRegister($src2$$reg);
    __ fadds(sum, lhs, rhs);
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}
13474 
// Matches AddD on two vRegD inputs and emits a single faddd into dst.
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  format %{ "faddd   $dst, $src1, $src2" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    FloatRegister sum = as_FloatRegister($dst$$reg);
    FloatRegister lhs = as_FloatRegister($src1$$reg);
    FloatRegister rhs = as_FloatRegister($src2$$reg);
    __ faddd(sum, lhs, rhs);
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
13489 
// Matches SubF on two vRegF inputs and emits a single fsubs into dst.
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  format %{ "fsubs   $dst, $src1, $src2" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    FloatRegister diff = as_FloatRegister($dst$$reg);
    FloatRegister lhs  = as_FloatRegister($src1$$reg);
    FloatRegister rhs  = as_FloatRegister($src2$$reg);
    __ fsubs(diff, lhs, rhs);
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}
13504 
// Matches SubD on two vRegD inputs and emits a single fsubd into dst.
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  format %{ "fsubd   $dst, $src1, $src2" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    FloatRegister diff = as_FloatRegister($dst$$reg);
    FloatRegister lhs  = as_FloatRegister($src1$$reg);
    FloatRegister rhs  = as_FloatRegister($src2$$reg);
    __ fsubd(diff, lhs, rhs);
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
13519 
// Matches MulF on two vRegF inputs and emits a single fmuls into dst.
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  format %{ "fmuls   $dst, $src1, $src2" %}
  ins_cost(INSN_COST * 6);

  ins_encode %{
    FloatRegister product = as_FloatRegister($dst$$reg);
    FloatRegister lhs     = as_FloatRegister($src1$$reg);
    FloatRegister rhs     = as_FloatRegister($src2$$reg);
    __ fmuls(product, lhs, rhs);
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}
13534 
// Matches MulD on two vRegD inputs and emits a single fmuld into dst.
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  format %{ "fmuld   $dst, $src1, $src2" %}
  ins_cost(INSN_COST * 6);

  ins_encode %{
    FloatRegister product = as_FloatRegister($dst$$reg);
    FloatRegister lhs     = as_FloatRegister($src1$$reg);
    FloatRegister rhs     = as_FloatRegister($src2$$reg);
    __ fmuld(product, lhs, rhs);
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
13549 
13550 // src1 * src2 + src3
// Matches FmaF src3 (Binary src1 src2) and emits fmadds with operands
// (dst, src1, src2, src3). Selected only when UseFMA is set.
instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary src1 src2)));

  format %{ "fmadds   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister mul_a  = as_FloatRegister($src1$$reg);
    FloatRegister mul_b  = as_FloatRegister($src2$$reg);
    FloatRegister addend = as_FloatRegister($src3$$reg);
    __ fmadds(result, mul_a, mul_b, addend);
  %}

  ins_pipe(pipe_class_default);
%}
13566 
13567 // src1 * src2 + src3
// Matches FmaD src3 (Binary src1 src2) and emits fmaddd with operands
// (dst, src1, src2, src3). Selected only when UseFMA is set.
instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary src1 src2)));

  format %{ "fmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister mul_a  = as_FloatRegister($src1$$reg);
    FloatRegister mul_b  = as_FloatRegister($src2$$reg);
    FloatRegister addend = as_FloatRegister($src3$$reg);
    __ fmaddd(result, mul_a, mul_b, addend);
  %}

  ins_pipe(pipe_class_default);
%}
13583 
13584 // -src1 * src2 + src3
// Matches FmaF where exactly one multiplicand is wrapped in NegF (either
// position) and emits fmsubs with operands (dst, src1, src2, src3).
// Selected only when UseFMA is set.
instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));

  format %{ "fmsubs   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister mul_a  = as_FloatRegister($src1$$reg);
    FloatRegister mul_b  = as_FloatRegister($src2$$reg);
    FloatRegister addend = as_FloatRegister($src3$$reg);
    __ fmsubs(result, mul_a, mul_b, addend);
  %}

  ins_pipe(pipe_class_default);
%}
13601 
13602 // -src1 * src2 + src3
// Matches FmaD where exactly one multiplicand is wrapped in NegD (either
// position) and emits fmsubd with operands (dst, src1, src2, src3).
// Selected only when UseFMA is set.
instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));

  format %{ "fmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister mul_a  = as_FloatRegister($src1$$reg);
    FloatRegister mul_b  = as_FloatRegister($src2$$reg);
    FloatRegister addend = as_FloatRegister($src3$$reg);
    __ fmsubd(result, mul_a, mul_b, addend);
  %}

  ins_pipe(pipe_class_default);
%}
13619 
13620 // -src1 * src2 - src3
// Matches FmaF with a negated addend (NegF src3) and one negated multiplicand
// (either position) and emits fnmadds with operands (dst, src1, src2, src3).
// Selected only when UseFMA is set.
instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));

  format %{ "fnmadds  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister mul_a  = as_FloatRegister($src1$$reg);
    FloatRegister mul_b  = as_FloatRegister($src2$$reg);
    FloatRegister addend = as_FloatRegister($src3$$reg);
    __ fnmadds(result, mul_a, mul_b, addend);
  %}

  ins_pipe(pipe_class_default);
%}
13637 
13638 // -src1 * src2 - src3
// Matches FmaD with a negated addend (NegD src3) and one negated multiplicand
// (either position) and emits fnmaddd with operands (dst, src1, src2, src3).
// Selected only when UseFMA is set.
instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));

  format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister mul_a  = as_FloatRegister($src1$$reg);
    FloatRegister mul_b  = as_FloatRegister($src2$$reg);
    FloatRegister addend = as_FloatRegister($src3$$reg);
    __ fnmaddd(result, mul_a, mul_b, addend);
  %}

  ins_pipe(pipe_class_default);
%}
13655 
13656 // src1 * src2 - src3
// Matches FmaF with a negated addend (NegF src3) and plain multiplicands and
// emits fnmsubs with operands (dst, src1, src2, src3).
// Selected only when UseFMA is set.
// NOTE(review): operand `zero` is declared but not referenced by the match
// rule, format or encoding of this instruct.
instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));

  format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister mul_a  = as_FloatRegister($src1$$reg);
    FloatRegister mul_b  = as_FloatRegister($src2$$reg);
    FloatRegister addend = as_FloatRegister($src3$$reg);
    __ fnmsubs(result, mul_a, mul_b, addend);
  %}

  ins_pipe(pipe_class_default);
%}
13672 
13673 // src1 * src2 - src3
// Matches FmaD with a negated addend (NegD src3) and plain multiplicands and
// emits the double-precision fnmsub with operands (dst, src1, src2, src3).
// Selected only when UseFMA is set.
// NOTE(review): operand `zero` is declared but not referenced by the match
// rule, format or encoding of this instruct.
instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));

  format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister mul_a  = as_FloatRegister($src1$$reg);
    FloatRegister mul_b  = as_FloatRegister($src2$$reg);
    FloatRegister addend = as_FloatRegister($src3$$reg);
    // n.b. insn name should be fnmsubd
    __ fnmsub(result, mul_a, mul_b, addend);
  %}

  ins_pipe(pipe_class_default);
%}
13690 
13691 
// Matches DivF on two vRegF inputs and emits a single fdivs into dst.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1 src2));

  format %{ "fdivs   $dst, $src1, $src2" %}
  ins_cost(INSN_COST * 18);

  ins_encode %{
    FloatRegister quotient = as_FloatRegister($dst$$reg);
    FloatRegister dividend = as_FloatRegister($src1$$reg);
    FloatRegister divisor  = as_FloatRegister($src2$$reg);
    __ fdivs(quotient, dividend, divisor);
  %}

  ins_pipe(fp_div_s);
%}
13706 
// Matches DivD on two vRegD inputs and emits a single fdivd into dst.
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1 src2));

  format %{ "fdivd   $dst, $src1, $src2" %}
  ins_cost(INSN_COST * 32);

  ins_encode %{
    FloatRegister quotient = as_FloatRegister($dst$$reg);
    FloatRegister dividend = as_FloatRegister($src1$$reg);
    FloatRegister divisor  = as_FloatRegister($src2$$reg);
    __ fdivd(quotient, dividend, divisor);
  %}

  ins_pipe(fp_div_d);
%}
13721 
// Matches NegF on a vRegF input and emits a single fnegs into dst.
// The format mnemonic is "fnegs" so the printed listing names the same
// instruction the encoder emits (the double variant prints "fnegd").
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegs  $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
13735 
// Matches NegD on a vRegD input and emits a single fnegd into dst.
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  format %{ "fnegd   $dst, $src" %}
  ins_cost(INSN_COST * 3);

  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister input  = as_FloatRegister($src$$reg);
    __ fnegd(result, input);
  %}

  ins_pipe(fp_uop_d);
%}
13749 
// Matches AbsF on a vRegF input and emits a single fabss into dst.
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  format %{ "fabss   $dst, $src" %}
  ins_cost(INSN_COST * 3);

  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister input  = as_FloatRegister($src$$reg);
    __ fabss(result, input);
  %}

  ins_pipe(fp_uop_s);
%}
13762 
// Matches AbsD on a vRegD input and emits a single fabsd into dst.
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  format %{ "fabsd   $dst, $src" %}
  ins_cost(INSN_COST * 3);

  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister input  = as_FloatRegister($src$$reg);
    __ fabsd(result, input);
  %}

  ins_pipe(fp_uop_d);
%}
13775 
// Matches SqrtD on a vRegD input and emits a single fsqrtd into dst.
// Scheduled with fp_div_d, the pipeline class the double-precision divide
// (divD_reg_reg) uses; this instruct previously named the single-precision
// class fp_div_s.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_d);
%}
13788 
// Matches the float square-root idiom ConvD2F (SqrtD (ConvF2D src)) and
// emits a single fsqrts directly on the vRegF input.
// Scheduled with fp_div_s, the pipeline class the single-precision divide
// (divF_reg_reg) uses; this instruct previously named the double-precision
// class fp_div_d.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
              as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_s);
%}
13801 
13802 // ============================================================================
13803 // Logical Instructions
13804 
13805 // Integer Logical Instructions
13806 
13807 // And Instructions
13808 
13809 
// Matches AndI on two register inputs and emits a single andw into dst.
// NOTE(review): operand `cr` is declared but no effect() references it here.
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  ins_cost(INSN_COST);
  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_encode %{
    __ andw($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}
13824 
// Matches AndI of a register with an immILog immediate and emits a single
// andw (immediate form) into dst.
// The format mnemonic is "andw" to agree with the encoder; it previously
// printed "andsw", which names a different instruction from the andw that
// is actually emitted.
// NOTE(review): operand `cr` is declared but no effect() references it here.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13839 
13840 // Or Instructions
13841 
// Matches OrI on two register inputs and emits a single orrw into dst.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  ins_cost(INSN_COST);
  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_encode %{
    __ orrw($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}
13856 
// Matches OrI of a register with an immILog immediate and emits a single
// orrw (immediate form) into dst.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  ins_cost(INSN_COST);
  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_encode %{
    unsigned long imm_bits = (unsigned long)($src2$$constant);
    __ orrw($dst$$Register, $src1$$Register, imm_bits);
  %}

  ins_pipe(ialu_reg_imm);
%}
13871 
13872 // Xor Instructions
13873 
// Matches XorI on two register inputs and emits a single eorw into dst.
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  ins_cost(INSN_COST);
  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_encode %{
    __ eorw($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}
13888 
// Matches XorI of a register with an immILog immediate and emits a single
// eorw (immediate form) into dst.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  ins_cost(INSN_COST);
  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_encode %{
    unsigned long imm_bits = (unsigned long)($src2$$constant);
    __ eorw($dst$$Register, $src1$$Register, imm_bits);
  %}

  ins_pipe(ialu_reg_imm);
%}
13903 
13904 // Long Logical Instructions
13905 // TODO
13906 
// Matches AndL on two iRegL inputs and emits a single andr into dst.
// The format annotation reads "# long" because this is the AndL (iRegL)
// rule; it previously carried the "# int" tag of the 32-bit rules.
// NOTE(review): operand `cr` is declared but no effect() references it here.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13921 
// Matches AndL of an iRegL with an immLLog immediate and emits a single
// andr (immediate form) into dst.
// The format annotation reads "# long" because this is the AndL (iRegL)
// rule; it previously carried the "# int" tag of the 32-bit rules.
// NOTE(review): operand `cr` is declared but no effect() references it here.
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13936 
13937 // Or Instructions
13938 
// Matches OrL on two iRegL inputs and emits a single orr into dst.
// The format annotation reads "# long" because this is the OrL (iRegL)
// rule; it previously carried the "# int" tag of the 32-bit rules.
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13953 
// Matches OrL of an iRegL with an immLLog immediate and emits a single
// orr (immediate form) into dst.
// The format annotation reads "# long" because this is the OrL (iRegL)
// rule; it previously carried the "# int" tag of the 32-bit rules.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13968 
13969 // Xor Instructions
13970 
// Matches XorL on two iRegL inputs and emits a single eor into dst.
// The format annotation reads "# long" because this is the XorL (iRegL)
// rule; it previously carried the "# int" tag of the 32-bit rules.
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13985 
// Matches XorL of an iRegL with an immLLog immediate and emits a single
// eor (immediate form) into dst.
// The format annotation reads "# long" because this is the XorL (iRegL)
// rule; it previously carried the "# int" tag of the 32-bit rules.
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
14000 
// Matches ConvI2L and emits sbfm dst, src, 0, 31 (printed as sxtw in the
// format listing).
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register wide   = as_Register($dst$$reg);
    Register narrow = as_Register($src$$reg);
    __ sbfm(wide, narrow, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
14012 
14013 // this pattern occurs in bigmath arithmetic
// Matches AndL (ConvI2L src) mask, with mask an immL_32bits operand, and
// emits ubfm dst, src, 0, 31 instead of a separate extend and mask.
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Register wide   = as_Register($dst$$reg);
    Register narrow = as_Register($src$$reg);
    __ ubfm(wide, narrow, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
14026 
// Matches ConvL2I and emits a single movw from the iRegL source into dst.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  format %{ "movw  $dst, $src \t// l2i" %}
  ins_cost(INSN_COST);

  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
14039 
// Matches Conv2B on an int register: compares src with zr using cmpw, then
// sets dst via cset on NE. The flags register is declared killed.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    Register result = $dst$$Register;
    Register value  = $src$$Register;
    __ cmpw(value, zr);
    __ cset(result, Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
14057 
// Matches Conv2B on a pointer register: compares src with zr using cmp, then
// sets dst via cset on NE. The flags register is declared killed.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    Register result  = $dst$$Register;
    Register pointer = $src$$Register;
    __ cmp(pointer, zr);
    __ cset(result, Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
14075 
// Matches ConvD2F (vRegD source, vRegF result) and emits a single fcvtd.
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  format %{ "fcvtd  $dst, $src \t// d2f" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    FloatRegister narrowed = as_FloatRegister($dst$$reg);
    FloatRegister input    = as_FloatRegister($src$$reg);
    __ fcvtd(narrowed, input);
  %}

  ins_pipe(fp_d2f);
%}
14088 
// Matches ConvF2D (vRegF source, vRegD result) and emits a single fcvts.
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  format %{ "fcvts  $dst, $src \t// f2d" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    FloatRegister widened = as_FloatRegister($dst$$reg);
    FloatRegister input   = as_FloatRegister($src$$reg);
    __ fcvts(widened, input);
  %}

  ins_pipe(fp_f2d);
%}
14101 
// Matches ConvF2I (vRegF source, int register result) and emits fcvtzsw.
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  format %{ "fcvtzsw  $dst, $src \t// f2i" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    FloatRegister input = as_FloatRegister($src$$reg);
    __ fcvtzsw($dst$$Register, input);
  %}

  ins_pipe(fp_f2i);
%}
14114 
// Matches ConvF2L (vRegF source, long register result) and emits fcvtzs.
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  format %{ "fcvtzs  $dst, $src \t// f2l" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    FloatRegister input = as_FloatRegister($src$$reg);
    __ fcvtzs($dst$$Register, input);
  %}

  ins_pipe(fp_f2l);
%}
14127 
// Matches ConvI2F (int register source, vRegF result) and emits scvtfws.
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  format %{ "scvtfws  $dst, $src \t// i2f" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    __ scvtfws(result, $src$$Register);
  %}

  ins_pipe(fp_i2f);
%}
14140 
// Matches ConvL2F (long register source, vRegF result) and emits scvtfs.
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  format %{ "scvtfs  $dst, $src \t// l2f" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    __ scvtfs(result, $src$$Register);
  %}

  ins_pipe(fp_l2f);
%}
14153 
// Matches ConvD2I (vRegD source, int register result) and emits fcvtzdw.
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  format %{ "fcvtzdw  $dst, $src \t// d2i" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    FloatRegister input = as_FloatRegister($src$$reg);
    __ fcvtzdw($dst$$Register, input);
  %}

  ins_pipe(fp_d2i);
%}
14166 
// Matches ConvD2L (vRegD source, long register result) and emits fcvtzd.
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  format %{ "fcvtzd  $dst, $src \t// d2l" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    FloatRegister input = as_FloatRegister($src$$reg);
    __ fcvtzd($dst$$Register, input);
  %}

  ins_pipe(fp_d2l);
%}
14179 
// Matches ConvI2D (int register source, vRegD result) and emits scvtfwd.
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  format %{ "scvtfwd  $dst, $src \t// i2d" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    __ scvtfwd(result, $src$$Register);
  %}

  ins_pipe(fp_i2d);
%}
14192 
// Matches ConvL2D (long register source, vRegD result) and emits scvtfd.
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  format %{ "scvtfd  $dst, $src \t// l2d" %}
  ins_cost(INSN_COST * 5);

  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    __ scvtfd(result, $src$$Register);
  %}

  ins_pipe(fp_l2d);
%}
14205 
14206 // stack <-> reg and reg <-> reg shuffles with no conversion
14207 
// MoveF2I from a stackSlotF source: loads the slot at sp + disp into the
// int destination register with ldrw.
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}
  ins_cost(4 * INSN_COST);

  ins_encode %{
    Address slot(sp, $src$$disp);
    __ ldrw(as_Register($dst$$reg), slot);
  %}

  ins_pipe(iload_reg_reg);
%}
14225 
// MoveI2F from a stackSlotI source: loads the slot at sp + disp into the
// float destination register with ldrs.
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}
  ins_cost(4 * INSN_COST);

  ins_encode %{
    Address slot(sp, $src$$disp);
    FloatRegister target = as_FloatRegister($dst$$reg);
    __ ldrs(target, slot);
  %}

  ins_pipe(pipe_class_memory);
%}
14243 
// MoveD2L from a stackSlotD source: loads the slot at sp + disp into the
// long destination register with ldr.
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}
  ins_cost(4 * INSN_COST);

  ins_encode %{
    Address slot(sp, $src$$disp);
    __ ldr(as_Register($dst$$reg), slot);
  %}

  ins_pipe(iload_reg_reg);
%}
14261 
// MoveL2D from a stackSlotL source: loads the slot at sp + disp into the
// double destination register with ldrd.
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}
  ins_cost(4 * INSN_COST);

  ins_encode %{
    Address slot(sp, $src$$disp);
    FloatRegister target = as_FloatRegister($dst$$reg);
    __ ldrd(target, slot);
  %}

  ins_pipe(pipe_class_memory);
%}
14279 
// MoveF2I into a stackSlotI destination: stores the float source register to
// the slot at sp + disp with strs.
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Address slot(sp, $dst$$disp);
    FloatRegister value = as_FloatRegister($src$$reg);
    __ strs(value, slot);
  %}

  ins_pipe(pipe_class_memory);
%}
14297 
// MoveI2F into a stackSlotF destination: stores the int source register to
// the slot at sp + disp with strw.
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Address slot(sp, $dst$$disp);
    __ strw(as_Register($src$$reg), slot);
  %}

  ins_pipe(istore_reg_reg);
%}
14315 
// MoveD2L into a stackSlotL destination: stores the double source register
// to the slot at sp + disp with strd.
// The format lists operands as "$src, $dst" (register, then slot), matching
// the emitted store and the other *_reg_stack formats in this file; it
// previously printed them reversed.
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
14333 
// MoveL2D into a stackSlotD destination: stores the long source register to
// the slot at sp + disp with str.
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}
  ins_cost(INSN_COST);

  ins_encode %{
    Address slot(sp, $dst$$disp);
    __ str(as_Register($src$$reg), slot);
  %}

  ins_pipe(istore_reg_reg);
%}
14351 
// MoveF2I between registers: fmovs from the float source register into the
// int destination register.
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}
  ins_cost(INSN_COST);

  ins_encode %{
    FloatRegister from = as_FloatRegister($src$$reg);
    __ fmovs(as_Register($dst$$reg), from);
  %}

  ins_pipe(fp_f2i);
%}
14369 
// MoveI2F between registers: fmovs from the int source register into the
// float destination register.
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}
  ins_cost(INSN_COST);

  ins_encode %{
    FloatRegister to = as_FloatRegister($dst$$reg);
    __ fmovs(to, as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}
14387 
// MoveD2L between registers: fmovd from the double source register into the
// long destination register.
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}
  ins_cost(INSN_COST);

  ins_encode %{
    FloatRegister from = as_FloatRegister($src$$reg);
    __ fmovd(as_Register($dst$$reg), from);
  %}

  ins_pipe(fp_d2l);
%}
14405 
// MoveL2D between registers: fmovd from the long source register into the
// double destination register.
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
  ins_cost(INSN_COST);

  ins_encode %{
    FloatRegister to = as_FloatRegister($dst$$reg);
    __ fmovd(to, as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
14423 
14424 // ============================================================================
14425 // clearing of an array
14426 
14427 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
14428 %{
14429   match(Set dummy (ClearArray cnt base));
14430   // Zeroes cnt words starting at base; both registers are consumed.
14431   // cr was declared without any effect, so it is now killed: this is
14432   // conservative and assumes zero_words may modify the condition flags.
14433   effect(USE_KILL cnt, USE_KILL base, KILL cr);
14434   ins_cost(4 * INSN_COST);
14435   format %{ "ClearArray $cnt, $base" %}
14436   ins_encode %{
14437     __ zero_words($base$$Register, $cnt$$Register);
14438   %}
14439   ins_pipe(pipe_class_memory);
14440 %}
14441 
14442 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
14443 %{
14444   // Constant-length variant of ClearArray, selected only when the word
14445   // count is below BlockZeroingLowLimit expressed in words.
14446   predicate((u_int64_t)n->in(2)->get_long()
14447             < (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
14448   match(Set dummy (ClearArray cnt base));
14449   // cr had no effect before; killing it is conservative (flags may be touched).
14450   effect(USE_KILL base, KILL cr);
14451   ins_cost(4 * INSN_COST);
14452   format %{ "ClearArray $cnt, $base" %}
14453   ins_encode %{
14454     __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
14455   %}
14456   ins_pipe(pipe_class_memory);
14457 %}
14458 
14459 // ============================================================================
14460 // Overflow Math Instructions
14461 
14462 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
14463 %{
14464   // Flags for an int OverflowAddI of two registers, produced by cmnw.
14465   match(Set cr (OverflowAddI op1 op2));
14466   ins_cost(INSN_COST);
14467   format %{ "cmnw  $op1, $op2\t# overflow check int" %}
14468
14469   ins_encode %{
14470     __ cmnw(as_Register($op1$$reg), as_Register($op2$$reg));
14471   %}
14472   ins_pipe(icmp_reg_reg);
14473 %}
14474 
14475 instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
14476 %{
14477   // Flags for an int OverflowAddI of a register and a constant (cmnw).
14478   match(Set cr (OverflowAddI op1 op2));
14479   ins_cost(INSN_COST);
14480   format %{ "cmnw  $op1, $op2\t# overflow check int" %}
14481
14482   ins_encode %{
14483     __ cmnw(as_Register($op1$$reg), $op2$$constant);
14484   %}
14485   ins_pipe(icmp_reg_imm);
14486 %}
14487 
14488 instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
14489 %{
14490   // Flags for a long OverflowAddL of two registers, produced by cmn.
14491   match(Set cr (OverflowAddL op1 op2));
14492   ins_cost(INSN_COST);
14493   format %{ "cmn   $op1, $op2\t# overflow check long" %}
14494
14495   ins_encode %{
14496     __ cmn(as_Register($op1$$reg), as_Register($op2$$reg));
14497   %}
14498   ins_pipe(icmp_reg_reg);
14499 %}
14500 
14501 instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
14502 %{
14503   // Flags for a long OverflowAddL of a register and a constant (cmn).
14504   match(Set cr (OverflowAddL op1 op2));
14505   ins_cost(INSN_COST);
14506   format %{ "cmn   $op1, $op2\t# overflow check long" %}
14507
14508   ins_encode %{
14509     __ cmn(as_Register($op1$$reg), $op2$$constant);
14510   %}
14511   ins_pipe(icmp_reg_imm);
14512 %}
14513 
14514 instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
14515 %{
14516   // Flags for an int OverflowSubI of two registers, produced by cmpw.
14517   match(Set cr (OverflowSubI op1 op2));
14518   ins_cost(INSN_COST);
14519   format %{ "cmpw  $op1, $op2\t# overflow check int" %}
14520
14521   ins_encode %{
14522     __ cmpw(as_Register($op1$$reg), as_Register($op2$$reg));
14523   %}
14524   ins_pipe(icmp_reg_reg);
14525 %}
14526 
14527 instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
14528 %{
14529   // Flags for an int OverflowSubI of a register and a constant (cmpw).
14530   match(Set cr (OverflowSubI op1 op2));
14531   ins_cost(INSN_COST);
14532   format %{ "cmpw  $op1, $op2\t# overflow check int" %}
14533
14534   ins_encode %{
14535     __ cmpw(as_Register($op1$$reg), $op2$$constant);
14536   %}
14537   ins_pipe(icmp_reg_imm);
14538 %}
14539 
14540 instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
14541 %{
14542   // Flags for a long OverflowSubL of two registers, produced by cmp.
14543   match(Set cr (OverflowSubL op1 op2));
14544   ins_cost(INSN_COST);
14545   format %{ "cmp   $op1, $op2\t# overflow check long" %}
14546
14547   ins_encode %{
14548     __ cmp(as_Register($op1$$reg), as_Register($op2$$reg));
14549   %}
14550   ins_pipe(icmp_reg_reg);
14551 %}
14552 
14553 instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
14554 %{
14555   // Flags for a long OverflowSubL of a register and a constant (cmp).
14556   match(Set cr (OverflowSubL op1 op2));
14557   ins_cost(INSN_COST);
14558   format %{ "cmp   $op1, $op2\t# overflow check long" %}
14559
14560   ins_encode %{
14561     __ cmp(as_Register($op1$$reg), $op2$$constant);
14562   %}
14563   ins_pipe(icmp_reg_imm);
14564 %}
14565 
14566 instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
14567 %{
14568   // Int negate-overflow check: OverflowSubI with a zero left operand,
14569   // encoded as a compare of zr against op1.
14570   match(Set cr (OverflowSubI zero op1));
14571   ins_cost(INSN_COST);
14572   format %{ "cmpw  zr, $op1\t# overflow check int" %}
14573   ins_encode %{
14574     __ cmpw(zr, as_Register($op1$$reg));
14575   %}
14576   ins_pipe(icmp_reg_imm);
14577 %}
14578 
14579 instruct overflowNegL_reg(rFlagsReg cr, immL0 zero, iRegL op1)
14580 %{
14581   // Long negate-overflow check (0 - op1). The zero input of OverflowSubL is
14582   // a long constant, so it must be immL0; typed as immI0 it never matched.
14583   match(Set cr (OverflowSubL zero op1));
14584   format %{ "cmp   zr, $op1\t# overflow check long" %}
14585   ins_cost(INSN_COST);
14586   ins_encode %{
14587     __ cmp(zr, $op1$$Register);
14588   %}
14589   ins_pipe(icmp_reg_imm);
14590 %}
14591 
14592 instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
14593 %{
14594   // Int multiply-overflow check that reports its result through the V flag.
14595   match(Set cr (OverflowMulI op1 op2));
14596   ins_cost(5 * INSN_COST);
14597   format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
14598             "cmp   rscratch1, rscratch1, sxtw\n\t"
14599             "movw  rscratch1, #0x80000000\n\t"
14600             "cselw rscratch1, rscratch1, zr, NE\n\t"
14601             "cmpw  rscratch1, #1" %}
14602   ins_encode %{
14603     // Widening multiply, then compare the product with its own sxtw.
14604     __ smull(rscratch1, as_Register($op1$$reg), as_Register($op2$$reg));
14605     __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
14606     __ movw(rscratch1, 0x80000000);                    // candidate value,
14607     __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // kept on NE, 0 on EQ
14608     __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
14609   %}
14610   ins_pipe(pipe_slow);
14611 %}
14612 
14613 instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
14614 %{
14615   // Int multiply-overflow test fused with its branch; the predicate only
14616   // admits the overflow / no_overflow tests.
14617   match(If cmp (OverflowMulI op1 op2));
14618   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
14619             || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
14620   effect(USE labl, KILL cr);
14621   ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
14622   format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
14623             "cmp   rscratch1, rscratch1, sxtw\n\t"
14624             "b$cmp   $labl" %}
14625   ins_encode %{
14626     Label* target = $labl$$label;
14627     const bool on_overflow = ((Assembler::Condition)$cmp$$cmpcode == Assembler::VS);
14628     __ smull(rscratch1, as_Register($op1$$reg), as_Register($op2$$reg));
14629     __ subs(zr, rscratch1, rscratch1, ext::sxtw);  // NE => overflow
14630     __ br(on_overflow ? Assembler::NE : Assembler::EQ, *target);
14631   %}
14632   ins_pipe(pipe_serial);
14633 %}
14634 
14635 instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
14636 %{
14637   // Long multiply-overflow check that reports its result through the V flag.
14638   match(Set cr (OverflowMulL op1 op2));
14639   ins_cost(6 * INSN_COST);
14640   format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
14641             "smulh rscratch2, $op1, $op2\n\t"
14642             "cmp   rscratch2, rscratch1, ASR #63\n\t"
14643             "movw  rscratch1, #0x80000000\n\t"
14644             "cselw rscratch1, rscratch1, zr, NE\n\t"
14645             "cmpw  rscratch1, #1" %}
14646   ins_encode %{
14647     __ mul(rscratch1, as_Register($op1$$reg), as_Register($op2$$reg));   // low 64 bits
14648     __ smulh(rscratch2, as_Register($op1$$reg), as_Register($op2$$reg)); // high 64 bits
14649     __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);  // high half must be sign ext
14650     __ movw(rscratch1, 0x80000000);                    // candidate value,
14651     __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // kept on NE, 0 on EQ
14652     __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
14653   %}
14654
14655   ins_pipe(pipe_slow);
14656 %}
14657 
14658 instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
14659 %{
14660   // Long multiply-overflow test fused with its branch (overflow / no_overflow only).
14661   match(If cmp (OverflowMulL op1 op2));
14662   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
14663             || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
14664   effect(USE labl, KILL cr);
14665   ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
14666   format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
14667             "smulh rscratch2, $op1, $op2\n\t"
14668             "cmp   rscratch2, rscratch1, ASR #63\n\t"
14669             "b$cmp $labl" %}
14670   ins_encode %{
14671     Label* target = $labl$$label;
14672     const bool on_overflow = ((Assembler::Condition)$cmp$$cmpcode == Assembler::VS);
14673     __ mul(rscratch1, as_Register($op1$$reg), as_Register($op2$$reg));   // low 64 bits
14674     __ smulh(rscratch2, as_Register($op1$$reg), as_Register($op2$$reg)); // high 64 bits
14675     __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);  // NE => high half is not sign ext
14676     __ br(on_overflow ? Assembler::NE : Assembler::EQ, *target);
14677   %}
14678
14679   ins_pipe(pipe_serial);
14680 %}
14681 
14682 // ============================================================================
14683 // Compare Instructions
14684 
14685 instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
14686 %{
14687   match(Set cr (CmpI op1 op2));
14688   // Signed int compare of two registers; the result is the flags in cr.
14689   effect(DEF cr, USE op1, USE op2);
14690
14691   ins_cost(INSN_COST);
14692   format %{ "cmpw  $op1, $op2" %}
14693   // Emission is delegated to the aarch64_enc_cmpw encoding class.
14694   ins_encode(aarch64_enc_cmpw(op1, op2));
14695
14696   ins_pipe(icmp_reg_reg);
14697 %}
14698 
14699 instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
14700 %{
14701   match(Set cr (CmpI op1 zero));
14702   // Signed int compare of a register against the constant zero (immI0).
14703   effect(DEF cr, USE op1);
14704
14705   ins_cost(INSN_COST);
14706   format %{ "cmpw $op1, 0" %}
14707   // Emission is delegated to the aarch64_enc_cmpw_imm_addsub encoding class.
14708   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
14709
14710   ins_pipe(icmp_reg_imm);
14711 %}
14712 
14713 instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
14714 %{
14715   match(Set cr (CmpI op1 op2));
14716   // Signed int compare against an immIAddSub constant (presumably one that fits an add/sub immediate - confirm at the operand).
14717   effect(DEF cr, USE op1);
14718
14719   ins_cost(INSN_COST);
14720   format %{ "cmpw  $op1, $op2" %}
14721   // Emission is delegated to the aarch64_enc_cmpw_imm_addsub encoding class.
14722   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
14723
14724   ins_pipe(icmp_reg_imm);
14725 %}
14726 
14727 instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
14728 %{
14729   match(Set cr (CmpI op1 op2));
14730   // Signed int compare against any immI constant; costed at twice INSN_COST.
14731   effect(DEF cr, USE op1);
14732
14733   ins_cost(INSN_COST * 2);
14734   format %{ "cmpw  $op1, $op2" %}
14735   // Emission is delegated to aarch64_enc_cmpw_imm (presumably it may materialize the constant first - confirm).
14736   ins_encode(aarch64_enc_cmpw_imm(op1, op2));
14737
14738   ins_pipe(icmp_reg_imm);
14739 %}
14740 
14741 // Unsigned compare Instructions; really, same as signed compare
14742 // except it should only be used to feed an If or a CMovI which takes a
14743 // cmpOpU.
14744 
14745 instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
14746 %{
14747   match(Set cr (CmpU op1 op2));
14748   // Unsigned int compare of two registers; result is typed rFlagsRegU.
14749   effect(DEF cr, USE op1, USE op2);
14750
14751   ins_cost(INSN_COST);
14752   format %{ "cmpw  $op1, $op2\t# unsigned" %}
14753   // Same encoding class (aarch64_enc_cmpw) as the signed register compare.
14754   ins_encode(aarch64_enc_cmpw(op1, op2));
14755
14756   ins_pipe(icmp_reg_reg);
14757 %}
14758 
14759 instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
14760 %{
14761   match(Set cr (CmpU op1 zero));
14762   // Unsigned int compare of a register against the constant zero (immI0).
14763   effect(DEF cr, USE op1);
14764
14765   ins_cost(INSN_COST);
14766   format %{ "cmpw $op1, #0\t# unsigned" %}
14767   // Emission is delegated to the aarch64_enc_cmpw_imm_addsub encoding class.
14768   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
14769
14770   ins_pipe(icmp_reg_imm);
14771 %}
14772 
14773 instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
14774 %{
14775   match(Set cr (CmpU op1 op2));
14776   // Unsigned int compare against an immIAddSub constant; result is typed rFlagsRegU.
14777   effect(DEF cr, USE op1);
14778
14779   ins_cost(INSN_COST);
14780   format %{ "cmpw  $op1, $op2\t# unsigned" %}
14781   // Emission is delegated to the aarch64_enc_cmpw_imm_addsub encoding class.
14782   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
14783
14784   ins_pipe(icmp_reg_imm);
14785 %}
14786 
14787 instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
14788 %{
14789   match(Set cr (CmpU op1 op2));
14790   // Unsigned int compare against any immI constant; costed at twice INSN_COST.
14791   effect(DEF cr, USE op1);
14792
14793   ins_cost(INSN_COST * 2);
14794   format %{ "cmpw  $op1, $op2\t# unsigned" %}
14795   // Emission is delegated to the aarch64_enc_cmpw_imm encoding class.
14796   ins_encode(aarch64_enc_cmpw_imm(op1, op2));
14797
14798   ins_pipe(icmp_reg_imm);
14799 %}
14800 
14801 instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
14802 %{
14803   match(Set cr (CmpL op1 op2));
14804   // Long compare of two registers; the result is the flags in cr.
14805   effect(DEF cr, USE op1, USE op2);
14806
14807   ins_cost(INSN_COST);
14808   format %{ "cmp  $op1, $op2" %}
14809   // Emission is delegated to the aarch64_enc_cmp encoding class.
14810   ins_encode(aarch64_enc_cmp(op1, op2));
14811
14812   ins_pipe(icmp_reg_reg);
14813 %}
14814 
14815 instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
14816 %{
14817   match(Set cr (CmpL op1 zero));
14818   // Long compare of a register against the constant zero (immL0).
14819   effect(DEF cr, USE op1);
14820
14821   ins_cost(INSN_COST);
14822   format %{ "tst  $op1" %}
14823   // NOTE(review): format prints "tst" but the encoding class is aarch64_enc_cmp_imm_addsub - confirm the printed mnemonic.
14824   ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));
14825
14826   ins_pipe(icmp_reg_imm);
14827 %}
14828 
14829 instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
14830 %{
14831   match(Set cr (CmpL op1 op2));
14832   // Long compare against an immLAddSub constant (presumably one that fits an add/sub immediate - confirm at the operand).
14833   effect(DEF cr, USE op1);
14834
14835   ins_cost(INSN_COST);
14836   format %{ "cmp  $op1, $op2" %}
14837   // Emission is delegated to the aarch64_enc_cmp_imm_addsub encoding class.
14838   ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));
14839
14840   ins_pipe(icmp_reg_imm);
14841 %}
14842 
14843 instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
14844 %{
14845   match(Set cr (CmpL op1 op2));
14846   // Long compare against any immL constant; costed at twice INSN_COST.
14847   effect(DEF cr, USE op1);
14848
14849   ins_cost(INSN_COST * 2);
14850   format %{ "cmp  $op1, $op2" %}
14851   // Emission is delegated to the aarch64_enc_cmp_imm encoding class.
14852   ins_encode(aarch64_enc_cmp_imm(op1, op2));
14853
14854   ins_pipe(icmp_reg_imm);
14855 %}
14856 
14857 instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
14858 %{
14859   match(Set cr (CmpUL op1 op2));
14860   // Unsigned long compare of two registers; result is typed rFlagsRegU.
14861   effect(DEF cr, USE op1, USE op2);
14862
14863   ins_cost(INSN_COST);
14864   format %{ "cmp  $op1, $op2" %}
14865   // Same encoding class (aarch64_enc_cmp) as the signed long register compare.
14866   ins_encode(aarch64_enc_cmp(op1, op2));
14867
14868   ins_pipe(icmp_reg_reg);
14869 %}
14870 
14871 instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
14872 %{
14873   match(Set cr (CmpUL op1 zero));
14874   // Unsigned long compare of a register against the constant zero (immL0).
14875   effect(DEF cr, USE op1);
14876
14877   ins_cost(INSN_COST);
14878   format %{ "tst  $op1" %}
14879   // NOTE(review): format prints "tst" but the encoding class is aarch64_enc_cmp_imm_addsub - confirm the printed mnemonic.
14880   ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));
14881
14882   ins_pipe(icmp_reg_imm);
14883 %}
14884 
14885 instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
14886 %{
14887   match(Set cr (CmpUL op1 op2));
14888   // Unsigned long compare against an immLAddSub constant; result is typed rFlagsRegU.
14889   effect(DEF cr, USE op1);
14890
14891   ins_cost(INSN_COST);
14892   format %{ "cmp  $op1, $op2" %}
14893   // Emission is delegated to the aarch64_enc_cmp_imm_addsub encoding class.
14894   ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));
14895
14896   ins_pipe(icmp_reg_imm);
14897 %}
14898 
14899 instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
14900 %{
14901   match(Set cr (CmpUL op1 op2));
14902   // Unsigned long compare against any immL constant; costed at twice INSN_COST.
14903   effect(DEF cr, USE op1);
14904
14905   ins_cost(INSN_COST * 2);
14906   format %{ "cmp  $op1, $op2" %}
14907   // Emission is delegated to the aarch64_enc_cmp_imm encoding class.
14908   ins_encode(aarch64_enc_cmp_imm(op1, op2));
14909
14910   ins_pipe(icmp_reg_imm);
14911 %}
14912 
14913 instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
14914 %{
14915   match(Set cr (CmpP op1 op2));
14916   // Pointer compare of two registers; result is typed rFlagsRegU.
14917   effect(DEF cr, USE op1, USE op2);
14918
14919   ins_cost(INSN_COST);
14920   format %{ "cmp  $op1, $op2\t // ptr" %}
14921   // Emission is delegated to the aarch64_enc_cmpp encoding class.
14922   ins_encode(aarch64_enc_cmpp(op1, op2));
14923
14924   ins_pipe(icmp_reg_reg);
14925 %}
14926 
14927 instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
14928 %{
14929   match(Set cr (CmpN op1 op2));
14930   // Compressed-pointer compare of two registers; result is typed rFlagsRegU.
14931   effect(DEF cr, USE op1, USE op2);
14932
14933   ins_cost(INSN_COST);
14934   format %{ "cmp  $op1, $op2\t // compressed ptr" %}
14935   // Emission is delegated to the aarch64_enc_cmpn encoding class.
14936   ins_encode(aarch64_enc_cmpn(op1, op2));
14937
14938   ins_pipe(icmp_reg_reg);
14939 %}
14940 
14941 instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
14942 %{
14943   match(Set cr (CmpP op1 zero));
14944   // Pointer compare against the immP0 constant; result is typed rFlagsRegU.
14945   effect(DEF cr, USE op1, USE zero);
14946
14947   ins_cost(INSN_COST);
14948   format %{ "cmp  $op1, 0\t // ptr" %}
14949   // Only op1 is handed to the aarch64_enc_testp encoding class.
14950   ins_encode(aarch64_enc_testp(op1));
14951
14952   ins_pipe(icmp_reg_imm);
14953 %}
14954 
14955 instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
14956 %{
14957   match(Set cr (CmpN op1 zero));
14958   // Compressed-pointer compare against the immN0 constant; result is typed rFlagsRegU.
14959   effect(DEF cr, USE op1, USE zero);
14960
14961   ins_cost(INSN_COST);
14962   format %{ "cmp  $op1, 0\t // compressed ptr" %}
14963   // Only op1 is handed to the aarch64_enc_testn encoding class.
14964   ins_encode(aarch64_enc_testn(op1));
14965
14966   ins_pipe(icmp_reg_imm);
14967 %}
14968 
14969 // FP comparisons
14970 //
14971 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
14972 // using normal cmpOp. See declaration of rFlagsReg for details.
14973 
14974 instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
14975 %{
14976   // Float compare of two FP registers into the normal flags register.
14977   match(Set cr (CmpF src1 src2));
14978   format %{ "fcmps $src1, $src2" %}
14979   ins_cost(3 * INSN_COST);
14980   ins_encode %{
14981     FloatRegister lhs = as_FloatRegister($src1$$reg);
14982     FloatRegister rhs = as_FloatRegister($src2$$reg);
14983     __ fcmps(lhs, rhs);
14984   %}
14985   ins_pipe(pipe_class_compare);
14986 %}
14987 
14988 instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
14989 %{
14990   // Float compare of a register against the constant 0.0; src2 is only
14991   // matched and is not read by the encoding.
14992   match(Set cr (CmpF src1 src2));
14993   ins_cost(3 * INSN_COST);
14994   format %{ "fcmps $src1, 0.0" %}
14995   ins_encode %{
14996     // Plain 0.0: the former "0.0D" suffix is not a standard C++ literal.
14997     __ fcmps(as_FloatRegister($src1$$reg), 0.0);
14998   %}
14999   ins_pipe(pipe_class_compare);
15000 %}
15001 // FROM HERE
15002 
15003 instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
15004 %{
15005   // Double compare of two FP registers into the normal flags register.
15006   match(Set cr (CmpD src1 src2));
15007   format %{ "fcmpd $src1, $src2" %}
15008   ins_cost(3 * INSN_COST);
15009   ins_encode %{
15010     FloatRegister lhs = as_FloatRegister($src1$$reg);
15011     FloatRegister rhs = as_FloatRegister($src2$$reg);
15012     __ fcmpd(lhs, rhs);
15013   %}
15014   ins_pipe(pipe_class_compare);
15015 %}
15016 
15017 instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
15018 %{
15019   // Double compare of a register against the constant 0.0; src2 is only
15020   // matched and is not read by the encoding.
15021   match(Set cr (CmpD src1 src2));
15022   ins_cost(3 * INSN_COST);
15023   format %{ "fcmpd $src1, 0.0" %}
15024   ins_encode %{
15025     // Plain 0.0: the former "0.0D" suffix is not a standard C++ literal.
15026     __ fcmpd(as_FloatRegister($src1$$reg), 0.0);
15027   %}
15028   ins_pipe(pipe_class_compare);
15029 %}
15030 
15031 instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
15032 %{
15033   // Three-way float compare (CmpF3): dst becomes 0 when the operands
15034   // compare equal, -1 when src1 is less or the compare is unordered,
15035   // and 1 otherwise. The flags register is killed.
15036   match(Set dst (CmpF3 src1 src2));
15037   effect(KILL cr);
15038
15039   ins_cost(5 * INSN_COST);
15040   format %{ "fcmps $src1, $src2\n\t"
15041             "csinvw($dst, zr, zr, eq)\n\t"
15042             "csnegw($dst, $dst, $dst, lt)"
15043   %}
15044
15045   ins_encode %{
15046     FloatRegister s1 = as_FloatRegister($src1$$reg);
15047     FloatRegister s2 = as_FloatRegister($src2$$reg);
15048     Register d = as_Register($dst$$reg);
15049     __ fcmps(s1, s2);
15050     // installs 0 if EQ else -1
15051     __ csinvw(d, zr, zr, Assembler::EQ);
15052     // keeps -1 if less or unordered else installs 1
15053     __ csnegw(d, d, d, Assembler::LT);
15054     // The sequence is branch-free, so no label is declared or bound.
15055   %}
15056   ins_pipe(pipe_class_default);
15057 %}
15058 
15059 instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
15060 %{
15061   // Three-way double compare (CmpD3): dst becomes 0 when the operands
15062   // compare equal, -1 when src1 is less or the compare is unordered,
15063   // and 1 otherwise. The flags register is killed.
15064   match(Set dst (CmpD3 src1 src2));
15065   effect(KILL cr);
15066   ins_cost(5 * INSN_COST);
15067   format %{ "fcmpd $src1, $src2\n\t"
15068             "csinvw($dst, zr, zr, eq)\n\t"
15069             "csnegw($dst, $dst, $dst, lt)"
15070   %}
15071
15072   ins_encode %{
15073     FloatRegister s1 = as_FloatRegister($src1$$reg);
15074     FloatRegister s2 = as_FloatRegister($src2$$reg);
15075     Register d = as_Register($dst$$reg);
15076     __ fcmpd(s1, s2);
15077     // installs 0 if EQ else -1
15078     __ csinvw(d, zr, zr, Assembler::EQ);
15079     // keeps -1 if less or unordered else installs 1
15080     __ csnegw(d, d, d, Assembler::LT);
15081   %}
15082   ins_pipe(pipe_class_default);
15083
15084 %}
15085 
15086 instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
15087 %{
15088   // Three-way float compare against 0.0 (CmpF3): dst becomes 0 on equal,
15089   // -1 when src1 is less or the compare is unordered, and 1 otherwise.
15090   // The flags register is killed.
15091   match(Set dst (CmpF3 src1 zero));
15092   effect(KILL cr);
15093   ins_cost(5 * INSN_COST);
15094   format %{ "fcmps $src1, 0.0\n\t"
15095             "csinvw($dst, zr, zr, eq)\n\t"
15096             "csnegw($dst, $dst, $dst, lt)"
15097   %}
15098
15099   ins_encode %{
15100     FloatRegister s1 = as_FloatRegister($src1$$reg);
15101     Register d = as_Register($dst$$reg);
15102     // Plain 0.0: the former "0.0D" suffix is not a standard C++ literal.
15103     __ fcmps(s1, 0.0);
15104     // installs 0 if EQ else -1
15105     __ csinvw(d, zr, zr, Assembler::EQ);
15106     // keeps -1 if less or unordered else installs 1
15107     __ csnegw(d, d, d, Assembler::LT);
15108   %}
15109   ins_pipe(pipe_class_default);
15110
15111 %}
15112 
15113 instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
15114 %{
15115   // Three-way double compare against 0.0 (CmpD3): dst becomes 0 on equal,
15116   // -1 when src1 is less or the compare is unordered, and 1 otherwise.
15117   // The flags register is killed.
15118   match(Set dst (CmpD3 src1 zero));
15119   effect(KILL cr);
15120   ins_cost(5 * INSN_COST);
15121   format %{ "fcmpd $src1, 0.0\n\t"
15122             "csinvw($dst, zr, zr, eq)\n\t"
15123             "csnegw($dst, $dst, $dst, lt)"
15124   %}
15125   ins_encode %{
15126     FloatRegister s1 = as_FloatRegister($src1$$reg);
15127     Register d = as_Register($dst$$reg);
15128     // Plain 0.0: the former "0.0D" suffix is not a standard C++ literal.
15129     __ fcmpd(s1, 0.0);
15130     // installs 0 if EQ else -1
15131     __ csinvw(d, zr, zr, Assembler::EQ);
15132     // keeps -1 if less or unordered else installs 1
15133     __ csnegw(d, d, d, Assembler::LT);
15134   %}
15135   ins_pipe(pipe_class_default);
15136
15137 %}
15138 
15139 instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
15140 %{
15141   // CmpLTMask of two int registers: compare, set on LT, then negate.
15142   match(Set dst (CmpLTMask p q));
15143   effect(KILL cr);
15144
15145   format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
15146             "csetw $dst, lt\n\t"
15147             "subw $dst, zr, $dst"
15148   %}
15149   ins_cost(3 * INSN_COST);
15150
15151   ins_encode %{
15152     Register mask = $dst$$Register;
15153     __ cmpw($p$$Register, $q$$Register);
15154     __ csetw(mask, Assembler::LT);
15155     __ subw(mask, zr, mask);   // mask = 0 - mask
15156   %}
15157   ins_pipe(ialu_reg_reg);
15158 %}
15159 
15160 instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
15161 %{
15162   // CmpLTMask against the constant zero, done with a single
15163   // 32-bit arithmetic shift right by 31.
15164   match(Set dst (CmpLTMask src zero));
15165   effect(KILL cr);
15166
15167   format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}
15168   ins_cost(INSN_COST);
15169
15170   ins_encode %{
15171     __ asrw($dst$$Register, $src$$Register, 31);
15172   %}
15173   ins_pipe(ialu_reg_shift);
15174 %}
15175 
15176 // ============================================================================
15177 // Max and Min
15178 
15179 instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
15180 %{
15181   // Int minimum: compare the sources, then select src1 on LT and
15182   // src2 otherwise. The flags register is killed.
15183   match(Set dst (MinI src1 src2));
15184   effect(DEF dst, USE src1, USE src2, KILL cr);
15185
15186   size(8);
15187   ins_cost(INSN_COST * 3);
15188   format %{
15189     "cmpw $src1 $src2\t signed int\n\t"
15190     "cselw $dst, $src1, $src2 lt\t"
15191   %}
15192
15193   ins_encode %{
15194     Register lhs = $src1$$Register;
15195     Register rhs = $src2$$Register;
15196     __ cmpw(lhs, rhs);
15197     __ cselw($dst$$Register, lhs, rhs, Assembler::LT);
15198   %}
15199
15200
15201   ins_pipe(ialu_reg_reg);
15202 %}
15203 // FROM HERE
15204 
15205 instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
15206 %{
15207   // Int maximum: compare the sources, then select src1 on GT and
15208   // src2 otherwise. The flags register is killed.
15209   match(Set dst (MaxI src1 src2));
15210   effect(DEF dst, USE src1, USE src2, KILL cr);
15211
15212   size(8);
15213   ins_cost(INSN_COST * 3);
15214   format %{
15215     "cmpw $src1 $src2\t signed int\n\t"
15216     "cselw $dst, $src1, $src2 gt\t"
15217   %}
15218
15219   ins_encode %{
15220     Register lhs = $src1$$Register;
15221     Register rhs = $src2$$Register;
15222     __ cmpw(lhs, rhs);
15223     __ cselw($dst$$Register, lhs, rhs, Assembler::GT);
15224   %}
15225
15226
15227   ins_pipe(ialu_reg_reg);
15228 %}
15229 
15230 // ============================================================================
15231 // Branch Instructions
15232 
15233 // Direct Branch.
15234 instruct branch(label lbl)
15235 %{
15236   match(Goto);
15237   // Unconditional branch: matches Goto and uses lbl as its target.
15238   effect(USE lbl);
15239
15240   ins_cost(BRANCH_COST);
15241   format %{ "b  $lbl" %}
15242   // Emission is delegated to the aarch64_enc_b encoding class.
15243   ins_encode(aarch64_enc_b(lbl));
15244
15245   ins_pipe(pipe_branch);
15246 %}
15247 
15248 // Conditional Near Branch
15249 instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
15250 %{
15251   // Same match rule as `branchConFar'.
15252   match(If cmp cr);
15253   // Conditional branch on flags in an rFlagsReg, tested with a cmpOp condition.
15254   effect(USE lbl);
15255
15256   ins_cost(BRANCH_COST);
15257   // If set to 1 this indicates that the current instruction is a
15258   // short variant of a long branch. This avoids using this
15259   // instruction in first-pass matching. It will then only be used in
15260   // the `Shorten_branches' pass.
15261   // ins_short_branch(1);
15262   format %{ "b$cmp  $lbl" %}
15263   // Emission is delegated to the aarch64_enc_br_con encoding class.
15264   ins_encode(aarch64_enc_br_con(cmp, lbl));
15265
15266   ins_pipe(pipe_branch_cond);
15267 %}
15268 
15269 // Conditional Near Branch Unsigned
15270 instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
15271 %{
15272   // Same match rule as `branchConFar'.
15273   match(If cmp cr);
15274   // Conditional branch on flags in an rFlagsRegU, tested with a cmpOpU condition.
15275   effect(USE lbl);
15276
15277   ins_cost(BRANCH_COST);
15278   // If set to 1 this indicates that the current instruction is a
15279   // short variant of a long branch. This avoids using this
15280   // instruction in first-pass matching. It will then only be used in
15281   // the `Shorten_branches' pass.
15282   // ins_short_branch(1);
15283   format %{ "b$cmp  $lbl\t# unsigned" %}
15284   // Emission is delegated to the aarch64_enc_br_conU encoding class.
15285   ins_encode(aarch64_enc_br_conU(cmp, lbl));
15286
15287   ins_pipe(pipe_branch_cond);
15288 %}
15289 
15290 // Make use of CBZ and CBNZ.  These instructions, as well as being
15291 // shorter than (cmp; branch), have the additional benefit of not
15292 // killing the flags.
15293 
15294 instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
15295   // Int compare-with-zero fused with its branch: EQ uses cbzw, otherwise cbnzw.
15296   match(If cmp (CmpI op1 op2));
15297   effect(USE labl);
15298   format %{ "cbw$cmp   $op1, $labl" %}
15299   ins_cost(BRANCH_COST);
15300   ins_encode %{
15301     Label* target = $labl$$label;
15302     if ((Assembler::Condition)$cmp$$cmpcode == Assembler::EQ) {
15303       __ cbzw(as_Register($op1$$reg), *target);
15304     } else {
15305       __ cbnzw(as_Register($op1$$reg), *target);
15306     }
15307   %}
15308   ins_pipe(pipe_cmp_branch);
15309 %}
15310 
15311 instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
15312   // Long compare-with-zero fused with its branch: EQ uses cbz, otherwise cbnz.
15313   match(If cmp (CmpL op1 op2));
15314   effect(USE labl);
15315   format %{ "cb$cmp   $op1, $labl" %}
15316   ins_cost(BRANCH_COST);
15317   ins_encode %{
15318     Label* target = $labl$$label;
15319     if ((Assembler::Condition)$cmp$$cmpcode == Assembler::EQ) {
15320       __ cbz(as_Register($op1$$reg), *target);
15321     } else {
15322       __ cbnz(as_Register($op1$$reg), *target);
15323     }
15324   %}
15325   ins_pipe(pipe_cmp_branch);
15326 %}
15327 
15328 instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
15329   // Pointer compare against immP0 fused with its branch: EQ uses cbz, otherwise cbnz.
15330   match(If cmp (CmpP op1 op2));
15331   effect(USE labl);
15332   format %{ "cb$cmp   $op1, $labl" %}
15333   ins_cost(BRANCH_COST);
15334   ins_encode %{
15335     Label* target = $labl$$label;
15336     if ((Assembler::Condition)$cmp$$cmpcode == Assembler::EQ) {
15337       __ cbz(as_Register($op1$$reg), *target);
15338     } else {
15339       __ cbnz(as_Register($op1$$reg), *target);
15340     }
15341   %}
15342   ins_pipe(pipe_cmp_branch);
15343 %}
15344 
15345 instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
15346   // Compressed-pointer compare against immN0 fused with its branch (cbzw / cbnzw).
15347   match(If cmp (CmpN op1 op2));
15348   effect(USE labl);
15349   format %{ "cbw$cmp   $op1, $labl" %}
15350   ins_cost(BRANCH_COST);
15351   ins_encode %{
15352     Label* target = $labl$$label;
15353     if ((Assembler::Condition)$cmp$$cmpcode == Assembler::EQ) {
15354       __ cbzw(as_Register($op1$$reg), *target);
15355     } else {
15356       __ cbnzw(as_Register($op1$$reg), *target);
15357     }
15358   %}
15359   ins_pipe(pipe_cmp_branch);
15360 %}
15361 
15362 instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
15363   // Compare of a DecodeN result against immP0: the narrow register itself is tested with cbzw / cbnzw.
15364   match(If cmp (CmpP (DecodeN oop) zero));
15365   effect(USE labl);
15366   format %{ "cb$cmp   $oop, $labl" %}
15367   ins_cost(BRANCH_COST);
15368   ins_encode %{
15369     Label* target = $labl$$label;
15370     if ((Assembler::Condition)$cmp$$cmpcode == Assembler::EQ) {
15371       __ cbzw(as_Register($oop$$reg), *target);
15372     } else {
15373       __ cbnzw(as_Register($oop$$reg), *target);
15374     }
15375   %}
15376   ins_pipe(pipe_cmp_branch);
15377 %}
15378 
15379 instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
15380   match(If cmp (CmpU op1 op2));
15381   effect(USE labl);
15382   // Unsigned int compare-with-zero fused with its branch (cbzw / cbnzw).
15383   ins_cost(BRANCH_COST);
15384   format %{ "cbw$cmp   $op1, $labl" %}
15385   ins_encode %{
15386     Label* L = $labl$$label;
15387     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
15388     if (cond == Assembler::EQ || cond == Assembler::LS)  // EQ or LS: branch when op1 is zero
15389       __ cbzw($op1$$Register, *L);
15390     else  // NOTE(review): every other condition code takes cbnzw; the operand name says Lt/Ge while the test above is LS - confirm against cmpOpUEqNeLtGe
15391       __ cbnzw($op1$$Register, *L);
15392   %}
15393   ins_pipe(pipe_cmp_branch);
15394 %}
15395 
15396 instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
15397   match(If cmp (CmpUL op1 op2));
15398   effect(USE labl);
15399   // Unsigned long compare-with-zero fused with its branch (cbz / cbnz).
15400   ins_cost(BRANCH_COST);
15401   format %{ "cb$cmp   $op1, $labl" %}
15402   ins_encode %{
15403     Label* L = $labl$$label;
15404     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
15405     if (cond == Assembler::EQ || cond == Assembler::LS)  // EQ or LS: branch when op1 is zero
15406       __ cbz($op1$$Register, *L);
15407     else  // NOTE(review): every other condition code takes cbnz; the operand name says Lt/Ge while the test above is LS - confirm against cmpOpUEqNeLtGe
15408       __ cbnz($op1$$Register, *L);
15409   %}
15410   ins_pipe(pipe_cmp_branch);
15411 %}
15412 
15413 // Test bit and Branch
15414 
15415 // Patterns for short (< 32KiB) variants
15416 instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
15417   // Short-branch sign test of a long against zero: LT is mapped to NE and
15418   // any other accepted condition to EQ before calling tbr on bit 63.
15419   match(If cmp (CmpL op1 op2));
15420   effect(USE labl);
15421   format %{ "cb$cmp   $op1, $labl # long" %}
15422   ins_cost(BRANCH_COST);
15423   ins_encode %{
15424     const bool is_lt = ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT);
15425     Label* target = $labl$$label;
15426     __ tbr(is_lt ? Assembler::NE : Assembler::EQ, as_Register($op1$$reg), 63, *target);
15427   %}
15428   ins_pipe(pipe_cmp_branch);
15429   ins_short_branch(1);
15430 %}
15431 
15432 instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
15433   // Short-branch sign test of an int against zero: LT is mapped to NE and
15434   // any other accepted condition to EQ before calling tbr on bit 31.
15435   match(If cmp (CmpI op1 op2));
15436   effect(USE labl);
15437   format %{ "cb$cmp   $op1, $labl # int" %}
15438   ins_cost(BRANCH_COST);
15439   ins_encode %{
15440     const bool is_lt = ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT);
15441     Label* target = $labl$$label;
15442     __ tbr(is_lt ? Assembler::NE : Assembler::EQ, as_Register($op1$$reg), 31, *target);
15443   %}
15444   ins_pipe(pipe_cmp_branch);
15445   ins_short_branch(1);
15446 %}
15447 
15448 instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
15449   // Short-branch single-bit test of a long: matches a compare of (op1 & op2)
15450   // with op3 when op2 is a power of two, and passes exact_log2(op2) to tbr.
15451   match(If cmp (CmpL (AndL op1 op2) op3));
15452   predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
15453   effect(USE labl);
15454   format %{ "tb$cmp   $op1, $op2, $labl" %}
15455   ins_cost(BRANCH_COST);
15456   ins_encode %{
15457     const int bit_index = exact_log2($op2$$constant);
15458     Label* target = $labl$$label;
15459     __ tbr((Assembler::Condition)$cmp$$cmpcode, as_Register($op1$$reg), bit_index, *target);
15460   %}
15461   ins_pipe(pipe_cmp_branch);
15462   ins_short_branch(1);
15463 %}
15464 
// Short-branch form: (op1 & mask) ==/!= 0 on an int, where the predicate
// only admits masks that are a power of two.  The mask is turned into a
// bit index and a single-bit test-and-branch is emitted.
instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl)
%{
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  match(If cmp (CmpI (AndI op1 op2) op3));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  ins_short_branch(1);
  ins_pipe(pipe_cmp_branch);

  format %{ "tb$cmp   $op1, $op2, $labl" %}

  ins_encode %{
    // Index of the single set bit in the mask constant.
    int bit_index = exact_log2($op2$$constant);
    Label* target = $labl$$label;
    __ tbr((Assembler::Condition)$cmp$$cmpcode, $op1$$Register, bit_index, *target);
  %}
%}
15481 
15482 // And far variants
// Far form of the long sign-bit branch: same match rule as the short
// pattern, but without ins_short_branch and with the far flag passed to
// tbr.
instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl)
%{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  ins_pipe(pipe_cmp_branch);

  format %{ "cb$cmp   $op1, $labl # long" %}

  ins_encode %{
    // LT is translated to NE for the bit test; any other condition code
    // is translated to EQ.
    Assembler::Condition bit_cond = Assembler::EQ;
    if ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) {
      bit_cond = Assembler::NE;
    }
    Label* target = $labl$$label;
    __ tbr(bit_cond, $op1$$Register, 63, *target, /*far*/true);
  %}
%}
15497 
// Far form of the int sign-bit branch: same match rule as the short
// pattern, but without ins_short_branch and with the far flag passed to
// tbr.
instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl)
%{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  ins_pipe(pipe_cmp_branch);

  format %{ "cb$cmp   $op1, $labl # int" %}

  ins_encode %{
    // LT is translated to NE for the bit test; any other condition code
    // is translated to EQ.
    Assembler::Condition bit_cond = Assembler::EQ;
    if ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) {
      bit_cond = Assembler::NE;
    }
    Label* target = $labl$$label;
    __ tbr(bit_cond, $op1$$Register, 31, *target, /*far*/true);
  %}
%}
15512 
// Far form of the long single-bit test-and-branch (power-of-two mask).
instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl)
%{
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  match(If cmp (CmpL (AndL op1 op2) op3));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  ins_pipe(pipe_cmp_branch);

  format %{ "tb$cmp   $op1, $op2, $labl" %}

  ins_encode %{
    // Index of the single set bit in the mask constant.
    int bit_index = exact_log2($op2$$constant);
    Label* target = $labl$$label;
    __ tbr((Assembler::Condition)$cmp$$cmpcode, $op1$$Register, bit_index, *target, /*far*/true);
  %}
%}
15528 
// Far form of the int single-bit test-and-branch (power-of-two mask).
instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl)
%{
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  match(If cmp (CmpI (AndI op1 op2) op3));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  ins_pipe(pipe_cmp_branch);

  format %{ "tb$cmp   $op1, $op2, $labl" %}

  ins_encode %{
    // Index of the single set bit in the mask constant.
    int bit_index = exact_log2($op2$$constant);
    Label* target = $labl$$label;
    __ tbr((Assembler::Condition)$cmp$$cmpcode, $op1$$Register, bit_index, *target, /*far*/true);
  %}
%}
15544 
15545 // Test bits
15546 
// Set flags from (op1 & immediate) compared with zero, long version.
// Only selected when the mask is accepted by
// Assembler::operand_valid_for_logical_immediate for the 64-bit form.
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr)
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is_32*/false, n->in(1)->in(2)->get_long()));
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  ins_pipe(ialu_reg_reg);

  format %{ "tst $op1, $op2 # long" %}

  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
%}
15559 
// Set flags from (op1 & immediate) compared with zero, int version.
// Only selected when the mask is accepted by
// Assembler::operand_valid_for_logical_immediate for the 32-bit form.
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  // The listing mnemonic matches the emitted 32-bit instruction (tstw),
  // as it does in the register-register int pattern cmpI_and_reg.
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15572 
// Set flags from (op1 & op2) compared with zero, long register-register
// version.
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr)
%{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  ins_pipe(ialu_reg_reg);

  format %{ "tst $op1, $op2 # long" %}

  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ tst(lhs, rhs);
  %}
%}
15583 
// Set flags from (op1 & op2) compared with zero, int register-register
// version.
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr)
%{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  ins_pipe(ialu_reg_reg);

  format %{ "tstw $op1, $op2 # int" %}

  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ tstw(lhs, rhs);
  %}
%}
15594 
15595 
15596 // Conditional Far Branch
15597 // Conditional Far Branch Unsigned
15598 // TODO: fixme
15599 
15600 // counted loop end branch near
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl) %{
  // Conditional branch closing a counted loop; flags come in via cr.
  match(CountedLoopEnd cmp cr);
  effect(USE lbl);

  ins_cost(BRANCH_COST);
  ins_pipe(pipe_branch);

  // short variant.
  // ins_short_branch(1);

  format %{ "b$cmp $lbl \t// counted loop end" %}
  ins_encode(aarch64_enc_br_con(cmp, lbl));
%}
15616 
15617 // counted loop end branch near Unsigned
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl) %{
  // Unsigned-compare flavour of the counted-loop back branch; uses the
  // unsigned operand classes and the unsigned branch encoding.
  match(CountedLoopEnd cmp cr);
  effect(USE lbl);

  ins_cost(BRANCH_COST);
  ins_pipe(pipe_branch);

  // short variant.
  // ins_short_branch(1);

  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}
  ins_encode(aarch64_enc_br_conU(cmp, lbl));
%}
15633 
15634 // counted loop end branch far
15635 // counted loop end branch far unsigned
15636 // TODO: fixme
15637 
15638 // ============================================================================
15639 // inlined locking and unlocking
15640 
// Inlined monitor enter: produces its outcome in the flags register and
// needs two scratch pointer registers.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) %{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO: identify correct cost
  ins_cost(5 * INSN_COST);
  ins_pipe(pipe_serial);

  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}
  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));
%}
15655 
// Inlined monitor exit: produces its outcome in the flags register and
// needs two scratch pointer registers.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  ins_pipe(pipe_serial);

  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}
  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));
%}
15668 
15669 
15670 // ============================================================================
15671 // Safepoint Instructions
15672 
15673 // TODO
15674 // provide a near and far version of this code
15675 
// Safepoint poll: reads the polling page whose address is held in poll,
// tagging the access with a poll_type relocation.
instruct safePoint(iRegP poll) %{
  match(SafePoint poll);

  format %{ "ldrw zr, [$poll]\t# Safepoint: poll for GC" %}

  ins_encode %{
    const Register page = as_Register($poll$$reg);
    __ read_polling_page(page, relocInfo::poll_type);
  %}

  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
15688 
15689 
15690 // ============================================================================
15691 // Procedure Call/Return Instructions
15692 
15693 // Call Java Static Instruction
15694 
instruct CallStaticJavaDirect(method meth) %{
  // Static Java call: the call encoding is followed by the shared call
  // epilog encoding.
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(CALL_COST);
  ins_pipe(pipe_class_call);

  format %{ "call,static $meth \t// ==> " %}
  ins_encode(aarch64_enc_java_static_call(meth), aarch64_enc_call_epilog);
%}
15710 
15711 // TO HERE
15712 
15713 // Call Java Dynamic Instruction
instruct CallDynamicJavaDirect(method meth) %{
  // Dynamic Java call: the call encoding is followed by the shared call
  // epilog encoding.
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(CALL_COST);
  ins_pipe(pipe_class_call);

  format %{ "CALL,dynamic $meth \t// ==> " %}
  ins_encode(aarch64_enc_java_dynamic_call(meth), aarch64_enc_call_epilog);
%}
15729 
15730 // Call Runtime Instruction
15731 
instruct CallRuntimeDirect(method meth) %{
  // Call from compiled Java code into the runtime; no call epilog
  // encoding is attached.
  match(CallRuntime);
  effect(USE meth);

  ins_cost(CALL_COST);
  ins_pipe(pipe_class_call);

  format %{ "CALL, runtime $meth" %}
  ins_encode(aarch64_enc_java_to_runtime(meth));
%}
15746 
// Call Runtime Leaf Instruction
15748 
instruct CallLeafDirect(method meth) %{
  // Leaf runtime call; shares the java-to-runtime encoding with
  // CallRuntimeDirect.
  match(CallLeaf);
  effect(USE meth);

  ins_cost(CALL_COST);
  ins_pipe(pipe_class_call);

  format %{ "CALL, runtime leaf $meth" %}
  ins_encode(aarch64_enc_java_to_runtime(meth));
%}
15763 
// Call Runtime Leaf (NoFP) Instruction
15765 
instruct CallLeafNoFPDirect(method meth) %{
  // Leaf runtime call, NoFP flavour; shares the java-to-runtime encoding
  // with the other runtime call patterns.
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(CALL_COST);
  ins_pipe(pipe_class_call);

  format %{ "CALL, runtime leaf nofp $meth" %}
  ins_encode(aarch64_enc_java_to_runtime(meth));
%}
15780 
15781 // Tail Call; Jump from runtime stub to Java code.
15782 // Also known as an 'interprocedural jump'.
15783 // Target of jump will eventually return to caller.
15784 // TailJump below removes the return address.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop) %{
  // method_oop is part of the match rule but is not passed to the
  // encoding; only the jump target is.
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);
  ins_pipe(pipe_class_call);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}
  ins_encode(aarch64_enc_tail_call(jump_target));
%}
15797 
// Tail jump: indirect branch to jump_target with the exception oop
// pinned in the R0 operand class.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop) %{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);
  ins_pipe(pipe_class_call);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}
  ins_encode(aarch64_enc_tail_jmp(jump_target));
%}
15810 
15811 // Create exception oop: created by stack-crawling runtime code.
15812 // Created exception is now available to this handler, and is setup
15813 // just prior to jumping to this handler. No code emitted.
15814 // TODO check
15815 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
instruct CreateException(iRegP_R0 ex_oop) %{
  // Pure placeholder: zero size and an empty encoding, it only binds the
  // exception oop to the R0 operand class.
  match(Set ex_oop (CreateEx));

  size(0);
  ins_pipe(pipe_class_empty);

  format %{ " -- \t// exception oop; no code emitted" %}
  ins_encode( /*empty*/ );
%}
15828 
15829 // Rethrow exception: The exception oop will come in the first
15830 // argument position. Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  // No operands: the encoding alone performs the transfer to the
  // rethrow stub.
  match(Rethrow);

  ins_cost(CALL_COST);
  ins_pipe(pipe_class_call);

  format %{ "b rethrow_stub" %}
  ins_encode(aarch64_enc_rethrow());
%}
15841 
15842 
15843 // Return Instruction
15844 // epilog node loads ret address into lr as part of frame pop
instruct Ret() %{
  // Operand-less return; scheduled as an ordinary branch.
  match(Return);

  ins_pipe(pipe_branch);

  format %{ "ret\t// return register" %}
  ins_encode(aarch64_enc_ret());
%}
15855 
15856 // Die now.
instruct ShouldNotReachHere()
%{
  // Halt node: emits a single trapping instruction.
  match(Halt);

  ins_cost(CALL_COST);
  ins_pipe(pipe_class_default);

  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // The immediate is deliberately 0xdead + 1 rather than 0xdead so that
    // NativeInstruction::is_sigill_zombie_not_entrant() does not return
    // true for this instruction.
    __ dpcs1(0xdead + 1);
  %}
%}
15871 
15872 // ============================================================================
15873 // Partial Subtype Check
15874 //
// The 2nd slow-half of a subtype check.  Scan the subklass's secondary
// superklass array for an instance of the superklass.  Set a hidden
// internal cache on a hit (cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
// encoding ALSO sets flags.
15879 
// Value-producing form: the check outcome is delivered in result (R5).
// All operands are pinned to fixed registers; temp and the flags are
// killed.
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // same cost as partialSubtypeCheckVsZero
  ins_pipe(pipe_class_memory);

  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));
  opcode(0x1); // Force zero of result reg on hit
%}
15894 
// Flags-producing form: matches the subtype check compared against a
// null pointer constant, so only the flags are the output; result and
// temp are merely killed.
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // same cost as partialSubtypeCheck
  ins_pipe(pipe_class_memory);

  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));
  opcode(0x0); // Don't zero result reg on hit
%}
15909 
// StrComp for the UU encoding.  Inputs sit in fixed registers and are
// clobbered; no vector temporaries are supplied (fnoreg is passed).
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                         iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  ins_pipe(pipe_class_memory);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}

  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    const Register lhs     = $str1$$Register;
    const Register rhs     = $str2$$Register;
    const Register lhs_cnt = $cnt1$$Register;
    const Register rhs_cnt = $cnt2$$Register;
    __ string_compare(lhs, rhs, lhs_cnt, rhs_cnt,
                      $result$$Register, $tmp1$$Register,
                      fnoreg, fnoreg, StrIntrinsicNode::UU);
  %}
%}
15927 
// StrComp for the LL encoding.  Inputs sit in fixed registers and are
// clobbered; no vector temporaries are supplied (fnoreg is passed).
instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                         iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  ins_pipe(pipe_class_memory);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}

  ins_encode %{
    const Register lhs     = $str1$$Register;
    const Register rhs     = $str2$$Register;
    const Register lhs_cnt = $cnt1$$Register;
    const Register rhs_cnt = $cnt2$$Register;
    __ string_compare(lhs, rhs, lhs_cnt, rhs_cnt,
                      $result$$Register, $tmp1$$Register,
                      fnoreg, fnoreg, StrIntrinsicNode::LL);
  %}
%}
15944 
// StrComp for the mixed UL encoding.  Unlike the UU/LL patterns this one
// reserves two vector temporaries and hands them to string_compare.
instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                          iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
%{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);

  ins_pipe(pipe_class_memory);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}

  ins_encode %{
    const Register lhs     = $str1$$Register;
    const Register rhs     = $str2$$Register;
    const Register lhs_cnt = $cnt1$$Register;
    const Register rhs_cnt = $cnt2$$Register;
    __ string_compare(lhs, rhs, lhs_cnt, rhs_cnt,
                      $result$$Register, $tmp1$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      StrIntrinsicNode::UL);
  %}
%}
15961 
// StrComp for the mixed LU encoding.  Unlike the UU/LL patterns this one
// reserves two vector temporaries and hands them to string_compare.
instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                          iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
%{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);

  ins_pipe(pipe_class_memory);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}

  ins_encode %{
    const Register lhs     = $str1$$Register;
    const Register rhs     = $str2$$Register;
    const Register lhs_cnt = $cnt1$$Register;
    const Register rhs_cnt = $cnt2$$Register;
    __ string_compare(lhs, rhs, lhs_cnt, rhs_cnt,
                      $result$$Register, $tmp1$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      StrIntrinsicNode::LU);
  %}
%}
15978 
// StrIndexOf with both counts in registers, UU encoding.  The literal -1
// fills the constant-count argument of string_indexof (presumably
// meaning "count not known at compile time" -- confirm in the macro
// assembler).
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);

  ins_pipe(pipe_class_memory);

  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}

  ins_encode %{
    const Register hay     = $str1$$Register;
    const Register pat     = $str2$$Register;
    const Register hay_cnt = $cnt1$$Register;
    const Register pat_cnt = $cnt2$$Register;
    __ string_indexof(hay, pat, hay_cnt, pat_cnt,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UU);
  %}
%}
15997 
// StrIndexOf with both counts in registers, LL encoding.  The literal -1
// fills the constant-count argument of string_indexof.
instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);

  ins_pipe(pipe_class_memory);

  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}

  ins_encode %{
    const Register hay     = $str1$$Register;
    const Register pat     = $str2$$Register;
    const Register hay_cnt = $cnt1$$Register;
    const Register pat_cnt = $cnt2$$Register;
    __ string_indexof(hay, pat, hay_cnt, pat_cnt,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LL);
  %}
%}
16016 
// StrIndexOf with both counts in registers, UL encoding.  The literal -1
// fills the constant-count argument of string_indexof.
instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);

  ins_pipe(pipe_class_memory);

  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}

  ins_encode %{
    const Register hay     = $str1$$Register;
    const Register pat     = $str2$$Register;
    const Register hay_cnt = $cnt1$$Register;
    const Register pat_cnt = $cnt2$$Register;
    __ string_indexof(hay, pat, hay_cnt, pat_cnt,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UL);
  %}
%}
16035 
// StrIndexOf with both counts in registers, LU encoding.  The literal -1
// fills the constant-count argument of string_indexof.
instruct string_indexofLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);

  ins_pipe(pipe_class_memory);

  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LU)" %}

  ins_encode %{
    const Register hay     = $str1$$Register;
    const Register pat     = $str2$$Register;
    const Register hay_cnt = $cnt1$$Register;
    const Register pat_cnt = $cnt2$$Register;
    __ string_indexof(hay, pat, hay_cnt, pat_cnt,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LU);
  %}
%}
16054 
// StrIndexOf where the second count is an immediate (immI_le_4 operand
// class), UU encoding.  zr is passed in the register slot for the second
// count and the immediate travels as an int argument instead.
instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);

  ins_pipe(pipe_class_memory);

  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}

  ins_encode %{
    int pat_len = (int)$int_cnt2$$constant;
    const Register hay     = $str1$$Register;
    const Register pat     = $str2$$Register;
    const Register hay_cnt = $cnt1$$Register;
    __ string_indexof(hay, pat, hay_cnt, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      pat_len, $result$$Register, StrIntrinsicNode::UU);
  %}
%}
16075 
// StrIndexOf where the second count is an immediate (immI_le_4 operand
// class), LL encoding.  zr is passed in the register slot for the second
// count and the immediate travels as an int argument instead.
instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);

  ins_pipe(pipe_class_memory);

  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}

  ins_encode %{
    int pat_len = (int)$int_cnt2$$constant;
    const Register hay     = $str1$$Register;
    const Register pat     = $str2$$Register;
    const Register hay_cnt = $cnt1$$Register;
    __ string_indexof(hay, pat, hay_cnt, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      pat_len, $result$$Register, StrIntrinsicNode::LL);
  %}
%}
16096 
// StrIndexOf where the second count is an immediate, UL encoding.  Note
// the narrower operand class here (immI_1) compared with the UU/LL
// constant patterns (immI_le_4).
instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);

  ins_pipe(pipe_class_memory);

  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}

  ins_encode %{
    int pat_len = (int)$int_cnt2$$constant;
    const Register hay     = $str1$$Register;
    const Register pat     = $str2$$Register;
    const Register hay_cnt = $cnt1$$Register;
    __ string_indexof(hay, pat, hay_cnt, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      pat_len, $result$$Register, StrIntrinsicNode::UL);
  %}
%}
16117 
// StrIndexOf where the second count is an immediate, LU encoding.  Note
// the narrower operand class here (immI_1) compared with the UU/LL
// constant patterns (immI_le_4).
instruct string_indexof_conLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);

  ins_pipe(pipe_class_memory);

  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LU)" %}

  ins_encode %{
    int pat_len = (int)$int_cnt2$$constant;
    const Register hay     = $str1$$Register;
    const Register pat     = $str2$$Register;
    const Register hay_cnt = $cnt1$$Register;
    __ string_indexof(hay, pat, hay_cnt, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      pat_len, $result$$Register, StrIntrinsicNode::LU);
  %}
%}
16138 
// StrIndexOfChar: search for a single char value.  No encoding predicate
// is attached; string, count and char all arrive in fixed registers and
// are clobbered.
instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
                              iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                              iRegINoSp tmp3, rFlagsReg cr)
%{
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  ins_pipe(pipe_class_memory);

  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}

  ins_encode %{
    const Register hay     = $str1$$Register;
    const Register hay_cnt = $cnt1$$Register;
    const Register needle  = $ch$$Register;
    __ string_indexof_char(hay, hay_cnt, needle, $result$$Register,
                           $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
  %}
%}
16156 
// StrEquals for the LL encoding.  The count is passed to arrays_equals
// unchanged with an element size of 1.
// cnt is an integer count, so it uses the int register class pinned to
// R4 (iRegI_R4), the same class string_equalsU declares for its count,
// rather than a pointer register class.
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // No scaling of the count is done here, unlike string_equalsU which
    // shifts it right by one first.
    __ arrays_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register,
                     1, /*is_string*/true);
  %}
  ins_pipe(pipe_class_memory);
%}
16173 
// StrEquals for the UU encoding.  The count register is shifted right by
// one in place before arrays_equals is called with an element size of 2.
instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  ins_pipe(pipe_class_memory);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}

  ins_encode %{
    const Register count = $cnt$$Register;
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ asrw(count, count, 1);
    __ arrays_equals($str1$$Register, $str2$$Register,
                     $result$$Register, count,
                     2, /*is_string*/true);
  %}
%}
16191 
// AryEq for the LL encoding: arrays_equals is called with an element
// size of 1 and is_string false.  Both array pointers are clobbered and
// R4 is killed as a scratch register.
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R4 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  // The second operand needs its own '$' to be substituted in listings.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $result$$Register, $tmp$$Register,
                     1, /*is_string*/false);
  %}
  ins_pipe(pipe_class_memory);
%}
16207 
// AryEq for the UU encoding: arrays_equals is called with an element
// size of 2 and is_string false.  Both array pointers are clobbered and
// R4 is killed as a scratch register.
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R4 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  // The second operand needs its own '$' to be substituted in listings.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $result$$Register, $tmp$$Register,
                     2, /*is_string*/false);
  %}
  ins_pipe(pipe_class_memory);
%}
16223 
// HasNegatives intrinsic over (array, length); both inputs are clobbered
// and the answer is produced in R0.
instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr) %{
  match(Set result (HasNegatives ary1 len));
  effect(USE_KILL ary1, USE_KILL len, KILL cr);

  ins_pipe(pipe_slow);

  format %{ "has negatives byte[] $ary1,$len -> $result" %}

  ins_encode %{
    const Register bytes  = $ary1$$Register;
    const Register length = $len$$Register;
    __ has_negatives(bytes, length, $result$$Register);
  %}
%}
16234 
16235 // fast char[] to byte[] compression
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                         vRegD_V0 tmp1, vRegD_V1 tmp2,
                         vRegD_V2 tmp3, vRegD_V3 tmp4,
                         iRegI_R0 result, rFlagsReg cr)
%{
  // Source, destination and length are clobbered; V0-V3 serve as vector
  // temporaries via their fixed operand classes.
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  ins_pipe(pipe_slow);

  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}

  ins_encode %{
    const Register from  = $src$$Register;
    const Register to    = $dst$$Register;
    const Register count = $len$$Register;
    __ char_array_compress(from, to, count,
                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
                           $result$$Register);
  %}
%}
16253 
16254 // fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
                        vRegD tmp1, vRegD tmp2, vRegD tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
  // The node is matched as a Set on the dummy operand.  Three vector
  // temporaries plus one general-purpose temporary (R3) are reserved.
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  ins_pipe(pipe_class_memory);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}

  ins_encode %{
    const Register from  = $src$$Register;
    const Register to    = $dst$$Register;
    const Register count = $len$$Register;
    __ byte_array_inflate(from, to, count,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister,
                          $tmp3$$FloatRegister, $tmp4$$Register);
  %}
%}
16268 
16269 // encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  // The four vector registers are declared with KILL here (the string
  // compress pattern declares the same registers with TEMP).
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  ins_pipe(pipe_class_memory);

  format %{ "Encode array $src,$dst,$len -> $result" %}

  ins_encode %{
    const Register from  = $src$$Register;
    const Register to    = $dst$$Register;
    const Register count = $len$$Register;
    __ encode_iso_array(from, to, count, $result$$Register,
                        $Vtmp1$$FloatRegister, $Vtmp2$$FloatRegister,
                        $Vtmp3$$FloatRegister, $Vtmp4$$FloatRegister);
  %}
%}
16287 
16288 // ============================================================================
16289 // The name of this instruction is KNOWN by the ADLC and cannot be changed.
16290 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
16291 // for this instruction.
      // Matches ThreadLocal into a thread_RegP destination. The rule is declared
      // with zero cost, zero size and an empty encoding, so it emits no code.
16292 instruct tlsLoadP(thread_RegP dst)
16293 %{
16294   match(Set dst (ThreadLocal));
16295
16296   ins_cost(0);
16297
16298   format %{ " -- \t// $dst=Thread::current(), empty" %}
16299
16300   size(0);
16301
16302   ins_encode( /*empty*/ );
16303
16304   ins_pipe(pipe_class_empty);
16305 %}
16306 
16307 // ====================VECTOR INSTRUCTIONS=====================================
16308 
16309 // Load vector (32 bits)
      // Selected for LoadVector nodes whose memory_size() is 4; the load is
      // encoded by aarch64_enc_ldrvS from a vmem4 address into a vecD register.
16310 instruct loadV4(vecD dst, vmem4 mem)
16311 %{
16312   predicate(n->as_LoadVector()->memory_size() == 4);
16313   match(Set dst (LoadVector mem));
16314   ins_cost(4 * INSN_COST);
16315   format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
16316   ins_encode( aarch64_enc_ldrvS(dst, mem) );
16317   ins_pipe(vload_reg_mem64);
16318 %}
16319 
16320 // Load vector (64 bits)
      // Selected for LoadVector nodes whose memory_size() is 8; the load is
      // encoded by aarch64_enc_ldrvD from a vmem8 address into a vecD register.
16321 instruct loadV8(vecD dst, vmem8 mem)
16322 %{
16323   predicate(n->as_LoadVector()->memory_size() == 8);
16324   match(Set dst (LoadVector mem));
16325   ins_cost(4 * INSN_COST);
16326   format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
16327   ins_encode( aarch64_enc_ldrvD(dst, mem) );
16328   ins_pipe(vload_reg_mem64);
16329 %}
16330 
16331 // Load Vector (128 bits)
      // Selected for LoadVector nodes whose memory_size() is 16; the load is
      // encoded by aarch64_enc_ldrvQ from a vmem16 address into a vecX register.
16332 instruct loadV16(vecX dst, vmem16 mem)
16333 %{
16334   predicate(n->as_LoadVector()->memory_size() == 16);
16335   match(Set dst (LoadVector mem));
16336   ins_cost(4 * INSN_COST);
16337   format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
16338   ins_encode( aarch64_enc_ldrvQ(dst, mem) );
16339   ins_pipe(vload_reg_mem128);
16340 %}
16341 
16342 // Store Vector (32 bits)
      // Selected for StoreVector nodes whose memory_size() is 4; the store is
      // encoded by aarch64_enc_strvS from a vecD register to a vmem4 address.
16343 instruct storeV4(vecD src, vmem4 mem)
16344 %{
16345   predicate(n->as_StoreVector()->memory_size() == 4);
16346   match(Set mem (StoreVector mem src));
16347   ins_cost(4 * INSN_COST);
16348   format %{ "strs   $mem,$src\t# vector (32 bits)" %}
16349   ins_encode( aarch64_enc_strvS(src, mem) );
16350   ins_pipe(vstore_reg_mem64);
16351 %}
16352 
16353 // Store Vector (64 bits)
      // Selected for StoreVector nodes whose memory_size() is 8; the store is
      // encoded by aarch64_enc_strvD from a vecD register to a vmem8 address.
16354 instruct storeV8(vecD src, vmem8 mem)
16355 %{
16356   predicate(n->as_StoreVector()->memory_size() == 8);
16357   match(Set mem (StoreVector mem src));
16358   ins_cost(4 * INSN_COST);
16359   format %{ "strd   $mem,$src\t# vector (64 bits)" %}
16360   ins_encode( aarch64_enc_strvD(src, mem) );
16361   ins_pipe(vstore_reg_mem64);
16362 %}
16363 
16364 // Store Vector (128 bits)
      // Selected for StoreVector nodes whose memory_size() is 16; the store is
      // encoded by aarch64_enc_strvQ from a vecX register to a vmem16 address.
16365 instruct storeV16(vecX src, vmem16 mem)
16366 %{
16367   predicate(n->as_StoreVector()->memory_size() == 16);
16368   match(Set mem (StoreVector mem src));
16369   ins_cost(4 * INSN_COST);
16370   format %{ "strq   $mem,$src\t# vector (128 bits)" %}
16371   ins_encode( aarch64_enc_strvQ(src, mem) );
16372   ins_pipe(vstore_reg_mem128);
16373 %}
16374 
16375 instruct replicate8B(vecD dst, iRegIorL2I src)
16376 %{
        // ReplicateB from a general register into a vecD destination.
        // Accepts vectors of 4 or 8 elements; both cases emit the same dup with
        // the T8B arrangement.
16377   predicate(n->as_Vector()->length() == 4 ||
16378             n->as_Vector()->length() == 8);
16379   match(Set dst (ReplicateB src));
16380   ins_cost(INSN_COST);
16381   format %{ "dup  $dst, $src\t# vector (8B)" %}
16382   ins_encode %{
16383     __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
16384   %}
16385   ins_pipe(vdup_reg_reg64);
16386 %}
16387 
16388 instruct replicate16B(vecX dst, iRegIorL2I src)
16389 %{
        // ReplicateB from a general register into a vecX destination for
        // 16-element vectors; emits dup with the T16B arrangement.
16390   predicate(n->as_Vector()->length() == 16);
16391   match(Set dst (ReplicateB src));
16392   ins_cost(INSN_COST);
16393   format %{ "dup  $dst, $src\t# vector (16B)" %}
16394   ins_encode %{
16395     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
16396   %}
16397   ins_pipe(vdup_reg_reg128);
16398 %}
16399 
16400 instruct replicate8B_imm(vecD dst, immI con)
16401 %{
        // ReplicateB of an int constant into a vecD destination for vectors of
        // 4 or 8 elements; emits a vector mov with the T8B arrangement.
16402   predicate(n->as_Vector()->length() == 4 ||
16403             n->as_Vector()->length() == 8);
16404   match(Set dst (ReplicateB con));
16405   ins_cost(INSN_COST);
16406   format %{ "movi  $dst, $con\t# vector(8B)" %}
16407   ins_encode %{
          // Only the low 8 bits of the constant are passed to the assembler.
16408     __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
16409   %}
16410   ins_pipe(vmovi_reg_imm64);
16411 %}
16412 
16413 instruct replicate16B_imm(vecX dst, immI con)
16414 %{
        // ReplicateB of an int constant into a vecX destination for 16-element
        // vectors; emits a vector mov with the T16B arrangement.
16415   predicate(n->as_Vector()->length() == 16);
16416   match(Set dst (ReplicateB con));
16417   ins_cost(INSN_COST);
16418   format %{ "movi  $dst, $con\t# vector(16B)" %}
16419   ins_encode %{
          // Only the low 8 bits of the constant are passed to the assembler.
16420     __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
16421   %}
16422   ins_pipe(vmovi_reg_imm128);
16423 %}
16424 
16425 instruct replicate4S(vecD dst, iRegIorL2I src)
16426 %{
        // ReplicateS from a general register into a vecD destination for vectors
        // of 2 or 4 elements; both cases emit dup with the T4H arrangement.
        // NOTE(review): the format tag reads "(4S)" while the encoding and the
        // _imm variant of this rule use the "4H" spelling.
16427   predicate(n->as_Vector()->length() == 2 ||
16428             n->as_Vector()->length() == 4);
16429   match(Set dst (ReplicateS src));
16430   ins_cost(INSN_COST);
16431   format %{ "dup  $dst, $src\t# vector (4S)" %}
16432   ins_encode %{
16433     __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
16434   %}
16435   ins_pipe(vdup_reg_reg64);
16436 %}
16437 
16438 instruct replicate8S(vecX dst, iRegIorL2I src)
16439 %{
        // ReplicateS from a general register into a vecX destination for
        // 8-element vectors; emits dup with the T8H arrangement.
        // NOTE(review): the format tag reads "(8S)" while the encoding and the
        // _imm variant of this rule use the "8H" spelling.
16440   predicate(n->as_Vector()->length() == 8);
16441   match(Set dst (ReplicateS src));
16442   ins_cost(INSN_COST);
16443   format %{ "dup  $dst, $src\t# vector (8S)" %}
16444   ins_encode %{
16445     __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
16446   %}
16447   ins_pipe(vdup_reg_reg128);
16448 %}
16449 
16450 instruct replicate4S_imm(vecD dst, immI con)
16451 %{
        // ReplicateS of an int constant into a vecD destination for vectors of
        // 2 or 4 elements; emits a vector mov with the T4H arrangement.
16452   predicate(n->as_Vector()->length() == 2 ||
16453             n->as_Vector()->length() == 4);
16454   match(Set dst (ReplicateS con));
16455   ins_cost(INSN_COST);
16456   format %{ "movi  $dst, $con\t# vector(4H)" %}
16457   ins_encode %{
          // Only the low 16 bits of the constant are passed to the assembler.
16458     __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
16459   %}
16460   ins_pipe(vmovi_reg_imm64);
16461 %}
16462 
16463 instruct replicate8S_imm(vecX dst, immI con)
16464 %{
        // ReplicateS of an int constant into a vecX destination for 8-element
        // vectors; emits a vector mov with the T8H arrangement.
16465   predicate(n->as_Vector()->length() == 8);
16466   match(Set dst (ReplicateS con));
16467   ins_cost(INSN_COST);
16468   format %{ "movi  $dst, $con\t# vector(8H)" %}
16469   ins_encode %{
          // Only the low 16 bits of the constant are passed to the assembler.
16470     __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
16471   %}
16472   ins_pipe(vmovi_reg_imm128);
16473 %}
16474 
16475 instruct replicate2I(vecD dst, iRegIorL2I src)
16476 %{
        // ReplicateI from a general register into a vecD destination for
        // 2-element vectors; emits dup with the T2S arrangement.
16477   predicate(n->as_Vector()->length() == 2);
16478   match(Set dst (ReplicateI src));
16479   ins_cost(INSN_COST);
16480   format %{ "dup  $dst, $src\t# vector (2I)" %}
16481   ins_encode %{
16482     __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
16483   %}
16484   ins_pipe(vdup_reg_reg64);
16485 %}
16486 
16487 instruct replicate4I(vecX dst, iRegIorL2I src)
16488 %{
        // ReplicateI from a general register into a vecX destination for
        // 4-element vectors; emits dup with the T4S arrangement.
16489   predicate(n->as_Vector()->length() == 4);
16490   match(Set dst (ReplicateI src));
16491   ins_cost(INSN_COST);
16492   format %{ "dup  $dst, $src\t# vector (4I)" %}
16493   ins_encode %{
16494     __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
16495   %}
16496   ins_pipe(vdup_reg_reg128);
16497 %}
16498 
16499 instruct replicate2I_imm(vecD dst, immI con)
16500 %{
        // ReplicateI of an int constant into a vecD destination for 2-element
        // vectors; emits a vector mov with the T2S arrangement. The constant is
        // passed to the assembler unmasked.
16501   predicate(n->as_Vector()->length() == 2);
16502   match(Set dst (ReplicateI con));
16503   ins_cost(INSN_COST);
16504   format %{ "movi  $dst, $con\t# vector(2I)" %}
16505   ins_encode %{
16506     __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
16507   %}
16508   ins_pipe(vmovi_reg_imm64);
16509 %}
16510 
16511 instruct replicate4I_imm(vecX dst, immI con)
16512 %{
        // ReplicateI of an int constant into a vecX destination for 4-element
        // vectors; emits a vector mov with the T4S arrangement. The constant is
        // passed to the assembler unmasked.
16513   predicate(n->as_Vector()->length() == 4);
16514   match(Set dst (ReplicateI con));
16515   ins_cost(INSN_COST);
16516   format %{ "movi  $dst, $con\t# vector(4I)" %}
16517   ins_encode %{
16518     __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
16519   %}
16520   ins_pipe(vmovi_reg_imm128);
16521 %}
16522 
16523 instruct replicate2L(vecX dst, iRegL src)
16524 %{
        // ReplicateL from a long general register into a vecX destination for
        // 2-element vectors; emits dup with the T2D arrangement.
16525   predicate(n->as_Vector()->length() == 2);
16526   match(Set dst (ReplicateL src));
16527   ins_cost(INSN_COST);
16528   format %{ "dup  $dst, $src\t# vector (2L)" %}
16529   ins_encode %{
16530     __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
16531   %}
16532   ins_pipe(vdup_reg_reg128);
16533 %}
16534 
16535 instruct replicate2L_zero(vecX dst, immI0 zero)
16536 %{
        // Zeroes a vecX destination by XOR-ing the register with itself over
        // all 16 bytes, for a 2-element replicate of the constant zero.
        // NOTE(review): despite the "2L" name, this rule matches ReplicateI with
        // an immI0 operand and its format says "vector(4I)" -- confirm whether
        // ReplicateL / a long zero operand was intended.
16537   predicate(n->as_Vector()->length() == 2);
16538   match(Set dst (ReplicateI zero));
16539   ins_cost(INSN_COST);
16540   format %{ "movi  $dst, $zero\t# vector(4I)" %}
16541   ins_encode %{
          // The format prints "movi", but the emitted instruction is eor dst, dst, dst.
16542     __ eor(as_FloatRegister($dst$$reg), __ T16B,
16543            as_FloatRegister($dst$$reg),
16544            as_FloatRegister($dst$$reg));
16545   %}
16546   ins_pipe(vmovi_reg_imm128);
16547 %}
16548 
16549 instruct replicate2F(vecD dst, vRegF src)
16550 %{
        // ReplicateF from a float register into a vecD destination for
        // 2-element vectors; emits the FloatRegister-source form of dup with
        // the T2S arrangement.
16551   predicate(n->as_Vector()->length() == 2);
16552   match(Set dst (ReplicateF src));
16553   ins_cost(INSN_COST);
16554   format %{ "dup  $dst, $src\t# vector (2F)" %}
16555   ins_encode %{
16556     __ dup(as_FloatRegister($dst$$reg), __ T2S,
16557            as_FloatRegister($src$$reg));
16558   %}
16559   ins_pipe(vdup_reg_freg64);
16560 %}
16561 
16562 instruct replicate4F(vecX dst, vRegF src)
16563 %{
        // ReplicateF from a float register into a vecX destination for
        // 4-element vectors; emits the FloatRegister-source form of dup with
        // the T4S arrangement.
16564   predicate(n->as_Vector()->length() == 4);
16565   match(Set dst (ReplicateF src));
16566   ins_cost(INSN_COST);
16567   format %{ "dup  $dst, $src\t# vector (4F)" %}
16568   ins_encode %{
16569     __ dup(as_FloatRegister($dst$$reg), __ T4S,
16570            as_FloatRegister($src$$reg));
16571   %}
16572   ins_pipe(vdup_reg_freg128);
16573 %}
16574 
16575 instruct replicate2D(vecX dst, vRegD src)
16576 %{
        // ReplicateD from a double register into a vecX destination for
        // 2-element vectors; emits the FloatRegister-source form of dup with
        // the T2D arrangement.
16577   predicate(n->as_Vector()->length() == 2);
16578   match(Set dst (ReplicateD src));
16579   ins_cost(INSN_COST);
16580   format %{ "dup  $dst, $src\t# vector (2D)" %}
16581   ins_encode %{
16582     __ dup(as_FloatRegister($dst$$reg), __ T2D,
16583            as_FloatRegister($src$$reg));
16584   %}
16585   ins_pipe(vdup_reg_dreg128);
16586 %}
16587 
16588 // ====================REDUCTION ARITHMETIC====================================
16589 
16590 instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp, iRegINoSp tmp2)
16591 %{
        // AddReductionVI over a vecD source: dst = src1 + src2[0] + src2[1].
        // Both S elements are moved to integer temporaries with umov and then
        // summed with two 32-bit addw instructions. There is no length
        // predicate; the vecD operand is what separates this rule from the
        // vecX variant.
16592   match(Set dst (AddReductionVI src1 src2));
16593   ins_cost(INSN_COST);
16594   effect(TEMP tmp, TEMP tmp2);
16595   format %{ "umov  $tmp, $src2, S, 0\n\t"
16596             "umov  $tmp2, $src2, S, 1\n\t"
16597             "addw  $dst, $src1, $tmp\n\t"
16598             "addw  $dst, $dst, $tmp2\t add reduction2i"
16599   %}
16600   ins_encode %{
16601     __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
16602     __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
16603     __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
16604     __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
16605   %}
16606   ins_pipe(pipe_class_default);
16607 %}
16608 
16609 instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
16610 %{
        // AddReductionVI over a vecX source. A single addv (T4S) writes its
        // result into the vector temporary, element 0 of that temporary is
        // moved to an integer register, and src1 is added with addw.
16611   match(Set dst (AddReductionVI src1 src2));
16612   ins_cost(INSN_COST);
16613   effect(TEMP tmp, TEMP tmp2);
16614   format %{ "addv  $tmp, T4S, $src2\n\t"
16615             "umov  $tmp2, $tmp, S, 0\n\t"
16616             "addw  $dst, $tmp2, $src1\t add reduction4i"
16617   %}
16618   ins_encode %{
16619     __ addv(as_FloatRegister($tmp$$reg), __ T4S,
16620             as_FloatRegister($src2$$reg));
16621     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
16622     __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
16623   %}
16624   ins_pipe(pipe_class_default);
16625 %}
16626 
16627 instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp)
16628 %{
        // MulReductionVI over a vecD source: dst = src1 * src2[0] * src2[1].
        // Each S element is moved to the integer temporary with umov and
        // multiplied into dst. dst is declared TEMP because it is written
        // before the last read of src2.
16629   match(Set dst (MulReductionVI src1 src2));
16630   ins_cost(INSN_COST);
16631   effect(TEMP tmp, TEMP dst);
16632   format %{ "umov  $tmp, $src2, S, 0\n\t"
16633             "mul   $dst, $tmp, $src1\n\t"
16634             "umov  $tmp, $src2, S, 1\n\t"
16635             "mul   $dst, $tmp, $dst\t mul reduction2i"
16636   %}
16637   ins_encode %{
16638     __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
16639     __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
16640     __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
16641     __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
16642   %}
16643   ins_pipe(pipe_class_default);
16644 %}
16645 
16646 instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
16647 %{
        // MulReductionVI over a vecX source. One D-sized element of src2 is
        // copied into tmp with ins, tmp is multiplied element-wise with src2
        // (mulv, T2S), and S elements 0 and 1 of that product are then moved
        // out with umov and multiplied together with src1 into dst.
        // The format text mirrors the emitted instructions, including the
        // D / T2S arrangements.
16648   match(Set dst (MulReductionVI src1 src2));
16649   ins_cost(INSN_COST);
16650   effect(TEMP tmp, TEMP tmp2, TEMP dst);
16651   format %{ "ins   $tmp, D, $src2, 0, 1\n\t"
16652             "mulv  $tmp, T2S, $tmp, $src2\n\t"
16653             "umov  $tmp2, $tmp, S, 0\n\t"
16654             "mul   $dst, $tmp2, $src1\n\t"
16655             "umov  $tmp2, $tmp, S, 1\n\t"
16656             "mul   $dst, $tmp2, $dst\t mul reduction4i"
16657   %}
16658   ins_encode %{
          // The trailing (0, 1) arguments are presumably (destination index,
          // source index) -- verify against the assembler's ins() signature.
16659     __ ins(as_FloatRegister($tmp$$reg), __ D,
16660            as_FloatRegister($src2$$reg), 0, 1);
16661     __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
16662            as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
16663     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
16664     __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
16665     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
16666     __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
16667   %}
16668   ins_pipe(pipe_class_default);
16669 %}
16670 
16671 instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
16672 %{
        // AddReductionVF over a vecD source. src1 is first added to src2 with a
        // scalar fadds, then one further S element of src2 is copied into tmp
        // with ins and added with a second fadds.
16673   match(Set dst (AddReductionVF src1 src2));
16674   ins_cost(INSN_COST);
16675   effect(TEMP tmp, TEMP dst);
16676   format %{ "fadds $dst, $src1, $src2\n\t"
16677             "ins   $tmp, S, $src2, 0, 1\n\t"
16678             "fadds $dst, $dst, $tmp\t add reduction2f"
16679   %}
16680   ins_encode %{
16681     __ fadds(as_FloatRegister($dst$$reg),
16682              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
          // The trailing (0, 1) arguments are presumably (destination index,
          // source index) -- verify against the assembler's ins() signature.
16683     __ ins(as_FloatRegister($tmp$$reg), __ S,
16684            as_FloatRegister($src2$$reg), 0, 1);
16685     __ fadds(as_FloatRegister($dst$$reg),
16686              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16687   %}
16688   ins_pipe(pipe_class_default);
16689 %}
16690 
16691 instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
16692 %{
        // AddReductionVF over a vecX source. src1 is first added to src2 with a
        // scalar fadds; then, for indices 1, 2 and 3 in turn, an S element of
        // src2 is copied into tmp with ins and accumulated into dst with fadds.
        // The additions are performed one after another, never pairwise.
16693   match(Set dst (AddReductionVF src1 src2));
16694   ins_cost(INSN_COST);
16695   effect(TEMP tmp, TEMP dst);
16696   format %{ "fadds $dst, $src1, $src2\n\t"
16697             "ins   $tmp, S, $src2, 0, 1\n\t"
16698             "fadds $dst, $dst, $tmp\n\t"
16699             "ins   $tmp, S, $src2, 0, 2\n\t"
16700             "fadds $dst, $dst, $tmp\n\t"
16701             "ins   $tmp, S, $src2, 0, 3\n\t"
16702             "fadds $dst, $dst, $tmp\t add reduction4f"
16703   %}
16704   ins_encode %{
16705     __ fadds(as_FloatRegister($dst$$reg),
16706              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
          // In each ins below the trailing (0, k) arguments are presumably
          // (destination index, source index) -- verify against the assembler.
16707     __ ins(as_FloatRegister($tmp$$reg), __ S,
16708            as_FloatRegister($src2$$reg), 0, 1);
16709     __ fadds(as_FloatRegister($dst$$reg),
16710              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16711     __ ins(as_FloatRegister($tmp$$reg), __ S,
16712            as_FloatRegister($src2$$reg), 0, 2);
16713     __ fadds(as_FloatRegister($dst$$reg),
16714              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16715     __ ins(as_FloatRegister($tmp$$reg), __ S,
16716            as_FloatRegister($src2$$reg), 0, 3);
16717     __ fadds(as_FloatRegister($dst$$reg),
16718              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16719   %}
16720   ins_pipe(pipe_class_default);
16721 %}
16722 
16723 instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
16724 %{
        // MulReductionVF over a vecD source. src1 is first multiplied with src2
        // using a scalar fmuls, then one further S element of src2 is copied
        // into tmp with ins and multiplied in with a second fmuls.
        // The format label names this rule (mul, 2 floats) so that disassembly
        // listings are not confused with the add reductions.
16725   match(Set dst (MulReductionVF src1 src2));
16726   ins_cost(INSN_COST);
16727   effect(TEMP tmp, TEMP dst);
16728   format %{ "fmuls $dst, $src1, $src2\n\t"
16729             "ins   $tmp, S, $src2, 0, 1\n\t"
16730             "fmuls $dst, $dst, $tmp\t mul reduction2f"
16731   %}
16732   ins_encode %{
16733     __ fmuls(as_FloatRegister($dst$$reg),
16734              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
          // The trailing (0, 1) arguments are presumably (destination index,
          // source index) -- verify against the assembler's ins() signature.
16735     __ ins(as_FloatRegister($tmp$$reg), __ S,
16736            as_FloatRegister($src2$$reg), 0, 1);
16737     __ fmuls(as_FloatRegister($dst$$reg),
16738              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16739   %}
16740   ins_pipe(pipe_class_default);
16741 %}
16742 
16743 instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
16744 %{
        // MulReductionVF over a vecX source. src1 is first multiplied with src2
        // using a scalar fmuls; then, for indices 1, 2 and 3 in turn, an S
        // element of src2 is copied into tmp with ins and multiplied into dst.
        // The multiplications are performed one after another, never pairwise.
        // The format label names this rule (mul, 4 floats) so that disassembly
        // listings are not confused with the add reductions.
16745   match(Set dst (MulReductionVF src1 src2));
16746   ins_cost(INSN_COST);
16747   effect(TEMP tmp, TEMP dst);
16748   format %{ "fmuls $dst, $src1, $src2\n\t"
16749             "ins   $tmp, S, $src2, 0, 1\n\t"
16750             "fmuls $dst, $dst, $tmp\n\t"
16751             "ins   $tmp, S, $src2, 0, 2\n\t"
16752             "fmuls $dst, $dst, $tmp\n\t"
16753             "ins   $tmp, S, $src2, 0, 3\n\t"
16754             "fmuls $dst, $dst, $tmp\t mul reduction4f"
16755   %}
16756   ins_encode %{
16757     __ fmuls(as_FloatRegister($dst$$reg),
16758              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
          // In each ins below the trailing (0, k) arguments are presumably
          // (destination index, source index) -- verify against the assembler.
16759     __ ins(as_FloatRegister($tmp$$reg), __ S,
16760            as_FloatRegister($src2$$reg), 0, 1);
16761     __ fmuls(as_FloatRegister($dst$$reg),
16762              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16763     __ ins(as_FloatRegister($tmp$$reg), __ S,
16764            as_FloatRegister($src2$$reg), 0, 2);
16765     __ fmuls(as_FloatRegister($dst$$reg),
16766              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16767     __ ins(as_FloatRegister($tmp$$reg), __ S,
16768            as_FloatRegister($src2$$reg), 0, 3);
16769     __ fmuls(as_FloatRegister($dst$$reg),
16770              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16771   %}
16772   ins_pipe(pipe_class_default);
16773 %}
16774 
16775 instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
16776 %{
        // AddReductionVD over a vecX source. src1 is first added to src2 with a
        // scalar faddd, then one further D element of src2 is copied into tmp
        // with ins and added with a second faddd.
16777   match(Set dst (AddReductionVD src1 src2));
16778   ins_cost(INSN_COST);
16779   effect(TEMP tmp, TEMP dst);
16780   format %{ "faddd $dst, $src1, $src2\n\t"
16781             "ins   $tmp, D, $src2, 0, 1\n\t"
16782             "faddd $dst, $dst, $tmp\t add reduction2d"
16783   %}
16784   ins_encode %{
16785     __ faddd(as_FloatRegister($dst$$reg),
16786              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
          // The trailing (0, 1) arguments are presumably (destination index,
          // source index) -- verify against the assembler's ins() signature.
16787     __ ins(as_FloatRegister($tmp$$reg), __ D,
16788            as_FloatRegister($src2$$reg), 0, 1);
16789     __ faddd(as_FloatRegister($dst$$reg),
16790              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16791   %}
16792   ins_pipe(pipe_class_default);
16793 %}
16794 
16795 instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
16796 %{
        // MulReductionVD over a vecX source. src1 is first multiplied with src2
        // using a scalar fmuld, then one further D element of src2 is copied
        // into tmp with ins and multiplied in with a second fmuld.
        // The format label names this rule (mul, 2 doubles) so that disassembly
        // listings are not confused with the add reduction.
16797   match(Set dst (MulReductionVD src1 src2));
16798   ins_cost(INSN_COST);
16799   effect(TEMP tmp, TEMP dst);
16800   format %{ "fmuld $dst, $src1, $src2\n\t"
16801             "ins   $tmp, D, $src2, 0, 1\n\t"
16802             "fmuld $dst, $dst, $tmp\t mul reduction2d"
16803   %}
16804   ins_encode %{
16805     __ fmuld(as_FloatRegister($dst$$reg),
16806              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
          // The trailing (0, 1) arguments are presumably (destination index,
          // source index) -- verify against the assembler's ins() signature.
16807     __ ins(as_FloatRegister($tmp$$reg), __ D,
16808            as_FloatRegister($src2$$reg), 0, 1);
16809     __ fmuld(as_FloatRegister($dst$$reg),
16810              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
16811   %}
16812   ins_pipe(pipe_class_default);
16813 %}
16814 
16815 // ====================VECTOR ARITHMETIC=======================================
16816 
16817 // --------------------------------- ADD --------------------------------------
16818 
16819 instruct vadd8B(vecD dst, vecD src1, vecD src2)
16820 %{
        // AddVB on vecD operands for vectors of 4 or 8 elements; both cases
        // emit one addv with the T8B arrangement.
16821   predicate(n->as_Vector()->length() == 4 ||
16822             n->as_Vector()->length() == 8);
16823   match(Set dst (AddVB src1 src2));
16824   ins_cost(INSN_COST);
16825   format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
16826   ins_encode %{
16827     __ addv(as_FloatRegister($dst$$reg), __ T8B,
16828             as_FloatRegister($src1$$reg),
16829             as_FloatRegister($src2$$reg));
16830   %}
16831   ins_pipe(vdop64);
16832 %}
16833 
16834 instruct vadd16B(vecX dst, vecX src1, vecX src2)
16835 %{
        // AddVB on vecX operands for 16-element vectors; emits one addv with
        // the T16B arrangement.
16836   predicate(n->as_Vector()->length() == 16);
16837   match(Set dst (AddVB src1 src2));
16838   ins_cost(INSN_COST);
16839   format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
16840   ins_encode %{
16841     __ addv(as_FloatRegister($dst$$reg), __ T16B,
16842             as_FloatRegister($src1$$reg),
16843             as_FloatRegister($src2$$reg));
16844   %}
16845   ins_pipe(vdop128);
16846 %}
16847 
16848 instruct vadd4S(vecD dst, vecD src1, vecD src2)
16849 %{
        // AddVS on vecD operands for vectors of 2 or 4 elements; both cases
        // emit one addv with the T4H arrangement.
16850   predicate(n->as_Vector()->length() == 2 ||
16851             n->as_Vector()->length() == 4);
16852   match(Set dst (AddVS src1 src2));
16853   ins_cost(INSN_COST);
16854   format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
16855   ins_encode %{
16856     __ addv(as_FloatRegister($dst$$reg), __ T4H,
16857             as_FloatRegister($src1$$reg),
16858             as_FloatRegister($src2$$reg));
16859   %}
16860   ins_pipe(vdop64);
16861 %}
16862 
16863 instruct vadd8S(vecX dst, vecX src1, vecX src2)
16864 %{
        // AddVS on vecX operands for 8-element vectors; emits one addv with
        // the T8H arrangement.
16865   predicate(n->as_Vector()->length() == 8);
16866   match(Set dst (AddVS src1 src2));
16867   ins_cost(INSN_COST);
16868   format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
16869   ins_encode %{
16870     __ addv(as_FloatRegister($dst$$reg), __ T8H,
16871             as_FloatRegister($src1$$reg),
16872             as_FloatRegister($src2$$reg));
16873   %}
16874   ins_pipe(vdop128);
16875 %}
16876 
16877 instruct vadd2I(vecD dst, vecD src1, vecD src2)
16878 %{
        // AddVI on vecD operands for 2-element vectors; emits one addv with
        // the T2S arrangement.
16879   predicate(n->as_Vector()->length() == 2);
16880   match(Set dst (AddVI src1 src2));
16881   ins_cost(INSN_COST);
16882   format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
16883   ins_encode %{
16884     __ addv(as_FloatRegister($dst$$reg), __ T2S,
16885             as_FloatRegister($src1$$reg),
16886             as_FloatRegister($src2$$reg));
16887   %}
16888   ins_pipe(vdop64);
16889 %}
16890 
16891 instruct vadd4I(vecX dst, vecX src1, vecX src2)
16892 %{
        // AddVI on vecX operands for 4-element vectors; emits one addv with
        // the T4S arrangement.
16893   predicate(n->as_Vector()->length() == 4);
16894   match(Set dst (AddVI src1 src2));
16895   ins_cost(INSN_COST);
16896   format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
16897   ins_encode %{
16898     __ addv(as_FloatRegister($dst$$reg), __ T4S,
16899             as_FloatRegister($src1$$reg),
16900             as_FloatRegister($src2$$reg));
16901   %}
16902   ins_pipe(vdop128);
16903 %}
16904 
16905 instruct vadd2L(vecX dst, vecX src1, vecX src2)
16906 %{
        // AddVL on vecX operands for 2-element vectors; emits one addv with
        // the T2D arrangement.
16907   predicate(n->as_Vector()->length() == 2);
16908   match(Set dst (AddVL src1 src2));
16909   ins_cost(INSN_COST);
16910   format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
16911   ins_encode %{
16912     __ addv(as_FloatRegister($dst$$reg), __ T2D,
16913             as_FloatRegister($src1$$reg),
16914             as_FloatRegister($src2$$reg));
16915   %}
16916   ins_pipe(vdop128);
16917 %}
16918 
16919 instruct vadd2F(vecD dst, vecD src1, vecD src2)
16920 %{
        // AddVF on vecD operands for 2-element vectors; emits one vector fadd
        // with the T2S arrangement.
16921   predicate(n->as_Vector()->length() == 2);
16922   match(Set dst (AddVF src1 src2));
16923   ins_cost(INSN_COST);
16924   format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
16925   ins_encode %{
16926     __ fadd(as_FloatRegister($dst$$reg), __ T2S,
16927             as_FloatRegister($src1$$reg),
16928             as_FloatRegister($src2$$reg));
16929   %}
16930   ins_pipe(vdop_fp64);
16931 %}
16932 
16933 instruct vadd4F(vecX dst, vecX src1, vecX src2)
16934 %{
        // AddVF on vecX operands for 4-element vectors; emits one vector fadd
        // with the T4S arrangement.
16935   predicate(n->as_Vector()->length() == 4);
16936   match(Set dst (AddVF src1 src2));
16937   ins_cost(INSN_COST);
16938   format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
16939   ins_encode %{
16940     __ fadd(as_FloatRegister($dst$$reg), __ T4S,
16941             as_FloatRegister($src1$$reg),
16942             as_FloatRegister($src2$$reg));
16943   %}
16944   ins_pipe(vdop_fp128);
16945 %}
16946 
16947 instruct vadd2D(vecX dst, vecX src1, vecX src2)
16948 %{
        // AddVD on vecX operands for 2-element vectors; emits one vector fadd
        // with the T2D arrangement. The explicit length predicate keeps this
        // rule in line with the SUB and MUL rules for the same element type,
        // and stops it from matching any other vector length.
        predicate(n->as_Vector()->length() == 2);
16949   match(Set dst (AddVD src1 src2));
16950   ins_cost(INSN_COST);
16951   format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
16952   ins_encode %{
16953     __ fadd(as_FloatRegister($dst$$reg), __ T2D,
16954             as_FloatRegister($src1$$reg),
16955             as_FloatRegister($src2$$reg));
16956   %}
16957   ins_pipe(vdop_fp128);
16958 %}
16959 
16960 // --------------------------------- SUB --------------------------------------
16961 
16962 instruct vsub8B(vecD dst, vecD src1, vecD src2)
16963 %{
        // SubVB on vecD operands for vectors of 4 or 8 elements; both cases
        // emit one subv with the T8B arrangement.
16964   predicate(n->as_Vector()->length() == 4 ||
16965             n->as_Vector()->length() == 8);
16966   match(Set dst (SubVB src1 src2));
16967   ins_cost(INSN_COST);
16968   format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
16969   ins_encode %{
16970     __ subv(as_FloatRegister($dst$$reg), __ T8B,
16971             as_FloatRegister($src1$$reg),
16972             as_FloatRegister($src2$$reg));
16973   %}
16974   ins_pipe(vdop64);
16975 %}
16976 
16977 instruct vsub16B(vecX dst, vecX src1, vecX src2)
16978 %{
        // SubVB on vecX operands for 16-element vectors; emits one subv with
        // the T16B arrangement.
16979   predicate(n->as_Vector()->length() == 16);
16980   match(Set dst (SubVB src1 src2));
16981   ins_cost(INSN_COST);
16982   format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
16983   ins_encode %{
16984     __ subv(as_FloatRegister($dst$$reg), __ T16B,
16985             as_FloatRegister($src1$$reg),
16986             as_FloatRegister($src2$$reg));
16987   %}
16988   ins_pipe(vdop128);
16989 %}
16990 
16991 instruct vsub4S(vecD dst, vecD src1, vecD src2)
16992 %{
        // SubVS on vecD operands for vectors of 2 or 4 elements; both cases
        // emit one subv with the T4H arrangement.
16993   predicate(n->as_Vector()->length() == 2 ||
16994             n->as_Vector()->length() == 4);
16995   match(Set dst (SubVS src1 src2));
16996   ins_cost(INSN_COST);
16997   format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
16998   ins_encode %{
16999     __ subv(as_FloatRegister($dst$$reg), __ T4H,
17000             as_FloatRegister($src1$$reg),
17001             as_FloatRegister($src2$$reg));
17002   %}
17003   ins_pipe(vdop64);
17004 %}
17005 
17006 instruct vsub8S(vecX dst, vecX src1, vecX src2)
17007 %{
        // SubVS on vecX operands for 8-element vectors; emits one subv with
        // the T8H arrangement.
17008   predicate(n->as_Vector()->length() == 8);
17009   match(Set dst (SubVS src1 src2));
17010   ins_cost(INSN_COST);
17011   format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
17012   ins_encode %{
17013     __ subv(as_FloatRegister($dst$$reg), __ T8H,
17014             as_FloatRegister($src1$$reg),
17015             as_FloatRegister($src2$$reg));
17016   %}
17017   ins_pipe(vdop128);
17018 %}
17019 
17020 instruct vsub2I(vecD dst, vecD src1, vecD src2)
17021 %{
        // SubVI on vecD operands for 2-element vectors; emits one subv with
        // the T2S arrangement.
17022   predicate(n->as_Vector()->length() == 2);
17023   match(Set dst (SubVI src1 src2));
17024   ins_cost(INSN_COST);
17025   format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
17026   ins_encode %{
17027     __ subv(as_FloatRegister($dst$$reg), __ T2S,
17028             as_FloatRegister($src1$$reg),
17029             as_FloatRegister($src2$$reg));
17030   %}
17031   ins_pipe(vdop64);
17032 %}
17033 
17034 instruct vsub4I(vecX dst, vecX src1, vecX src2)
17035 %{
        // SubVI on vecX operands for 4-element vectors; emits one subv with
        // the T4S arrangement.
17036   predicate(n->as_Vector()->length() == 4);
17037   match(Set dst (SubVI src1 src2));
17038   ins_cost(INSN_COST);
17039   format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
17040   ins_encode %{
17041     __ subv(as_FloatRegister($dst$$reg), __ T4S,
17042             as_FloatRegister($src1$$reg),
17043             as_FloatRegister($src2$$reg));
17044   %}
17045   ins_pipe(vdop128);
17046 %}
17047 
17048 instruct vsub2L(vecX dst, vecX src1, vecX src2)
17049 %{
        // SubVL on vecX operands for 2-element vectors; emits one subv with
        // the T2D arrangement.
17050   predicate(n->as_Vector()->length() == 2);
17051   match(Set dst (SubVL src1 src2));
17052   ins_cost(INSN_COST);
17053   format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
17054   ins_encode %{
17055     __ subv(as_FloatRegister($dst$$reg), __ T2D,
17056             as_FloatRegister($src1$$reg),
17057             as_FloatRegister($src2$$reg));
17058   %}
17059   ins_pipe(vdop128);
17060 %}
17061 
17062 instruct vsub2F(vecD dst, vecD src1, vecD src2)
17063 %{
        // SubVF on vecD operands for 2-element vectors; emits one vector fsub
        // with the T2S arrangement.
17064   predicate(n->as_Vector()->length() == 2);
17065   match(Set dst (SubVF src1 src2));
17066   ins_cost(INSN_COST);
17067   format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
17068   ins_encode %{
17069     __ fsub(as_FloatRegister($dst$$reg), __ T2S,
17070             as_FloatRegister($src1$$reg),
17071             as_FloatRegister($src2$$reg));
17072   %}
17073   ins_pipe(vdop_fp64);
17074 %}
17075 
17076 instruct vsub4F(vecX dst, vecX src1, vecX src2)
17077 %{
        // SubVF on vecX operands for 4-element vectors; emits one vector fsub
        // with the T4S arrangement.
17078   predicate(n->as_Vector()->length() == 4);
17079   match(Set dst (SubVF src1 src2));
17080   ins_cost(INSN_COST);
17081   format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
17082   ins_encode %{
17083     __ fsub(as_FloatRegister($dst$$reg), __ T4S,
17084             as_FloatRegister($src1$$reg),
17085             as_FloatRegister($src2$$reg));
17086   %}
17087   ins_pipe(vdop_fp128);
17088 %}
17089 
17090 instruct vsub2D(vecX dst, vecX src1, vecX src2)
17091 %{
        // SubVD on vecX operands for 2-element vectors; emits one vector fsub
        // with the T2D arrangement.
17092   predicate(n->as_Vector()->length() == 2);
17093   match(Set dst (SubVD src1 src2));
17094   ins_cost(INSN_COST);
17095   format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
17096   ins_encode %{
17097     __ fsub(as_FloatRegister($dst$$reg), __ T2D,
17098             as_FloatRegister($src1$$reg),
17099             as_FloatRegister($src2$$reg));
17100   %}
17101   ins_pipe(vdop_fp128);
17102 %}
17103 
17104 // --------------------------------- MUL --------------------------------------
17105 
17106 instruct vmul4S(vecD dst, vecD src1, vecD src2)
17107 %{
        // MulVS on vecD operands for vectors of 2 or 4 elements; both cases
        // emit one mulv with the T4H arrangement.
17108   predicate(n->as_Vector()->length() == 2 ||
17109             n->as_Vector()->length() == 4);
17110   match(Set dst (MulVS src1 src2));
17111   ins_cost(INSN_COST);
17112   format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
17113   ins_encode %{
17114     __ mulv(as_FloatRegister($dst$$reg), __ T4H,
17115             as_FloatRegister($src1$$reg),
17116             as_FloatRegister($src2$$reg));
17117   %}
17118   ins_pipe(vmul64);
17119 %}
17120 
17121 instruct vmul8S(vecX dst, vecX src1, vecX src2)
17122 %{
        // MulVS on vecX operands for 8-element vectors; emits one mulv with
        // the T8H arrangement.
17123   predicate(n->as_Vector()->length() == 8);
17124   match(Set dst (MulVS src1 src2));
17125   ins_cost(INSN_COST);
17126   format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
17127   ins_encode %{
17128     __ mulv(as_FloatRegister($dst$$reg), __ T8H,
17129             as_FloatRegister($src1$$reg),
17130             as_FloatRegister($src2$$reg));
17131   %}
17132   ins_pipe(vmul128);
17133 %}
17134 
17135 instruct vmul2I(vecD dst, vecD src1, vecD src2)
17136 %{
        // MulVI on vecD operands for 2-element vectors; emits one mulv with
        // the T2S arrangement.
17137   predicate(n->as_Vector()->length() == 2);
17138   match(Set dst (MulVI src1 src2));
17139   ins_cost(INSN_COST);
17140   format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
17141   ins_encode %{
17142     __ mulv(as_FloatRegister($dst$$reg), __ T2S,
17143             as_FloatRegister($src1$$reg),
17144             as_FloatRegister($src2$$reg));
17145   %}
17146   ins_pipe(vmul64);
17147 %}
17148 
// Matches MulVI nodes with 4 lanes and emits mulv with the T4S arrangement
// on the vecX operands.
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (MulVI src1 src2));
  predicate(n->as_Vector()->length() == 4);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ mulv(vd, __ T4S, vn, vm);
  %}
  ins_pipe(vmul128);
%}
17162 
// Matches MulVF nodes with 2 lanes and emits fmul with the T2S arrangement
// on the vecD operands.
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  match(Set dst (MulVF src1 src2));
  predicate(n->as_Vector()->length() == 2);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ fmul(vd, __ T2S, vn, vm);
  %}
  ins_pipe(vmuldiv_fp64);
%}
17176 
// Matches MulVF nodes with 4 lanes and emits fmul with the T4S arrangement
// on the vecX operands.
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (MulVF src1 src2));
  predicate(n->as_Vector()->length() == 4);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ fmul(vd, __ T4S, vn, vm);
  %}
  ins_pipe(vmuldiv_fp128);
%}
17190 
// Matches MulVD nodes with 2 lanes and emits fmul with the T2D arrangement
// on the vecX operands.
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (MulVD src1 src2));
  predicate(n->as_Vector()->length() == 2);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ fmul(vd, __ T2D, vn, vm);
  %}
  ins_pipe(vmuldiv_fp128);
%}
17204 
17205 // --------------------------------- MLA --------------------------------------
17206 
// Matches AddVS of dst with a MulVS of src1 and src2 (2 or 4 lanes) and
// emits a single mlav with the T4H arrangement; dst is both an input and
// the result.
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  match(Set dst (AddVS dst (MulVS src1 src2)));
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ mlav(vd, __ T4H, vn, vm);
  %}
  ins_pipe(vmla64);
%}
17221 
// Matches AddVS of dst with a MulVS of src1 and src2 (8 lanes) and emits a
// single mlav with the T8H arrangement; dst is both an input and the result.
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (AddVS dst (MulVS src1 src2)));
  predicate(n->as_Vector()->length() == 8);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ mlav(vd, __ T8H, vn, vm);
  %}
  ins_pipe(vmla128);
%}
17235 
// Matches AddVI of dst with a MulVI of src1 and src2 (2 lanes) and emits a
// single mlav with the T2S arrangement; dst is both an input and the result.
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  match(Set dst (AddVI dst (MulVI src1 src2)));
  predicate(n->as_Vector()->length() == 2);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ mlav(vd, __ T2S, vn, vm);
  %}
  ins_pipe(vmla64);
%}
17249 
// Matches AddVI of dst with a MulVI of src1 and src2 (4 lanes) and emits a
// single mlav with the T4S arrangement; dst is both an input and the result.
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (AddVI dst (MulVI src1 src2)));
  predicate(n->as_Vector()->length() == 4);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ mlav(vd, __ T4S, vn, vm);
  %}
  ins_pipe(vmla128);
%}
17263 
// dst + src1 * src2
// Only selected when UseFMA is set; matches 2-lane FmaVF nodes and emits
// fmla with the T2S arrangement, accumulating into dst.
instruct vmla2F(vecD dst, vecD src1, vecD src2) %{
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  predicate(UseFMA && n->as_Vector()->length() == 2);
  ins_cost(INSN_COST);
  format %{ "fmla  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ fmla(vd, __ T2S, vn, vm);
  %}
  ins_pipe(vmuldiv_fp64);
%}
17277 
// dst + src1 * src2
// Only selected when UseFMA is set; matches 4-lane FmaVF nodes and emits
// fmla with the T4S arrangement, accumulating into dst.
instruct vmla4F(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  predicate(UseFMA && n->as_Vector()->length() == 4);
  ins_cost(INSN_COST);
  format %{ "fmla  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ fmla(vd, __ T4S, vn, vm);
  %}
  ins_pipe(vmuldiv_fp128);
%}
17291 
// dst + src1 * src2
// Only selected when UseFMA is set; matches 2-lane FmaVD nodes and emits
// fmla with the T2D arrangement, accumulating into dst.
instruct vmla2D(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (FmaVD  dst (Binary src1 src2)));
  predicate(UseFMA && n->as_Vector()->length() == 2);
  ins_cost(INSN_COST);
  format %{ "fmla  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ fmla(vd, __ T2D, vn, vm);
  %}
  ins_pipe(vmuldiv_fp128);
%}
17305 
17306 // --------------------------------- MLS --------------------------------------
17307 
// Matches SubVS of dst minus a MulVS of src1 and src2 (2 or 4 lanes) and
// emits a single mlsv with the T4H arrangement; dst is both an input and
// the result.
instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  match(Set dst (SubVS dst (MulVS src1 src2)));
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ mlsv(vd, __ T4H, vn, vm);
  %}
  ins_pipe(vmla64);
%}
17322 
// Matches SubVS of dst minus a MulVS of src1 and src2 (8 lanes) and emits a
// single mlsv with the T8H arrangement; dst is both an input and the result.
instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (SubVS dst (MulVS src1 src2)));
  predicate(n->as_Vector()->length() == 8);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ mlsv(vd, __ T8H, vn, vm);
  %}
  ins_pipe(vmla128);
%}
17336 
// Matches SubVI of dst minus a MulVI of src1 and src2 (2 lanes) and emits a
// single mlsv with the T2S arrangement; dst is both an input and the result.
instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  match(Set dst (SubVI dst (MulVI src1 src2)));
  predicate(n->as_Vector()->length() == 2);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ mlsv(vd, __ T2S, vn, vm);
  %}
  ins_pipe(vmla64);
%}
17350 
// Matches SubVI of dst minus a MulVI of src1 and src2 (4 lanes) and emits a
// single mlsv with the T4S arrangement; dst is both an input and the result.
instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (SubVI dst (MulVI src1 src2)));
  predicate(n->as_Vector()->length() == 4);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ mlsv(vd, __ T4S, vn, vm);
  %}
  ins_pipe(vmla128);
%}
17364 
// dst - src1 * src2
// Only selected when UseFMA is set. Matches 2-lane FmaVF nodes in which
// either multiplicand is wrapped in NegVF, and emits fmls (T2S) on the
// un-negated operands.
instruct vmls2F(vecD dst, vecD src1, vecD src2) %{
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  predicate(UseFMA && n->as_Vector()->length() == 2);
  ins_cost(INSN_COST);
  format %{ "fmls  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ fmls(vd, __ T2S, vn, vm);
  %}
  ins_pipe(vmuldiv_fp64);
%}
17379 
// dst - src1 * src2
// Only selected when UseFMA is set. Matches 4-lane FmaVF nodes in which
// either multiplicand is wrapped in NegVF, and emits fmls (T4S) on the
// un-negated operands.
instruct vmls4F(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  predicate(UseFMA && n->as_Vector()->length() == 4);
  ins_cost(INSN_COST);
  format %{ "fmls  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ fmls(vd, __ T4S, vn, vm);
  %}
  ins_pipe(vmuldiv_fp128);
%}
17394 
// dst - src1 * src2
// Only selected when UseFMA is set. Matches 2-lane FmaVD nodes in which
// either multiplicand is wrapped in NegVD, and emits fmls (T2D) on the
// un-negated operands.
instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
  match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
  predicate(UseFMA && n->as_Vector()->length() == 2);
  ins_cost(INSN_COST);
  format %{ "fmls  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ fmls(vd, __ T2D, vn, vm);
  %}
  ins_pipe(vmuldiv_fp128);
%}
17409 
17410 // --------------------------------- DIV --------------------------------------
17411 
// Matches DivVF nodes with 2 lanes and emits fdiv with the T2S arrangement
// on the vecD operands.
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  match(Set dst (DivVF src1 src2));
  predicate(n->as_Vector()->length() == 2);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ fdiv(vd, __ T2S, vn, vm);
  %}
  ins_pipe(vmuldiv_fp64);
%}
17425 
// Matches DivVF nodes with 4 lanes and emits fdiv with the T4S arrangement
// on the vecX operands.
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (DivVF src1 src2));
  predicate(n->as_Vector()->length() == 4);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ fdiv(vd, __ T4S, vn, vm);
  %}
  ins_pipe(vmuldiv_fp128);
%}
17439 
// Matches DivVD nodes with 2 lanes and emits fdiv with the T2D arrangement
// on the vecX operands.
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (DivVD src1 src2));
  predicate(n->as_Vector()->length() == 2);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ fdiv(vd, __ T2D, vn, vm);
  %}
  ins_pipe(vmuldiv_fp128);
%}
17453 
17454 // --------------------------------- SQRT -------------------------------------
17455 
// Matches SqrtVD nodes with 2 lanes and emits fsqrt with the T2D
// arrangement. No ins_cost is declared for this rule.
instruct vsqrt2D(vecX dst, vecX src)
%{
  match(Set dst (SqrtVD src));
  predicate(n->as_Vector()->length() == 2);
  format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    __ fsqrt(vd, __ T2D, vn);
  %}
  ins_pipe(vsqrt_fp128);
%}
17467 
17468 // --------------------------------- ABS --------------------------------------
17469 
// Matches AbsVF nodes with 2 lanes and emits fabs with the T2S arrangement;
// costed at three times INSN_COST.
instruct vabs2F(vecD dst, vecD src)
%{
  match(Set dst (AbsVF src));
  predicate(n->as_Vector()->length() == 2);
  format %{ "fabs  $dst,$src\t# vector (2S)" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    __ fabs(vd, __ T2S, vn);
  %}
  ins_pipe(vunop_fp64);
%}
17482 
// Matches AbsVF nodes with 4 lanes and emits fabs with the T4S arrangement;
// costed at three times INSN_COST.
instruct vabs4F(vecX dst, vecX src)
%{
  match(Set dst (AbsVF src));
  predicate(n->as_Vector()->length() == 4);
  format %{ "fabs  $dst,$src\t# vector (4S)" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    __ fabs(vd, __ T4S, vn);
  %}
  ins_pipe(vunop_fp128);
%}
17495 
// Matches AbsVD nodes with 2 lanes and emits fabs with the T2D arrangement;
// costed at three times INSN_COST.
instruct vabs2D(vecX dst, vecX src)
%{
  match(Set dst (AbsVD src));
  predicate(n->as_Vector()->length() == 2);
  format %{ "fabs  $dst,$src\t# vector (2D)" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    __ fabs(vd, __ T2D, vn);
  %}
  ins_pipe(vunop_fp128);
%}
17508 
17509 // --------------------------------- NEG --------------------------------------
17510 
// Matches NegVF nodes with 2 lanes and emits fneg with the T2S arrangement;
// costed at three times INSN_COST.
instruct vneg2F(vecD dst, vecD src)
%{
  match(Set dst (NegVF src));
  predicate(n->as_Vector()->length() == 2);
  format %{ "fneg  $dst,$src\t# vector (2S)" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    __ fneg(vd, __ T2S, vn);
  %}
  ins_pipe(vunop_fp64);
%}
17523 
// Matches NegVF nodes with 4 lanes and emits fneg with the T4S arrangement;
// costed at three times INSN_COST.
instruct vneg4F(vecX dst, vecX src)
%{
  match(Set dst (NegVF src));
  predicate(n->as_Vector()->length() == 4);
  format %{ "fneg  $dst,$src\t# vector (4S)" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    __ fneg(vd, __ T4S, vn);
  %}
  ins_pipe(vunop_fp128);
%}
17536 
// Matches NegVD nodes with 2 lanes and emits fneg with the T2D arrangement;
// costed at three times INSN_COST.
instruct vneg2D(vecX dst, vecX src)
%{
  match(Set dst (NegVD src));
  predicate(n->as_Vector()->length() == 2);
  format %{ "fneg  $dst,$src\t# vector (2D)" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    __ fneg(vd, __ T2D, vn);
  %}
  ins_pipe(vunop_fp128);
%}
17549 
17550 // --------------------------------- AND --------------------------------------
17551 
// Matches AndV nodes whose vector is 4 or 8 bytes long and emits andr with
// the T8B arrangement on the vecD operands.
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  match(Set dst (AndV src1 src2));
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ andr(vd, __ T8B, vn, vm);
  %}
  ins_pipe(vlogical64);
%}
17566 
// Matches AndV nodes whose vector is 16 bytes long and emits andr with the
// T16B arrangement on the vecX operands.
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (AndV src1 src2));
  predicate(n->as_Vector()->length_in_bytes() == 16);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ andr(vd, __ T16B, vn, vm);
  %}
  ins_pipe(vlogical128);
%}
17580 
17581 // --------------------------------- OR ---------------------------------------
17582 
// Matches OrV nodes whose vector is 4 or 8 bytes long and emits orr with
// the T8B arrangement on the vecD operands.
// The format text names "orr" so that it agrees with the instruction the
// encoding actually emits and with the 16-byte variant vor16B (it
// previously read "and").
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
17597 
// Matches OrV nodes whose vector is 16 bytes long and emits orr with the
// T16B arrangement on the vecX operands.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (OrV src1 src2));
  predicate(n->as_Vector()->length_in_bytes() == 16);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ orr(vd, __ T16B, vn, vm);
  %}
  ins_pipe(vlogical128);
%}
17611 
17612 // --------------------------------- XOR --------------------------------------
17613 
// Matches XorV nodes whose vector is 4 or 8 bytes long and emits eor with
// the T8B arrangement on the vecD operands. The format text prints "xor"
// while the emitted instruction is eor.
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  match(Set dst (XorV src1 src2));
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ eor(vd, __ T8B, vn, vm);
  %}
  ins_pipe(vlogical64);
%}
17628 
// Matches XorV nodes whose vector is 16 bytes long and emits eor with the
// T16B arrangement on the vecX operands. The format text prints "xor"
// while the emitted instruction is eor.
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (XorV src1 src2));
  predicate(n->as_Vector()->length_in_bytes() == 16);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ eor(vd, __ T16B, vn, vm);
  %}
  ins_pipe(vlogical128);
%}
17642 
17643 // ------------------------------ Shift ---------------------------------------
17644 
// Matches LShiftCntV: copies the general-register count into a vecX
// register with dup using the T16B arrangement.
instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (LShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
  ins_encode %{
    FloatRegister vd      = as_FloatRegister($dst$$reg);
    Register      cnt_reg = as_Register($cnt$$reg);
    __ dup(vd, __ T16B, cnt_reg);
  %}
  ins_pipe(vdup_reg_reg128);
%}
17653 
// Right shifts on AArch64 SIMD are implemented as left shifts by a negative
// amount, so for RShiftCntV the count is first copied into dst with dup
// (T16B) and then negated in place with negr.
instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
  ins_encode %{
    FloatRegister vd      = as_FloatRegister($dst$$reg);
    Register      cnt_reg = as_Register($cnt$$reg);
    __ dup(vd, __ T16B, cnt_reg);
    __ negr(vd, __ T16B, vd);
  %}
  ins_pipe(vdup_reg_reg128);
%}
17664 
// Matches both LShiftVB and RShiftVB (4 or 8 lanes) with a vector shift
// operand and emits sshl with the T8B arrangement.
// NOTE(review): the RShiftVB case presumably relies on the shift operand
// already holding a negated count — confirm against the RShiftCntV rule.
instruct vsll8B(vecD dst, vecD src, vecX shift) %{
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    FloatRegister vs = as_FloatRegister($shift$$reg);
    __ sshl(vd, __ T8B, vn, vs);
  %}
  ins_pipe(vshift64);
%}
17679 
// Matches both LShiftVB and RShiftVB (16 lanes) with a vector shift operand
// and emits sshl with the T16B arrangement.
// NOTE(review): the RShiftVB case presumably relies on the shift operand
// already holding a negated count — confirm against the RShiftCntV rule.
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  predicate(n->as_Vector()->length() == 16);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    FloatRegister vs = as_FloatRegister($shift$$reg);
    __ sshl(vd, __ T16B, vn, vs);
  %}
  ins_pipe(vshift128);
%}
17693 
// Matches URShiftVB (4 or 8 lanes) with a vector shift operand and emits
// ushl with the T8B arrangement.
instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
  match(Set dst (URShiftVB src shift));
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    FloatRegister vs = as_FloatRegister($shift$$reg);
    __ ushl(vd, __ T8B, vn, vs);
  %}
  ins_pipe(vshift64);
%}
17707 
// Matches URShiftVB (16 lanes) with a vector shift operand and emits ushl
// with the T16B arrangement.
instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
  match(Set dst (URShiftVB src shift));
  predicate(n->as_Vector()->length() == 16);
  format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    FloatRegister vs = as_FloatRegister($shift$$reg);
    __ ushl(vd, __ T16B, vn, vs);
  %}
  ins_pipe(vshift128);
%}
17720 
// Matches LShiftVB by an immediate (4 or 8 lanes). The constant is masked
// with 31; a masked count below 8 emits shl (T8B), otherwise dst is set by
// eor-ing src with itself, which yields all-zero bits.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  match(Set dst (LShiftVB src shift));
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant & 31;
    if (count < 8) {
      __ shl(vd, __ T8B, vn, count);
    } else {
      __ eor(vd, __ T8B, vn, vn);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17740 
// Matches LShiftVB by an immediate (16 lanes). The constant is masked with
// 31; a masked count below 8 emits shl (T16B), otherwise dst is set by
// eor-ing src with itself, which yields all-zero bits.
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  match(Set dst (LShiftVB src shift));
  predicate(n->as_Vector()->length() == 16);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant & 31;
    if (count < 8) {
      __ shl(vd, __ T16B, vn, count);
    } else {
      __ eor(vd, __ T16B, vn, vn);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17759 
// Matches RShiftVB by an immediate (4 or 8 lanes). The constant is masked
// with 31 and counts of 8 or more are clamped to 7; the value handed to
// sshr (T8B) is the negated count masked with 7.
// NOTE(review): the negate-and-mask form presumably matches the immediate
// encoding this assembler's sshr expects — confirm against the assembler.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  match(Set dst (RShiftVB src shift));
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    const int masked  = (int)$shift$$constant & 31;
    const int clamped = (masked >= 8) ? 7 : masked;
    __ sshr(vd, __ T8B, vn, (-clamped) & 7);
  %}
  ins_pipe(vshift64_imm);
%}
17775 
// Matches RShiftVB by an immediate (16 lanes). The constant is masked with
// 31 and counts of 8 or more are clamped to 7; the value handed to sshr
// (T16B) is the negated count masked with 7.
// NOTE(review): the negate-and-mask form presumably matches the immediate
// encoding this assembler's sshr expects — confirm against the assembler.
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  match(Set dst (RShiftVB src shift));
  predicate(n->as_Vector()->length() == 16);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    const int masked  = (int)$shift$$constant & 31;
    const int clamped = (masked >= 8) ? 7 : masked;
    __ sshr(vd, __ T16B, vn, (-clamped) & 7);
  %}
  ins_pipe(vshift128_imm);
%}
17790 
// Matches URShiftVB by an immediate (4 or 8 lanes). The constant is masked
// with 31; a masked count below 8 emits ushr (T8B) with the negated count
// masked by 7, otherwise dst is set by eor-ing src with itself (all zero).
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  match(Set dst (URShiftVB src shift));
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant & 31;
    if (count < 8) {
      __ ushr(vd, __ T8B, vn, (-count) & 7);
    } else {
      __ eor(vd, __ T8B, vn, vn);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17810 
// Matches URShiftVB by an immediate (16 lanes). The constant is masked with
// 31; a masked count below 8 emits ushr (T16B) with the negated count
// masked by 7, otherwise dst is set by eor-ing src with itself (all zero).
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  match(Set dst (URShiftVB src shift));
  predicate(n->as_Vector()->length() == 16);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant & 31;
    if (count < 8) {
      __ ushr(vd, __ T16B, vn, (-count) & 7);
    } else {
      __ eor(vd, __ T16B, vn, vn);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17829 
// Matches both LShiftVS and RShiftVS (2 or 4 lanes) with a vector shift
// operand and emits sshl with the T4H arrangement.
// NOTE(review): the RShiftVS case presumably relies on the shift operand
// already holding a negated count — confirm against the RShiftCntV rule.
instruct vsll4S(vecD dst, vecD src, vecX shift) %{
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    FloatRegister vs = as_FloatRegister($shift$$reg);
    __ sshl(vd, __ T4H, vn, vs);
  %}
  ins_pipe(vshift64);
%}
17844 
// Matches both LShiftVS and RShiftVS (8 lanes) with a vector shift operand
// and emits sshl with the T8H arrangement.
// NOTE(review): the RShiftVS case presumably relies on the shift operand
// already holding a negated count — confirm against the RShiftCntV rule.
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  predicate(n->as_Vector()->length() == 8);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    FloatRegister vs = as_FloatRegister($shift$$reg);
    __ sshl(vd, __ T8H, vn, vs);
  %}
  ins_pipe(vshift128);
%}
17858 
// Matches URShiftVS (2 or 4 lanes) with a vector shift operand and emits
// ushl with the T4H arrangement.
instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
  match(Set dst (URShiftVS src shift));
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    FloatRegister vs = as_FloatRegister($shift$$reg);
    __ ushl(vd, __ T4H, vn, vs);
  %}
  ins_pipe(vshift64);
%}
17872 
// Matches URShiftVS (8 lanes) with a vector shift operand and emits ushl
// with the T8H arrangement.
instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  match(Set dst (URShiftVS src shift));
  predicate(n->as_Vector()->length() == 8);
  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    FloatRegister vs = as_FloatRegister($shift$$reg);
    __ ushl(vd, __ T8H, vn, vs);
  %}
  ins_pipe(vshift128);
%}
17885 
// Matches LShiftVS by an immediate (2 or 4 lanes). The constant is masked
// with 31; a masked count below 16 emits shl (T4H), otherwise dst is set by
// eor-ing src with itself (T8B), which yields all-zero bits.
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  match(Set dst (LShiftVS src shift));
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant & 31;
    if (count < 16) {
      __ shl(vd, __ T4H, vn, count);
    } else {
      __ eor(vd, __ T8B, vn, vn);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17905 
// Matches LShiftVS by an immediate (8 lanes). The constant is masked with
// 31; a masked count below 16 emits shl (T8H), otherwise dst is set by
// eor-ing src with itself (T16B), which yields all-zero bits.
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  match(Set dst (LShiftVS src shift));
  predicate(n->as_Vector()->length() == 8);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant & 31;
    if (count < 16) {
      __ shl(vd, __ T8H, vn, count);
    } else {
      __ eor(vd, __ T16B, vn, vn);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17924 
// Matches RShiftVS by an immediate (2 or 4 lanes). The constant is masked
// with 31 and counts of 16 or more are clamped to 15; the value handed to
// sshr (T4H) is the negated count masked with 15.
// NOTE(review): the negate-and-mask form presumably matches the immediate
// encoding this assembler's sshr expects — confirm against the assembler.
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  match(Set dst (RShiftVS src shift));
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    const int masked  = (int)$shift$$constant & 31;
    const int clamped = (masked >= 16) ? 15 : masked;
    __ sshr(vd, __ T4H, vn, (-clamped) & 15);
  %}
  ins_pipe(vshift64_imm);
%}
17940 
// Matches RShiftVS by an immediate (8 lanes). The constant is masked with
// 31 and counts of 16 or more are clamped to 15; the value handed to sshr
// (T8H) is the negated count masked with 15.
// NOTE(review): the negate-and-mask form presumably matches the immediate
// encoding this assembler's sshr expects — confirm against the assembler.
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  match(Set dst (RShiftVS src shift));
  predicate(n->as_Vector()->length() == 8);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    const int masked  = (int)$shift$$constant & 31;
    const int clamped = (masked >= 16) ? 15 : masked;
    __ sshr(vd, __ T8H, vn, (-clamped) & 15);
  %}
  ins_pipe(vshift128_imm);
%}
17955 
// Matches URShiftVS by an immediate (2 or 4 lanes). The constant is masked
// with 31; a masked count below 16 emits ushr (T4H) with the negated count
// masked by 15, otherwise dst is set by eor-ing src with itself (T8B).
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  match(Set dst (URShiftVS src shift));
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant & 31;
    if (count < 16) {
      __ ushr(vd, __ T4H, vn, (-count) & 15);
    } else {
      __ eor(vd, __ T8B, vn, vn);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17975 
// Matches URShiftVS by an immediate (8 lanes). The constant is masked with
// 31; a masked count below 16 emits ushr (T8H) with the negated count
// masked by 15, otherwise dst is set by eor-ing src with itself (T16B).
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  match(Set dst (URShiftVS src shift));
  predicate(n->as_Vector()->length() == 8);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant & 31;
    if (count < 16) {
      __ ushr(vd, __ T8H, vn, (-count) & 15);
    } else {
      __ eor(vd, __ T16B, vn, vn);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17994 
// Matches both LShiftVI and RShiftVI (2 lanes) with a vector shift operand
// and emits sshl with the T2S arrangement.
// NOTE(review): the RShiftVI case presumably relies on the shift operand
// already holding a negated count — confirm against the RShiftCntV rule.
instruct vsll2I(vecD dst, vecD src, vecX shift) %{
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  predicate(n->as_Vector()->length() == 2);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    FloatRegister vs = as_FloatRegister($shift$$reg);
    __ sshl(vd, __ T2S, vn, vs);
  %}
  ins_pipe(vshift64);
%}
18008 
// Matches both LShiftVI and RShiftVI (4 lanes) with a vector shift operand
// and emits sshl with the T4S arrangement.
// NOTE(review): the RShiftVI case presumably relies on the shift operand
// already holding a negated count — confirm against the RShiftCntV rule.
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  predicate(n->as_Vector()->length() == 4);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    FloatRegister vs = as_FloatRegister($shift$$reg);
    __ sshl(vd, __ T4S, vn, vs);
  %}
  ins_pipe(vshift128);
%}
18022 
// Matches URShiftVI (2 lanes) with a vector shift operand and emits ushl
// with the T2S arrangement.
instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  match(Set dst (URShiftVI src shift));
  predicate(n->as_Vector()->length() == 2);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    FloatRegister vs = as_FloatRegister($shift$$reg);
    __ ushl(vd, __ T2S, vn, vs);
  %}
  ins_pipe(vshift64);
%}
18035 
// URShiftVI on a four-lane int vector with the shift supplied in a vector
// register. Emits one ushl with the T4S arrangement.
// NOTE(review): a right shift encoded as ushl presumably depends on the
// count vector being negated upstream -- confirm.
instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value  = as_FloatRegister($src$$reg);
    FloatRegister counts = as_FloatRegister($shift$$reg);
    __ ushl(result, __ T4S, value, counts);
  %}
  ins_pipe(vshift128);
%}
18048 
// LShiftVI on a two-lane int vector by an immediate count.
// Emits shl with the T2S arrangement.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    // Keep only the low five bits of the constant as the shift amount.
    const int amount = (int)$shift$$constant & 31;
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value  = as_FloatRegister($src$$reg);
    __ shl(result, __ T2S, value, amount);
  %}
  ins_pipe(vshift64_imm);
%}
18061 
// LShiftVI on a four-lane int vector by an immediate count.
// Emits shl with the T4S arrangement.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    // Keep only the low five bits of the constant as the shift amount.
    const int amount = (int)$shift$$constant & 31;
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value  = as_FloatRegister($src$$reg);
    __ shl(result, __ T4S, value, amount);
  %}
  ins_pipe(vshift128_imm);
%}
18074 
// RShiftVI (arithmetic right shift) on a two-lane int vector by an
// immediate count. Emits sshr with the T2S arrangement.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    // The constant is negated first and then masked to five bits.
    // NOTE(review): presumably the assembler routine expects the count in
    // this negated form -- confirm against its definition.
    const int negated_amount = -(int)$shift$$constant & 31;
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value  = as_FloatRegister($src$$reg);
    __ sshr(result, __ T2S, value, negated_amount);
  %}
  ins_pipe(vshift64_imm);
%}
18087 
// RShiftVI (arithmetic right shift) on a four-lane int vector by an
// immediate count. Emits sshr with the T4S arrangement.
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    // The constant is negated first and then masked to five bits.
    // NOTE(review): presumably the assembler routine expects the count in
    // this negated form -- confirm against its definition.
    const int negated_amount = -(int)$shift$$constant & 31;
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value  = as_FloatRegister($src$$reg);
    __ sshr(result, __ T4S, value, negated_amount);
  %}
  ins_pipe(vshift128_imm);
%}
18100 
// URShiftVI (logical right shift) on a two-lane int vector by an immediate
// count. Emits ushr with the T2S arrangement.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    // The constant is negated first and then masked to five bits.
    // NOTE(review): presumably the assembler routine expects the count in
    // this negated form -- confirm against its definition.
    const int negated_amount = -(int)$shift$$constant & 31;
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value  = as_FloatRegister($src$$reg);
    __ ushr(result, __ T2S, value, negated_amount);
  %}
  ins_pipe(vshift64_imm);
%}
18113 
// URShiftVI (logical right shift) on a four-lane int vector by an immediate
// count. Emits ushr with the T4S arrangement.
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    // The constant is negated first and then masked to five bits.
    // NOTE(review): presumably the assembler routine expects the count in
    // this negated form -- confirm against its definition.
    const int negated_amount = -(int)$shift$$constant & 31;
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value  = as_FloatRegister($src$$reg);
    __ ushr(result, __ T4S, value, negated_amount);
  %}
  ins_pipe(vshift128_imm);
%}
18126 
// Variable-count shift of a two-lane long vector (predicate: vector length 2).
// Selected for LShiftVL and for RShiftVL when the shift operand is a vector
// register; the encoding is one sshl using the T2D arrangement.
// NOTE(review): the RShiftVL case presumably expects an already negated
// count vector -- confirm against the shift-count rules.
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value  = as_FloatRegister($src$$reg);
    FloatRegister counts = as_FloatRegister($shift$$reg);
    __ sshl(result, __ T2D, value, counts);
  %}
  ins_pipe(vshift128);
%}
18140 
// URShiftVL on a two-lane long vector with the shift supplied in a vector
// register. Emits one ushl with the T2D arrangement.
// NOTE(review): a right shift encoded as ushl presumably depends on the
// count vector being negated upstream -- confirm.
instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value  = as_FloatRegister($src$$reg);
    FloatRegister counts = as_FloatRegister($shift$$reg);
    __ ushl(result, __ T2D, value, counts);
  %}
  ins_pipe(vshift128);
%}
18153 
// LShiftVL on a two-lane long vector by an immediate count.
// Emits shl with the T2D arrangement.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    // Keep only the low six bits of the constant as the shift amount.
    const int amount = (int)$shift$$constant & 63;
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value  = as_FloatRegister($src$$reg);
    __ shl(result, __ T2D, value, amount);
  %}
  ins_pipe(vshift128_imm);
%}
18166 
// RShiftVL (arithmetic right shift) on a two-lane long vector by an
// immediate count. Emits sshr with the T2D arrangement.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    // The constant is negated first and then masked to six bits.
    // NOTE(review): presumably the assembler routine expects the count in
    // this negated form -- confirm against its definition.
    const int negated_amount = -(int)$shift$$constant & 63;
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value  = as_FloatRegister($src$$reg);
    __ sshr(result, __ T2D, value, negated_amount);
  %}
  ins_pipe(vshift128_imm);
%}
18179 
// URShiftVL (logical right shift) on a two-lane long vector by an immediate
// count. Emits ushr with the T2D arrangement.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    // The constant is negated first and then masked to six bits.
    // NOTE(review): presumably the assembler routine expects the count in
    // this negated form -- confirm against its definition.
    const int negated_amount = -(int)$shift$$constant & 63;
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister value  = as_FloatRegister($src$$reg);
    __ ushr(result, __ T2D, value, negated_amount);
  %}
  ins_pipe(vshift128_imm);
%}
18192 
18193 //----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
18196 //
18197 // peepmatch ( root_instr_name [preceding_instruction]* );
18198 //
18199 // peepconstraint %{
18200 // (instruction_number.operand_name relational_op instruction_number.operand_name
18201 //  [, ...] );
18202 // // instruction numbers are zero-based using left to right order in peepmatch
18203 //
18204 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
18205 // // provide an instruction_number.operand_name for each operand that appears
18206 // // in the replacement instruction's match rule
18207 //
18208 // ---------VM FLAGS---------------------------------------------------------
18209 //
18210 // All peephole optimizations can be turned off using -XX:-OptoPeephole
18211 //
18212 // Each peephole rule is given an identifying number starting with zero and
18213 // increasing by one in the order seen by the parser.  An individual peephole
18214 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
18215 // on the command-line.
18216 //
18217 // ---------CURRENT LIMITATIONS----------------------------------------------
18218 //
18219 // Only match adjacent instructions in same basic block
18220 // Only equality constraints
18221 // Only constraints between operands, not (0.dest_reg == RAX_enc)
18222 // Only one replacement instruction
18223 //
18224 // ---------EXAMPLE----------------------------------------------------------
18225 //
18226 // // pertinent parts of existing instructions in architecture description
18227 // instruct movI(iRegINoSp dst, iRegI src)
18228 // %{
18229 //   match(Set dst (CopyI src));
18230 // %}
18231 //
18232 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
18233 // %{
18234 //   match(Set dst (AddI dst src));
18235 //   effect(KILL cr);
18236 // %}
18237 //
18238 // // Change (inc mov) to lea
18239 // peephole %{
//   // increment preceded by register-register move
18241 //   peepmatch ( incI_iReg movI );
18242 //   // require that the destination register of the increment
18243 //   // match the destination register of the move
18244 //   peepconstraint ( 0.dst == 1.dst );
18245 //   // construct a replacement instruction that sets
18246 //   // the destination to ( move's source register + one )
18247 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
18248 // %}
18249 //
18250 
18251 // Implementation no longer uses movX instructions since
18252 // machine-independent system no longer uses CopyX nodes.
18253 //
18254 // peephole
18255 // %{
18256 //   peepmatch (incI_iReg movI);
18257 //   peepconstraint (0.dst == 1.dst);
18258 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18259 // %}
18260 
18261 // peephole
18262 // %{
18263 //   peepmatch (decI_iReg movI);
18264 //   peepconstraint (0.dst == 1.dst);
18265 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18266 // %}
18267 
18268 // peephole
18269 // %{
18270 //   peepmatch (addI_iReg_imm movI);
18271 //   peepconstraint (0.dst == 1.dst);
18272 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18273 // %}
18274 
18275 // peephole
18276 // %{
18277 //   peepmatch (incL_iReg movL);
18278 //   peepconstraint (0.dst == 1.dst);
18279 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18280 // %}
18281 
18282 // peephole
18283 // %{
18284 //   peepmatch (decL_iReg movL);
18285 //   peepconstraint (0.dst == 1.dst);
18286 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18287 // %}
18288 
18289 // peephole
18290 // %{
18291 //   peepmatch (addL_iReg_imm movL);
18292 //   peepconstraint (0.dst == 1.dst);
18293 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18294 // %}
18295 
18296 // peephole
18297 // %{
18298 //   peepmatch (addP_iReg_imm movP);
18299 //   peepconstraint (0.dst == 1.dst);
18300 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
18301 // %}
18302 
18303 // // Change load of spilled value to only a spill
18304 // instruct storeI(memory mem, iRegI src)
18305 // %{
18306 //   match(Set mem (StoreI mem src));
18307 // %}
18308 //
18309 // instruct loadI(iRegINoSp dst, memory mem)
18310 // %{
18311 //   match(Set dst (LoadI mem));
18312 // %}
18313 //
18314 
18315 //----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
18318 
18319 // Local Variables:
18320 // mode: c++
18321 // End: