1 //
   2 // Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, Red Hat Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
  31 // architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
  71 //   r27-r31 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
  74 // as regards Java usage, we don't use any callee save registers
  75 // because this makes it difficult to de-optimise a frame (see comment
  76 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers
  80 
     // Each 64-bit integer register is defined as two 32-bit halves: the
     // real low half (Rn) and a virtual high half (Rn_H) used only by the
     // register allocator, never as a memory operand (see note above).
     // r8 and r9 are deliberately omitted so they stay invisible to the
     // allocator and remain usable as scratch registers.
  81 reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
  82 reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
  83 reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
  84 reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
  85 reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
  86 reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
  87 reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
  88 reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
  89 reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
  90 reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
  91 reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
  92 reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
  93 reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
  94 reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
  95 reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
  96 reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
  97 reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
  98 reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  99 reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
 100 reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 101 reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
 102 reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
 103 reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
 104 reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
 105 reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
 106 reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
 107 reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
 108 reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
 109 reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
 110 reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
 111 reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
 112 reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
 113 reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
 114 reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
     // r19-r26: SOE for the C calling convention but SOC for Java
     // (see the note above about avoiding callee-save registers).
 115 reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
 116 reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
 117 reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
 118 reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
 119 reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
 120 reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
 121 reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
 122 reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
 123 reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
 124 reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
 125 reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
 126 reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
 127 reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
 128 reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
 129 reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
 130 reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
     // r27-r31: reserved registers, never allocated for Java use.
 131 reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
 132 reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
 133 reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
 134 reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
 135 reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
 136 reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
 137 reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
 138 reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
 139 reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
 140 reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values up to 8 * 32
 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
 160 // For Java use, float registers v0-v15 are always save-on-call
 161 // (whereas the platform ABI treats v8-v15 as callee save). Float
 162 // registers v16-v31 are SOC as per the platform spec.
 163 
     // Each 128-bit FP/SIMD register vN is described to the allocator as
     // four 32-bit slices: Vn (low word), Vn_H, Vn_J, Vn_K — successive
     // VMReg slots via ->next(k). The slices must stay adjacent and in
     // this order (see the pairing rules above).
  164   reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
 165   reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
 166   reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
 167   reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );
 168 
 169   reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
 170   reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
 171   reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
 172   reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );
 173 
 174   reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
 175   reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
 176   reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
 177   reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );
 178 
 179   reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
 180   reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
 181   reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
 182   reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );
 183 
 184   reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
 185   reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
 186   reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
 187   reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );
 188 
 189   reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
 190   reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
 191   reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
 192   reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );
 193 
 194   reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
 195   reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
 196   reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
 197   reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );
 198 
 199   reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
 200   reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
 201   reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
 202   reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );
 203 
     // v8-v15 are SOC here even though the platform ABI treats them as
     // callee save — see the note above the float definitions.
 204   reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
 205   reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
 206   reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
 207   reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );
 208 
 209   reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
 210   reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
 211   reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
 212   reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );
 213 
 214   reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
 215   reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
 216   reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
 217   reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
 218 
 219   reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
 220   reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
 221   reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
 222   reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
 223 
 224   reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
 225   reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
 226   reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
 227   reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
 228 
 229   reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
 230   reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
 231   reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
 232   reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
 233 
 234   reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
 235   reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
 236   reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
 237   reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
 238 
 239   reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
 240   reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
 241   reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
 242   reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
 243 
 244   reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
 245   reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
 246   reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
 247   reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
 248 
 249   reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
 250   reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
 251   reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
 252   reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
 253 
 254   reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
 255   reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
 256   reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
 257   reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
 258 
 259   reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
 260   reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
 261   reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
 262   reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
 263 
 264   reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
 265   reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
 266   reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
 267   reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
 268 
 269   reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
 270   reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
 271   reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
 272   reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
 273 
 274   reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
 275   reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
 276   reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
 277   reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
 278 
 279   reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
 280   reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
 281   reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
 282   reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
 283 
 284   reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
 285   reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
 286   reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
 287   reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
 288 
 289   reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
 290   reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
 291   reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
 292   reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
 293 
 294   reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
 295   reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
 296   reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
 297   reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
 298 
 299   reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
 300   reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
 301   reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
 302   reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
 303 
 304   reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
 305   reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
 306   reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
 307   reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
 308 
 309   reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
 310   reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
 311   reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
 312   reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
 313 
 314   reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
 315   reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
 316   reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
 317   reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
 318 
 319   reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
 320   reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
 321   reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
 322   reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
 328 // the AArch64 CSPR status flag register is not directly accessible as
 329 // an instruction operand. The FPSR status flag register is a system
 330 // register which can be written/read using MSR/MRS but again does not
 331 // appear as an operand (a code identifying the FPSR occurs as an
 332 // immediate value in the instruction).
 333 
     // Pseudo register for the condition flags. Encoding 32 places it just
     // past the 32 general registers; it has no backing VMReg
     // (VMRegImpl::Bad()) because it is never spilled or passed.
 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
     // Integer register allocation chunk. List order is allocation
     // priority (highest first): volatiles, then argument registers,
     // then callee-saved, then the non-allocatable reserved registers.
 345 alloc_class chunk0(
 346     // volatiles
 347     R10, R10_H,
 348     R11, R11_H,
 349     R12, R12_H,
 350     R13, R13_H,
 351     R14, R14_H,
 352     R15, R15_H,
 353     R16, R16_H,
 354     R17, R17_H,
 355     R18, R18_H,
 356 
 357     // arg registers
 358     R0, R0_H,
 359     R1, R1_H,
 360     R2, R2_H,
 361     R3, R3_H,
 362     R4, R4_H,
 363     R5, R5_H,
 364     R6, R6_H,
 365     R7, R7_H,
 366 
 367     // non-volatiles
 368     R19, R19_H,
 369     R20, R20_H,
 370     R21, R21_H,
 371     R22, R22_H,
 372     R23, R23_H,
 373     R24, R24_H,
 374     R25, R25_H,
 375     R26, R26_H,
 376 
 377     // non-allocatable registers
 378 
 379     R27, R27_H, // heapbase
 380     R28, R28_H, // thread
 381     R29, R29_H, // fp
 382     R30, R30_H, // lr
 383     R31, R31_H, // sp
 384 );
 385 
     // FP/SIMD register allocation chunk: platform-SOC v16-v31 first,
     // then the argument registers v0-v7, then v8-v15 (ABI callee save).
 386 alloc_class chunk1(
 387 
 388     // no save
 389     V16, V16_H, V16_J, V16_K,
 390     V17, V17_H, V17_J, V17_K,
 391     V18, V18_H, V18_J, V18_K,
 392     V19, V19_H, V19_J, V19_K,
 393     V20, V20_H, V20_J, V20_K,
 394     V21, V21_H, V21_J, V21_K,
 395     V22, V22_H, V22_J, V22_K,
 396     V23, V23_H, V23_J, V23_K,
 397     V24, V24_H, V24_J, V24_K,
 398     V25, V25_H, V25_J, V25_K,
 399     V26, V26_H, V26_J, V26_K,
 400     V27, V27_H, V27_J, V27_K,
 401     V28, V28_H, V28_J, V28_K,
 402     V29, V29_H, V29_J, V29_K,
 403     V30, V30_H, V30_J, V30_K,
 404     V31, V31_H, V31_J, V31_K,
 405 
 406     // arg registers
 407     V0, V0_H, V0_J, V0_K,
 408     V1, V1_H, V1_J, V1_K,
 409     V2, V2_H, V2_J, V2_K,
 410     V3, V3_H, V3_J, V3_K,
 411     V4, V4_H, V4_J, V4_K,
 412     V5, V5_H, V5_J, V5_K,
 413     V6, V6_H, V6_J, V6_K,
 414     V7, V7_H, V7_J, V7_K,
 415 
 416     // non-volatiles
 417     V8, V8_H, V8_J, V8_K,
 418     V9, V9_H, V9_J, V9_K,
 419     V10, V10_H, V10_J, V10_K,
 420     V11, V11_H, V11_J, V11_K,
 421     V12, V12_H, V12_J, V12_K,
 422     V13, V13_H, V13_J, V13_K,
 423     V14, V14_H, V14_J, V14_K,
 424     V15, V15_H, V15_J, V15_K,
 425 );
 426 
     // The flags pseudo register lives in its own chunk.
 427 alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
 432 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 433 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 434 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 435 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
 438 // Class for all 32 bit integer registers -- excludes SP which will
 439 // never be used as an integer register
     // All 32-bit integer registers r0-r30 (r8/r9 are never defined and
     // R31/sp is excluded per the comment above; fp and lr are included).
 440 reg_class any_reg32(
 441     R0,
 442     R1,
 443     R2,
 444     R3,
 445     R4,
 446     R5,
 447     R6,
 448     R7,
 449     R10,
 450     R11,
 451     R12,
 452     R13,
 453     R14,
 454     R15,
 455     R16,
 456     R17,
 457     R18,
 458     R19,
 459     R20,
 460     R21,
 461     R22,
 462     R23,
 463     R24,
 464     R25,
 465     R26,
 466     R27,
 467     R28,
 468     R29,
 469     R30
 470 );
 471 
 472 // Singleton class for R0 int register
 473 reg_class int_r0_reg(R0);
 474 
 475 // Singleton class for R2 int register
 476 reg_class int_r2_reg(R2);
 477 
 478 // Singleton class for R3 int register
 479 reg_class int_r3_reg(R3);
 480 
 481 // Singleton class for R4 int register
 482 reg_class int_r4_reg(R4);
 483 
 484 // Class for all long integer registers (including RSP)
     // All 64-bit integer register pairs, including R31/sp.
 485 reg_class any_reg(
 486     R0, R0_H,
 487     R1, R1_H,
 488     R2, R2_H,
 489     R3, R3_H,
 490     R4, R4_H,
 491     R5, R5_H,
 492     R6, R6_H,
 493     R7, R7_H,
 494     R10, R10_H,
 495     R11, R11_H,
 496     R12, R12_H,
 497     R13, R13_H,
 498     R14, R14_H,
 499     R15, R15_H,
 500     R16, R16_H,
 501     R17, R17_H,
 502     R18, R18_H,
 503     R19, R19_H,
 504     R20, R20_H,
 505     R21, R21_H,
 506     R22, R22_H,
 507     R23, R23_H,
 508     R24, R24_H,
 509     R25, R25_H,
 510     R26, R26_H,
 511     R27, R27_H,
 512     R28, R28_H,
 513     R29, R29_H,
 514     R30, R30_H,
 515     R31, R31_H
 516 );
 517 
 518 // Class for all non-special integer registers
     // Variant that also excludes R29 (fp); used when the frame pointer
     // must not be clobbered.
 519 reg_class no_special_reg32_no_fp(
 520     R0,
 521     R1,
 522     R2,
 523     R3,
 524     R4,
 525     R5,
 526     R6,
 527     R7,
 528     R10,
 529     R11,
 530     R12,                        // rmethod
 531     R13,
 532     R14,
 533     R15,
 534     R16,
 535     R17,
 536     R18,
 537     R19,
 538     R20,
 539     R21,
 540     R22,
 541     R23,
 542     R24,
 543     R25,
 544     R26
 545  /* R27, */                     // heapbase
 546  /* R28, */                     // thread
 547  /* R29, */                     // fp
 548  /* R30, */                     // lr
 549  /* R31 */                      // sp
 550 );
 551 
     // Same as above but with R29 (fp) available for allocation.
 552 reg_class no_special_reg32_with_fp(
 553     R0,
 554     R1,
 555     R2,
 556     R3,
 557     R4,
 558     R5,
 559     R6,
 560     R7,
 561     R10,
 562     R11,
 563     R12,                        // rmethod
 564     R13,
 565     R14,
 566     R15,
 567     R16,
 568     R17,
 569     R18,
 570     R19,
 571     R20,
 572     R21,
 573     R22,
 574     R23,
 575     R24,
 576     R25,
 577     R26
 578  /* R27, */                     // heapbase
 579  /* R28, */                     // thread
 580     R29,                        // fp
 581  /* R30, */                     // lr
 582  /* R31 */                      // sp
 583 );
 584 
     // NOTE(review): selects between the two classes above depending on
     // PreserveFramePointer, so that R29 is withheld from allocation when
     // the frame pointer must be preserved — confirm argument order
     // against the adlc reg_class_dynamic definition.
 585 reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
 587 // Class for all non-special long integer registers
     // 64-bit counterparts of the no_special_reg32 classes above; the
     // same register pairs, selected dynamically on PreserveFramePointer.
 588 reg_class no_special_reg_no_fp(
 589     R0, R0_H,
 590     R1, R1_H,
 591     R2, R2_H,
 592     R3, R3_H,
 593     R4, R4_H,
 594     R5, R5_H,
 595     R6, R6_H,
 596     R7, R7_H,
 597     R10, R10_H,
 598     R11, R11_H,
 599     R12, R12_H,                 // rmethod
 600     R13, R13_H,
 601     R14, R14_H,
 602     R15, R15_H,
 603     R16, R16_H,
 604     R17, R17_H,
 605     R18, R18_H,
 606     R19, R19_H,
 607     R20, R20_H,
 608     R21, R21_H,
 609     R22, R22_H,
 610     R23, R23_H,
 611     R24, R24_H,
 612     R25, R25_H,
 613     R26, R26_H,
 614  /* R27, R27_H, */              // heapbase
 615  /* R28, R28_H, */              // thread
 616  /* R29, R29_H, */              // fp
 617  /* R30, R30_H, */              // lr
 618  /* R31, R31_H */               // sp
 619 );
 620 
     // Same as above but with R29 (fp) available for allocation.
 621 reg_class no_special_reg_with_fp(
 622     R0, R0_H,
 623     R1, R1_H,
 624     R2, R2_H,
 625     R3, R3_H,
 626     R4, R4_H,
 627     R5, R5_H,
 628     R6, R6_H,
 629     R7, R7_H,
 630     R10, R10_H,
 631     R11, R11_H,
 632     R12, R12_H,                 // rmethod
 633     R13, R13_H,
 634     R14, R14_H,
 635     R15, R15_H,
 636     R16, R16_H,
 637     R17, R17_H,
 638     R18, R18_H,
 639     R19, R19_H,
 640     R20, R20_H,
 641     R21, R21_H,
 642     R22, R22_H,
 643     R23, R23_H,
 644     R24, R24_H,
 645     R25, R25_H,
 646     R26, R26_H,
 647  /* R27, R27_H, */              // heapbase
 648  /* R28, R28_H, */              // thread
 649     R29, R29_H,                 // fp
 650  /* R30, R30_H, */              // lr
 651  /* R31, R31_H */               // sp
 652 );
 653 
     // NOTE(review): see the matching note on no_special_reg32 above.
 654 reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
 656 // Class for 64 bit register r0
     // Singleton 64-bit classes: each pins an operand to one specific
     // register (both 32-bit halves of the pair).
 657 reg_class r0_reg(
 658     R0, R0_H
 659 );
 660 
 661 // Class for 64 bit register r1
 662 reg_class r1_reg(
 663     R1, R1_H
 664 );
 665 
 666 // Class for 64 bit register r2
 667 reg_class r2_reg(
 668     R2, R2_H
 669 );
 670 
 671 // Class for 64 bit register r3
 672 reg_class r3_reg(
 673     R3, R3_H
 674 );
 675 
 676 // Class for 64 bit register r4
 677 reg_class r4_reg(
 678     R4, R4_H
 679 );
 680 
 681 // Class for 64 bit register r5
 682 reg_class r5_reg(
 683     R5, R5_H
 684 );
 685 
 686 // Class for 64 bit register r10
 687 reg_class r10_reg(
 688     R10, R10_H
 689 );
 690 
 691 // Class for 64 bit register r11
 692 reg_class r11_reg(
 693     R11, R11_H
 694 );
 695 
 696 // Class for method register (r12)
 697 reg_class method_reg(
 698     R12, R12_H
 699 );
 700 
 701 // Class for heapbase register (r27)
 702 reg_class heapbase_reg(
 703     R27, R27_H
 704 );
 705 
 706 // Class for thread register (r28)
 707 reg_class thread_reg(
 708     R28, R28_H
 709 );
 710 
 711 // Class for frame pointer register (r29)
 712 reg_class fp_reg(
 713     R29, R29_H
 714 );
 715 
 716 // Class for link register (r30)
 717 reg_class lr_reg(
 718     R30, R30_H
 719 );
 720 
 721 // Class for long sp register (r31)
 722 reg_class sp_reg(
 723   R31, R31_H
 724 );
 725 
 726 // Class for all pointer registers
     // All pointer (64-bit) registers including the reserved ones
     // (heapbase, thread, fp, lr, sp).
 727 reg_class ptr_reg(
 728     R0, R0_H,
 729     R1, R1_H,
 730     R2, R2_H,
 731     R3, R3_H,
 732     R4, R4_H,
 733     R5, R5_H,
 734     R6, R6_H,
 735     R7, R7_H,
 736     R10, R10_H,
 737     R11, R11_H,
 738     R12, R12_H,
 739     R13, R13_H,
 740     R14, R14_H,
 741     R15, R15_H,
 742     R16, R16_H,
 743     R17, R17_H,
 744     R18, R18_H,
 745     R19, R19_H,
 746     R20, R20_H,
 747     R21, R21_H,
 748     R22, R22_H,
 749     R23, R23_H,
 750     R24, R24_H,
 751     R25, R25_H,
 752     R26, R26_H,
 753     R27, R27_H,
 754     R28, R28_H,
 755     R29, R29_H,
 756     R30, R30_H,
 757     R31, R31_H
 758 );
 759 
 760 // Class for all non_special pointer registers
     // Like ptr_reg but with the reserved registers commented out so the
     // allocator never hands them to ordinary pointer operands.
 761 reg_class no_special_ptr_reg(
 762     R0, R0_H,
 763     R1, R1_H,
 764     R2, R2_H,
 765     R3, R3_H,
 766     R4, R4_H,
 767     R5, R5_H,
 768     R6, R6_H,
 769     R7, R7_H,
 770     R10, R10_H,
 771     R11, R11_H,
 772     R12, R12_H,
 773     R13, R13_H,
 774     R14, R14_H,
 775     R15, R15_H,
 776     R16, R16_H,
 777     R17, R17_H,
 778     R18, R18_H,
 779     R19, R19_H,
 780     R20, R20_H,
 781     R21, R21_H,
 782     R22, R22_H,
 783     R23, R23_H,
 784     R24, R24_H,
 785     R25, R25_H,
 786     R26, R26_H,
 787  /* R27, R27_H, */              // heapbase
 788  /* R28, R28_H, */              // thread
 789  /* R29, R29_H, */              // fp
 790  /* R30, R30_H, */              // lr
 791  /* R31, R31_H */               // sp
 792 );
 793 
 794 // Class for all float registers
     // Single-precision floats use only the low 32-bit slice (Vn) of
     // each FP/SIMD register.
 795 reg_class float_reg(
 796     V0,
 797     V1,
 798     V2,
 799     V3,
 800     V4,
 801     V5,
 802     V6,
 803     V7,
 804     V8,
 805     V9,
 806     V10,
 807     V11,
 808     V12,
 809     V13,
 810     V14,
 811     V15,
 812     V16,
 813     V17,
 814     V18,
 815     V19,
 816     V20,
 817     V21,
 818     V22,
 819     V23,
 820     V24,
 821     V25,
 822     V26,
 823     V27,
 824     V28,
 825     V29,
 826     V30,
 827     V31
 828 );
 829 
 830 // Double precision float registers have virtual `high halves' that
 831 // are needed by the allocator.
 832 // Class for all double registers
     // Doubles occupy the low two 32-bit slices (Vn, Vn_H) of each
     // FP/SIMD register; the _H slice is the allocator-only high half.
 833 reg_class double_reg(
 834     V0, V0_H,
 835     V1, V1_H,
 836     V2, V2_H,
 837     V3, V3_H,
 838     V4, V4_H,
 839     V5, V5_H,
 840     V6, V6_H,
 841     V7, V7_H,
 842     V8, V8_H,
 843     V9, V9_H,
 844     V10, V10_H,
 845     V11, V11_H,
 846     V12, V12_H,
 847     V13, V13_H,
 848     V14, V14_H,
 849     V15, V15_H,
 850     V16, V16_H,
 851     V17, V17_H,
 852     V18, V18_H,
 853     V19, V19_H,
 854     V20, V20_H,
 855     V21, V21_H,
 856     V22, V22_H,
 857     V23, V23_H,
 858     V24, V24_H,
 859     V25, V25_H,
 860     V26, V26_H,
 861     V27, V27_H,
 862     V28, V28_H,
 863     V29, V29_H,
 864     V30, V30_H,
 865     V31, V31_H
 866 );
 867 
 868 // Class for all 64bit vector registers
     // 64-bit vectors also use only the low two slices of each register.
 869 reg_class vectord_reg(
 870     V0, V0_H,
 871     V1, V1_H,
 872     V2, V2_H,
 873     V3, V3_H,
 874     V4, V4_H,
 875     V5, V5_H,
 876     V6, V6_H,
 877     V7, V7_H,
 878     V8, V8_H,
 879     V9, V9_H,
 880     V10, V10_H,
 881     V11, V11_H,
 882     V12, V12_H,
 883     V13, V13_H,
 884     V14, V14_H,
 885     V15, V15_H,
 886     V16, V16_H,
 887     V17, V17_H,
 888     V18, V18_H,
 889     V19, V19_H,
 890     V20, V20_H,
 891     V21, V21_H,
 892     V22, V22_H,
 893     V23, V23_H,
 894     V24, V24_H,
 895     V25, V25_H,
 896     V26, V26_H,
 897     V27, V27_H,
 898     V28, V28_H,
 899     V29, V29_H,
 900     V30, V30_H,
 901     V31, V31_H
 902 );
 903 
 904 // Class for all 128bit vector registers
     // 128-bit vectors use all four 32-bit slices of each register.
 905 reg_class vectorx_reg(
 906     V0, V0_H, V0_J, V0_K,
 907     V1, V1_H, V1_J, V1_K,
 908     V2, V2_H, V2_J, V2_K,
 909     V3, V3_H, V3_J, V3_K,
 910     V4, V4_H, V4_J, V4_K,
 911     V5, V5_H, V5_J, V5_K,
 912     V6, V6_H, V6_J, V6_K,
 913     V7, V7_H, V7_J, V7_K,
 914     V8, V8_H, V8_J, V8_K,
 915     V9, V9_H, V9_J, V9_K,
 916     V10, V10_H, V10_J, V10_K,
 917     V11, V11_H, V11_J, V11_K,
 918     V12, V12_H, V12_J, V12_K,
 919     V13, V13_H, V13_J, V13_K,
 920     V14, V14_H, V14_J, V14_K,
 921     V15, V15_H, V15_J, V15_K,
 922     V16, V16_H, V16_J, V16_K,
 923     V17, V17_H, V17_J, V17_K,
 924     V18, V18_H, V18_J, V18_K,
 925     V19, V19_H, V19_J, V19_K,
 926     V20, V20_H, V20_J, V20_K,
 927     V21, V21_H, V21_J, V21_K,
 928     V22, V22_H, V22_J, V22_K,
 929     V23, V23_H, V23_J, V23_K,
 930     V24, V24_H, V24_J, V24_K,
 931     V25, V25_H, V25_J, V25_K,
 932     V26, V26_H, V26_J, V26_K,
 933     V27, V27_H, V27_J, V27_K,
 934     V28, V28_H, V28_J, V28_K,
 935     V29, V29_H, V29_J, V29_K,
 936     V30, V30_H, V30_J, V30_K,
 937     V31, V31_H, V31_J, V31_K
 938 );
 939 
 940 // Class for 128 bit register v0
 941 reg_class v0_reg(
 942     V0, V0_H
 943 );
 944 
 945 // Class for 128 bit register v1
 946 reg_class v1_reg(
 947     V1, V1_H
 948 );
 949 
 950 // Class for 128 bit register v2
 951 reg_class v2_reg(
 952     V2, V2_H
 953 );
 954 
 955 // Class for 128 bit register v3
 956 reg_class v3_reg(
 957     V3, V3_H
 958 );
 959 
// Singleton class for condition codes (the RFLAGS register)
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // Simple cost model (see the commentary above): register ops are
  // cheap, memory ops dearer, branches/calls dearer still and
  // volatile references most expensive of all.
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches cost twice a plain register op.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile references carry the highest cost in this group.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "asm/macroAssembler.hpp"
 999 #include "gc/shared/cardTable.hpp"
1000 #include "gc/shared/cardTableBarrierSet.hpp"
1001 #include "gc/shared/collectedHeap.hpp"
1002 #include "opto/addnode.hpp"
1003 
1004 class CallStubImpl {
1005 
1006   //--------------------------------------------------------------
1007   //---<  Used for optimization in Compile::shorten_branches  >---
1008   //--------------------------------------------------------------
1009 
1010  public:
1011   // Size of call trampoline stub.
1012   static uint size_call_trampoline() {
1013     return 0; // no call trampolines on this platform
1014   }
1015 
1016   // number of relocations needed by a call trampoline stub
1017   static uint reloc_call_trampoline() {
1018     return 0; // no call trampolines on this platform
1019   }
1020 };
1021 
class HandlerImpl {

 public:

  // Emitters for the exception and deopt handler stubs; definitions
  // live in the generated source section. NOTE(review): each
  // presumably returns the handler's offset within cbuf — confirm
  // against the definitions.
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // The exception handler is a single far branch.
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // NOTE(review): 4 instruction words are reserved, which assumes
    // the far branch may expand to up to 3 instructions — confirm
    // against MacroAssembler::far_branch_size().
    return 4 * NativeInstruction::instruction_size;
  }
};
1038 
  // graph traversal helpers
  //
  // The helpers declared below walk the ideal graph to recognise the
  // volatile load/store and CAS membar 'signatures'; their
  // definitions (and a full account of the signatures) follow in the
  // source section.

  MemBarNode *parent_membar(const Node *n);
  MemBarNode *child_membar(const MemBarNode *n);
  bool leading_membar(const MemBarNode *barrier);

  bool is_card_mark_membar(const MemBarNode *barrier);
  bool is_CAS(int opcode);

  // helpers navigating between the leading, card mark and trailing
  // membars of a volatile put/CAS subgraph
  MemBarNode *leading_to_normal(MemBarNode *leading);
  MemBarNode *normal_to_leading(const MemBarNode *barrier);
  MemBarNode *card_mark_to_trailing(const MemBarNode *barrier);
  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
  MemBarNode *trailing_to_leading(const MemBarNode *trailing);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);

  // predicate controlling addressing modes
  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1073 %}
1074 
1075 source %{
1076 
1077   // Optimizaton of volatile gets and puts
1078   // -------------------------------------
1079   //
1080   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1081   // use to implement volatile reads and writes. For a volatile read
1082   // we simply need
1083   //
1084   //   ldar<x>
1085   //
1086   // and for a volatile write we need
1087   //
1088   //   stlr<x>
1089   //
1090   // Alternatively, we can implement them by pairing a normal
1091   // load/store with a memory barrier. For a volatile read we need
1092   //
1093   //   ldr<x>
1094   //   dmb ishld
1095   //
1096   // for a volatile write
1097   //
1098   //   dmb ish
1099   //   str<x>
1100   //   dmb ish
1101   //
1102   // We can also use ldaxr and stlxr to implement compare and swap CAS
1103   // sequences. These are normally translated to an instruction
1104   // sequence like the following
1105   //
1106   //   dmb      ish
1107   // retry:
1108   //   ldxr<x>   rval raddr
1109   //   cmp       rval rold
1110   //   b.ne done
  //   stlxr<x>  rval, rnew, raddr
1112   //   cbnz      rval retry
1113   // done:
1114   //   cset      r0, eq
1115   //   dmb ishld
1116   //
1117   // Note that the exclusive store is already using an stlxr
1118   // instruction. That is required to ensure visibility to other
1119   // threads of the exclusive write (assuming it succeeds) before that
1120   // of any subsequent writes.
1121   //
1122   // The following instruction sequence is an improvement on the above
1123   //
1124   // retry:
1125   //   ldaxr<x>  rval raddr
1126   //   cmp       rval rold
1127   //   b.ne done
  //   stlxr<x>  rval, rnew, raddr
1129   //   cbnz      rval retry
1130   // done:
1131   //   cset      r0, eq
1132   //
1133   // We don't need the leading dmb ish since the stlxr guarantees
1134   // visibility of prior writes in the case that the swap is
1135   // successful. Crucially we don't have to worry about the case where
1136   // the swap is not successful since no valid program should be
1137   // relying on visibility of prior changes by the attempting thread
1138   // in the case where the CAS fails.
1139   //
1140   // Similarly, we don't need the trailing dmb ishld if we substitute
1141   // an ldaxr instruction since that will provide all the guarantees we
1142   // require regarding observation of changes made by other threads
1143   // before any change to the CAS address observed by the load.
1144   //
1145   // In order to generate the desired instruction sequence we need to
1146   // be able to identify specific 'signature' ideal graph node
1147   // sequences which i) occur as a translation of a volatile reads or
1148   // writes or CAS operations and ii) do not occur through any other
1149   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1151   // sequences to the desired machine code sequences. Selection of the
1152   // alternative rules can be implemented by predicates which identify
1153   // the relevant node sequences.
1154   //
1155   // The ideal graph generator translates a volatile read to the node
1156   // sequence
1157   //
1158   //   LoadX[mo_acquire]
1159   //   MemBarAcquire
1160   //
1161   // As a special case when using the compressed oops optimization we
1162   // may also see this variant
1163   //
1164   //   LoadN[mo_acquire]
1165   //   DecodeN
1166   //   MemBarAcquire
1167   //
1168   // A volatile write is translated to the node sequence
1169   //
1170   //   MemBarRelease
1171   //   StoreX[mo_release] {CardMark}-optional
1172   //   MemBarVolatile
1173   //
1174   // n.b. the above node patterns are generated with a strict
1175   // 'signature' configuration of input and output dependencies (see
1176   // the predicates below for exact details). The card mark may be as
1177   // simple as a few extra nodes or, in a few GC configurations, may
1178   // include more complex control flow between the leading and
1179   // trailing memory barriers. However, whatever the card mark
1180   // configuration these signatures are unique to translated volatile
1181   // reads/stores -- they will not appear as a result of any other
1182   // bytecode translation or inlining nor as a consequence of
1183   // optimizing transforms.
1184   //
1185   // We also want to catch inlined unsafe volatile gets and puts and
1186   // be able to implement them using either ldar<x>/stlr<x> or some
1187   // combination of ldr<x>/stlr<x> and dmb instructions.
1188   //
1189   // Inlined unsafe volatiles puts manifest as a minor variant of the
1190   // normal volatile put node sequence containing an extra cpuorder
1191   // membar
1192   //
1193   //   MemBarRelease
1194   //   MemBarCPUOrder
1195   //   StoreX[mo_release] {CardMark}-optional
1196   //   MemBarCPUOrder
1197   //   MemBarVolatile
1198   //
1199   // n.b. as an aside, a cpuorder membar is not itself subject to
1200   // matching and translation by adlc rules.  However, the rule
1201   // predicates need to detect its presence in order to correctly
1202   // select the desired adlc rules.
1203   //
1204   // Inlined unsafe volatile gets manifest as a slightly different
1205   // node sequence to a normal volatile get because of the
1206   // introduction of some CPUOrder memory barriers to bracket the
  // Load. However, the same basic skeleton of a LoadX feeding a
  // MemBarAcquire, possibly through an optional DecodeN, is still
1209   // present
1210   //
1211   //   MemBarCPUOrder
1212   //        ||       \\
1213   //   MemBarCPUOrder LoadX[mo_acquire]
1214   //        ||            |
1215   //        ||       {DecodeN} optional
1216   //        ||       /
1217   //     MemBarAcquire
1218   //
1219   // In this case the acquire membar does not directly depend on the
1220   // load. However, we can be sure that the load is generated from an
1221   // inlined unsafe volatile get if we see it dependent on this unique
1222   // sequence of membar nodes. Similarly, given an acquire membar we
1223   // can know that it was added because of an inlined unsafe volatile
1224   // get if it is fed and feeds a cpuorder membar and if its feed
1225   // membar also feeds an acquiring load.
1226   //
1227   // Finally an inlined (Unsafe) CAS operation is translated to the
1228   // following ideal graph
1229   //
1230   //   MemBarRelease
1231   //   MemBarCPUOrder
1232   //   CompareAndSwapX {CardMark}-optional
1233   //   MemBarCPUOrder
1234   //   MemBarAcquire
1235   //
1236   // So, where we can identify these volatile read and write
1237   // signatures we can choose to plant either of the above two code
1238   // sequences. For a volatile read we can simply plant a normal
1239   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1240   // also choose to inhibit translation of the MemBarAcquire and
1241   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1242   //
1243   // When we recognise a volatile store signature we can choose to
1244   // plant at a dmb ish as a translation for the MemBarRelease, a
1245   // normal str<x> and then a dmb ish for the MemBarVolatile.
1246   // Alternatively, we can inhibit translation of the MemBarRelease
1247   // and MemBarVolatile and instead plant a simple stlr<x>
1248   // instruction.
1249   //
1250   // when we recognise a CAS signature we can choose to plant a dmb
1251   // ish as a translation for the MemBarRelease, the conventional
1252   // macro-instruction sequence for the CompareAndSwap node (which
1253   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1254   // Alternatively, we can elide generation of the dmb instructions
1255   // and plant the alternative CompareAndSwap macro-instruction
1256   // sequence (which uses ldaxr<x>).
1257   //
1258   // Of course, the above only applies when we see these signature
1259   // configurations. We still want to plant dmb instructions in any
1260   // other cases where we may see a MemBarAcquire, MemBarRelease or
1261   // MemBarVolatile. For example, at the end of a constructor which
1262   // writes final/volatile fields we will see a MemBarRelease
1263   // instruction and this needs a 'dmb ish' lest we risk the
1264   // constructed object being visible without making the
1265   // final/volatile field writes visible.
1266   //
1267   // n.b. the translation rules below which rely on detection of the
1268   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1269   // If we see anything other than the signature configurations we
1270   // always just translate the loads and stores to ldr<x> and str<x>
1271   // and translate acquire, release and volatile membars to the
1272   // relevant dmb instructions.
1273   //
1274 
1275   // graph traversal helpers used for volatile put/get and CAS
1276   // optimization
1277 
1278   // 1) general purpose helpers
1279 
1280   // if node n is linked to a parent MemBarNode by an intervening
1281   // Control and Memory ProjNode return the MemBarNode otherwise return
1282   // NULL.
1283   //
1284   // n may only be a Load or a MemBar.
1285 
1286   MemBarNode *parent_membar(const Node *n)
1287   {
1288     Node *ctl = NULL;
1289     Node *mem = NULL;
1290     Node *membar = NULL;
1291 
1292     if (n->is_Load()) {
1293       ctl = n->lookup(LoadNode::Control);
1294       mem = n->lookup(LoadNode::Memory);
1295     } else if (n->is_MemBar()) {
1296       ctl = n->lookup(TypeFunc::Control);
1297       mem = n->lookup(TypeFunc::Memory);
1298     } else {
1299         return NULL;
1300     }
1301 
1302     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
1303       return NULL;
1304     }
1305 
1306     membar = ctl->lookup(0);
1307 
1308     if (!membar || !membar->is_MemBar()) {
1309       return NULL;
1310     }
1311 
1312     if (mem->lookup(0) != membar) {
1313       return NULL;
1314     }
1315 
1316     return membar->as_MemBar();
1317   }
1318 
1319   // if n is linked to a child MemBarNode by intervening Control and
1320   // Memory ProjNodes return the MemBarNode otherwise return NULL.
1321 
  MemBarNode *child_membar(const MemBarNode *n)
  {
    // grab n's Ctl and Mem projections, if any
    ProjNode *ctl = n->proj_out_or_null(TypeFunc::Control);
    ProjNode *mem = n->proj_out_or_null(TypeFunc::Memory);

    // MemBar needs to have both a Ctl and Mem projection
    if (! ctl || ! mem)
      return NULL;

    MemBarNode *child = NULL;
    Node *x;

    // first pass: find a membar among the users of the Ctl projection
    for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
      x = ctl->fast_out(i);
      // if we see a membar we keep hold of it. we may also see a new
      // arena copy of the original but it will appear later
      if (x->is_MemBar()) {
          child = x->as_MemBar();
          break;
      }
    }

    if (child == NULL) {
      return NULL;
    }

    // second pass: that same membar must also consume the Mem
    // projection, otherwise it is not a genuine child of n
    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
      x = mem->fast_out(i);
      // if we see a membar we keep hold of it. we may also see a new
      // arena copy of the original but it will appear later
      if (x == child) {
        return child;
      }
    }
    return NULL;
  }
1358 
1359   // helper predicate use to filter candidates for a leading memory
1360   // barrier
1361   //
1362   // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1363   // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1364 
1365   bool leading_membar(const MemBarNode *barrier)
1366   {
1367     int opcode = barrier->Opcode();
1368     // if this is a release membar we are ok
1369     if (opcode == Op_MemBarRelease) {
1370       return true;
1371     }
1372     // if its a cpuorder membar . . .
1373     if (opcode != Op_MemBarCPUOrder) {
1374       return false;
1375     }
1376     // then the parent has to be a release membar
1377     MemBarNode *parent = parent_membar(barrier);
1378     if (!parent) {
1379       return false;
1380     }
1381     opcode = parent->Opcode();
1382     return opcode == Op_MemBarRelease;
1383   }
1384 
1385   // 2) card mark detection helper
1386 
1387   // helper predicate which can be used to detect a volatile membar
1388   // introduced as part of a conditional card mark sequence either by
1389   // G1 or by CMS when UseCondCardMark is true.
1390   //
1391   // membar can be definitively determined to be part of a card mark
1392   // sequence if and only if all the following hold
1393   //
1394   // i) it is a MemBarVolatile
1395   //
1396   // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1397   // true
1398   //
1399   // iii) the node's Mem projection feeds a StoreCM node.
1400 
1401   bool is_card_mark_membar(const MemBarNode *barrier)
1402   {
1403     if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
1404       return false;
1405     }
1406 
1407     if (barrier->Opcode() != Op_MemBarVolatile) {
1408       return false;
1409     }
1410 
1411     ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1412 
1413     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1414       Node *y = mem->fast_out(i);
1415       if (y->Opcode() == Op_StoreCM) {
1416         return true;
1417       }
1418     }
1419 
1420     return false;
1421   }
1422 
1423 
1424   // 3) helper predicates to traverse volatile put or CAS graphs which
1425   // may contain GC barrier subgraphs
1426 
1427   // Preamble
1428   // --------
1429   //
1430   // for volatile writes we can omit generating barriers and employ a
  // releasing store when we see a node sequence with a
1432   // leading MemBarRelease and a trailing MemBarVolatile as follows
1433   //
1434   //   MemBarRelease
1435   //  {      ||      } -- optional
1436   //  {MemBarCPUOrder}
1437   //         ||     \\
1438   //         ||     StoreX[mo_release]
1439   //         | \     /
1440   //         | MergeMem
1441   //         | /
1442   //  {MemBarCPUOrder} -- optional
1443   //  {      ||      }
1444   //   MemBarVolatile
1445   //
1446   // where
1447   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1448   //  | \ and / indicate further routing of the Ctl and Mem feeds
1449   //
1450   // this is the graph we see for non-object stores. however, for a
1451   // volatile Object store (StoreN/P) we may see other nodes below the
1452   // leading membar because of the need for a GC pre- or post-write
1453   // barrier.
1454   //
  // with most GC configurations we will see this simple variant which
1456   // includes a post-write barrier card mark.
1457   //
1458   //   MemBarRelease______________________________
1459   //         ||    \\               Ctl \        \\
1460   //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
1461   //         | \     /                       . . .  /
1462   //         | MergeMem
1463   //         | /
1464   //         ||      /
1465   //  {MemBarCPUOrder} -- optional
1466   //  {      ||      }
1467   //   MemBarVolatile
1468   //
1469   // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1470   // the object address to an int used to compute the card offset) and
1471   // Ctl+Mem to a StoreB node (which does the actual card mark).
1472   //
1473   // n.b. a StoreCM node will only appear in this configuration when
1474   // using CMS. StoreCM differs from a normal card mark write (StoreB)
1475   // because it implies a requirement to order visibility of the card
1476   // mark (StoreCM) relative to the object put (StoreP/N) using a
1477   // StoreStore memory barrier (arguably this ought to be represented
1478   // explicitly in the ideal graph but that is not how it works). This
1479   // ordering is required for both non-volatile and volatile
1480   // puts. Normally that means we need to translate a StoreCM using
1481   // the sequence
1482   //
1483   //   dmb ishst
1484   //   stlrb
1485   //
1486   // However, in the case of a volatile put if we can recognise this
1487   // configuration and plant an stlr for the object write then we can
1488   // omit the dmb and just plant an strb since visibility of the stlr
1489   // is ordered before visibility of subsequent stores. StoreCM nodes
1490   // also arise when using G1 or using CMS with conditional card
1491   // marking. In these cases (as we shall see) we don't need to insert
1492   // the dmb when translating StoreCM because there is already an
1493   // intervening StoreLoad barrier between it and the StoreP/N.
1494   //
1495   // It is also possible to perform the card mark conditionally on it
1496   // currently being unmarked in which case the volatile put graph
1497   // will look slightly different
1498   //
1499   //   MemBarRelease____________________________________________
1500   //         ||    \\               Ctl \     Ctl \     \\  Mem \
1501   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
1502   //         | \     /                              \            |
1503   //         | MergeMem                            . . .      StoreB
1504   //         | /                                                /
1505   //         ||     /
1506   //   MemBarVolatile
1507   //
1508   // It is worth noting at this stage that both the above
1509   // configurations can be uniquely identified by checking that the
1510   // memory flow includes the following subgraph:
1511   //
1512   //   MemBarRelease
1513   //  {MemBarCPUOrder}
1514   //          |  \      . . .
1515   //          |  StoreX[mo_release]  . . .
1516   //          |   /
1517   //         MergeMem
1518   //          |
1519   //  {MemBarCPUOrder}
1520   //   MemBarVolatile
1521   //
1522   // This is referred to as a *normal* subgraph. It can easily be
1523   // detected starting from any candidate MemBarRelease,
1524   // StoreX[mo_release] or MemBarVolatile.
1525   //
1526   // A simple variation on this normal case occurs for an unsafe CAS
1527   // operation. The basic graph for a non-object CAS is
1528   //
1529   //   MemBarRelease
1530   //         ||
1531   //   MemBarCPUOrder
1532   //         ||     \\   . . .
1533   //         ||     CompareAndSwapX
1534   //         ||       |
1535   //         ||     SCMemProj
1536   //         | \     /
1537   //         | MergeMem
1538   //         | /
1539   //   MemBarCPUOrder
1540   //         ||
1541   //   MemBarAcquire
1542   //
1543   // The same basic variations on this arrangement (mutatis mutandis)
  // occur when a card mark is introduced. i.e. we see the same basic
  // shape but the StoreP/N is replaced with CompareAndSwapP/N and the
1546   // tail of the graph is a pair comprising a MemBarCPUOrder +
1547   // MemBarAcquire.
1548   //
1549   // So, in the case of a CAS the normal graph has the variant form
1550   //
1551   //   MemBarRelease
1552   //   MemBarCPUOrder
1553   //          |   \      . . .
1554   //          |  CompareAndSwapX  . . .
1555   //          |    |
1556   //          |   SCMemProj
1557   //          |   /  . . .
1558   //         MergeMem
1559   //          |
1560   //   MemBarCPUOrder
1561   //   MemBarAcquire
1562   //
1563   // This graph can also easily be detected starting from any
1564   // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire.
1565   //
1566   // the code below uses two helper predicates, leading_to_normal and
1567   // normal_to_leading to identify these normal graphs, one validating
1568   // the layout starting from the top membar and searching down and
1569   // the other validating the layout starting from the lower membar
1570   // and searching up.
1571   //
1572   // There are two special case GC configurations when a normal graph
1573   // may not be generated: when using G1 (which always employs a
1574   // conditional card mark); and when using CMS with conditional card
1575   // marking configured. These GCs are both concurrent rather than
1576   // stop-the world GCs. So they introduce extra Ctl+Mem flow into the
1577   // graph between the leading and trailing membar nodes, in
  // particular enforcing stronger memory serialisation between the
1579   // object put and the corresponding conditional card mark. CMS
1580   // employs a post-write GC barrier while G1 employs both a pre- and
1581   // post-write GC barrier. Of course the extra nodes may be absent --
1582   // they are only inserted for object puts/swaps. This significantly
1583   // complicates the task of identifying whether a MemBarRelease,
1584   // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
1585   // when using these GC configurations (see below). It adds similar
1586   // complexity to the task of identifying whether a MemBarRelease,
1587   // CompareAndSwapX or MemBarAcquire forms part of a CAS.
1588   //
1589   // In both cases the post-write subtree includes an auxiliary
1590   // MemBarVolatile (StoreLoad barrier) separating the object put/swap
1591   // and the read of the corresponding card. This poses two additional
1592   // problems.
1593   //
1594   // Firstly, a card mark MemBarVolatile needs to be distinguished
1595   // from a normal trailing MemBarVolatile. Resolving this first
1596   // problem is straightforward: a card mark MemBarVolatile always
1597   // projects a Mem feed to a StoreCM node and that is a unique marker
1598   //
1599   //      MemBarVolatile (card mark)
1600   //       C |    \     . . .
1601   //         |   StoreCM   . . .
1602   //       . . .
1603   //
1604   // The second problem is how the code generator is to translate the
1605   // card mark barrier? It always needs to be translated to a "dmb
1606   // ish" instruction whether or not it occurs as part of a volatile
1607   // put. A StoreLoad barrier is needed after the object put to ensure
1608   // i) visibility to GC threads of the object put and ii) visibility
1609   // to the mutator thread of any card clearing write by a GC
1610   // thread. Clearly a normal store (str) will not guarantee this
1611   // ordering but neither will a releasing store (stlr). The latter
1612   // guarantees that the object put is visible but does not guarantee
1613   // that writes by other threads have also been observed.
1614   //
1615   // So, returning to the task of translating the object put and the
1616   // leading/trailing membar nodes: what do the non-normal node graph
1617   // look like for these 2 special cases? and how can we determine the
1618   // status of a MemBarRelease, StoreX[mo_release] or MemBarVolatile
1619   // in both normal and non-normal cases?
1620   //
1621   // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
  // which selects conditional execution based on the value loaded
1623   // (LoadB) from the card. Ctl and Mem are fed to the If via an
1624   // intervening StoreLoad barrier (MemBarVolatile).
1625   //
1626   // So, with CMS we may see a node graph for a volatile object store
1627   // which looks like this
1628   //
1629   //   MemBarRelease
1630   //   MemBarCPUOrder_(leading)__________________
1631   //     C |    M \       \\                   C \
1632   //       |       \    StoreN/P[mo_release]  CastP2X
1633   //       |    Bot \    /
1634   //       |       MergeMem
1635   //       |         /
1636   //      MemBarVolatile (card mark)
1637   //     C |  ||    M |
1638   //       | LoadB    |
1639   //       |   |      |
1640   //       | Cmp      |\
1641   //       | /        | \
1642   //       If         |  \
1643   //       | \        |   \
1644   // IfFalse  IfTrue  |    \
1645   //       \     / \  |     \
1646   //        \   / StoreCM    |
1647   //         \ /      |      |
1648   //        Region   . . .   |
1649   //          | \           /
1650   //          |  . . .  \  / Bot
1651   //          |       MergeMem
1652   //          |          |
1653   //       {MemBarCPUOrder}
1654   //        MemBarVolatile (trailing)
1655   //
1656   // The first MergeMem merges the AliasIdxBot Mem slice from the
1657   // leading membar and the oopptr Mem slice from the Store into the
1658   // card mark membar. The trailing MergeMem merges the AliasIdxBot
1659   // Mem slice from the card mark membar and the AliasIdxRaw slice
1660   // from the StoreCM into the trailing membar (n.b. the latter
1661   // proceeds via a Phi associated with the If region).
1662   //
1663   // The graph for a CAS varies slightly, the difference being
1664   // that the StoreN/P node is replaced by a CompareAndSwapP/N node
1665   // and the trailing MemBarVolatile by a MemBarCPUOrder +
1666   // MemBarAcquire pair.
1667   //
1668   //   MemBarRelease
1669   //   MemBarCPUOrder_(leading)_______________
1670   //     C |    M \       \\                C \
1671   //       |       \    CompareAndSwapN/P  CastP2X
1672   //       |        \      |
1673   //       |         \   SCMemProj
1674   //       |      Bot \   /
1675   //       |        MergeMem
1676   //       |         /
1677   //      MemBarVolatile (card mark)
1678   //     C |  ||    M |
1679   //       | LoadB    |
1680   //       |   |      |
1681   //       | Cmp      |\
1682   //       | /        | \
1683   //       If         |  \
1684   //       | \        |   \
1685   // IfFalse  IfTrue  |    \
1686   //       \     / \  |     \
1687   //        \   / StoreCM    |
1688   //         \ /      |      |
1689   //        Region   . . .   |
1690   //          | \           /
1691   //          |  . . .  \  / Bot
1692   //          |       MergeMem
1693   //          |          |
1694   //       {MemBarCPUOrder}
1695   //        MemBarVolatile (trailing)
1696   //
1697   //
1698   // G1 is quite a lot more complicated. The nodes inserted on behalf
1699   // of G1 may comprise: a pre-write graph which adds the old value to
1700   // the SATB queue; the releasing store itself; and, finally, a
1701   // post-write graph which performs a card mark.
1702   //
1703   // The pre-write graph may be omitted, but only when the put is
1704   // writing to a newly allocated (young gen) object and then only if
1705   // there is a direct memory chain to the Initialize node for the
1706   // object allocation. This will not happen for a volatile put since
1707   // any memory chain passes through the leading membar.
1708   //
1709   // The pre-write graph includes a series of 3 If tests. The outermost
1710   // If tests whether SATB is enabled (no else case). The next If tests
1711   // whether the old value is non-NULL (no else case). The third tests
1712   // whether the SATB queue index is > 0, if so updating the queue. The
1713   // else case for this third If calls out to the runtime to allocate a
1714   // new queue buffer.
1715   //
1716   // So with G1 the pre-write and releasing store subgraph looks like
1717   // this (the nested Ifs are omitted).
1718   //
1719   //  MemBarRelease (leading)____________
1720   //     C |  ||  M \   M \    M \  M \ . . .
1721   //       | LoadB   \  LoadL  LoadN   \
1722   //       | /        \                 \
1723   //       If         |\                 \
1724   //       | \        | \                 \
1725   //  IfFalse  IfTrue |  \                 \
1726   //       |     |    |   \                 |
1727   //       |     If   |   /\                |
1728   //       |     |          \               |
1729   //       |                 \              |
1730   //       |    . . .         \             |
1731   //       | /       | /       |            |
1732   //      Region  Phi[M]       |            |
1733   //       | \       |         |            |
1734   //       |  \_____ | ___     |            |
1735   //     C | C \     |   C \ M |            |
1736   //       | CastP2X | StoreN/P[mo_release] |
1737   //       |         |         |            |
1738   //     C |       M |       M |          M |
1739   //        \        |         |           /
1740   //                  . . .
1741   //          (post write subtree elided)
1742   //                    . . .
1743   //             C \         M /
1744   //                \         /
1745   //             {MemBarCPUOrder}
1746   //              MemBarVolatile (trailing)
1747   //
1748   // n.b. the LoadB in this subgraph is not the card read -- it's a
1749   // read of the SATB queue active flag.
1750   //
1751   // The G1 post-write subtree is also optional, this time when the
1752   // new value being written is either null or can be identified as a
1753   // newly allocated (young gen) object with no intervening control
1754   // flow. The latter cannot happen but the former may, in which case
// the card mark membar is omitted and the memory feeds from the
// leading membar and the StoreN/P are merged direct into the
1757   // trailing membar as per the normal subgraph. So, the only special
1758   // case which arises is when the post-write subgraph is generated.
1759   //
1760   // The kernel of the post-write G1 subgraph is the card mark itself
1761   // which includes a card mark memory barrier (MemBarVolatile), a
1762   // card test (LoadB), and a conditional update (If feeding a
1763   // StoreCM). These nodes are surrounded by a series of nested Ifs
1764   // which try to avoid doing the card mark. The top level If skips if
1765   // the object reference does not cross regions (i.e. it tests if
1766   // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1767   // need not be recorded. The next If, which skips on a NULL value,
1768   // may be absent (it is not generated if the type of value is >=
1769   // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1770   // checking if card_val != young).  n.b. although this test requires
1771   // a pre-read of the card it can safely be done before the StoreLoad
1772   // barrier. However that does not bypass the need to reread the card
1773   // after the barrier. A final, 4th If tests if the card is already
1774   // marked.
1775   //
1776   //                (pre-write subtree elided)
1777   //        . . .                  . . .    . . .  . . .
1778   //        C |                    M |     M |    M |
1779   //       Region                  Phi[M] StoreN    |
1780   //          |                     / \      |      |
1781   //         / \_______            /   \     |      |
1782   //      C / C \      . . .            \    |      |
1783   //       If   CastP2X . . .            |   |      |
1784   //       / \                           |   |      |
1785   //      /   \                          |   |      |
1786   // IfFalse IfTrue                      |   |      |
1787   //   |       |                         |   |     /|
1788   //   |       If                        |   |    / |
1789   //   |      / \                        |   |   /  |
1790   //   |     /   \                        \  |  /   |
1791   //   | IfFalse IfTrue                   MergeMem  |
1792   //   |  . . .    / \                       /      |
1793   //   |          /   \                     /       |
1794   //   |     IfFalse IfTrue                /        |
1795   //   |      . . .    |                  /         |
1796   //   |               If                /          |
1797   //   |               / \              /           |
1798   //   |              /   \            /            |
1799   //   |         IfFalse IfTrue       /             |
1800   //   |           . . .   |         /              |
1801   //   |                    \       /               |
1802   //   |                     \     /                |
1803   //   |             MemBarVolatile__(card mark)    |
1804   //   |                ||   C |  M \  M \          |
1805   //   |               LoadB   If    |    |         |
1806   //   |                      / \    |    |         |
1807   //   |                     . . .   |    |         |
1808   //   |                          \  |    |        /
1809   //   |                        StoreCM   |       /
1810   //   |                          . . .   |      /
1811   //   |                        _________/      /
1812   //   |                       /  _____________/
1813   //   |   . . .       . . .  |  /            /
1814   //   |    |                 | /   _________/
1815   //   |    |               Phi[M] /        /
1816   //   |    |                 |   /        /
1817   //   |    |                 |  /        /
1818   //   |  Region  . . .     Phi[M]  _____/
1819   //   |    /                 |    /
1820   //   |                      |   /
1821   //   | . . .   . . .        |  /
1822   //   | /                    | /
1823   // Region           |  |  Phi[M]
1824   //   |              |  |  / Bot
1825   //    \            MergeMem
1826   //     \            /
1827   //    {MemBarCPUOrder}
1828   //     MemBarVolatile
1829   //
1830   // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
1831   // from the leading membar and the oopptr Mem slice from the Store
1832   // into the card mark membar i.e. the memory flow to the card mark
1833   // membar still looks like a normal graph.
1834   //
1835   // The trailing MergeMem merges an AliasIdxBot Mem slice with other
1836   // Mem slices (from the StoreCM and other card mark queue stores).
1837   // However in this case the AliasIdxBot Mem slice does not come
1838   // direct from the card mark membar. It is merged through a series
1839   // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
1840   // from the leading membar with the Mem feed from the card mark
1841   // membar. Each Phi corresponds to one of the Ifs which may skip
1842   // around the card mark membar. So when the If implementing the NULL
1843   // value check has been elided the total number of Phis is 2
1844   // otherwise it is 3.
1845   //
1846   // The CAS graph when using G1GC also includes a pre-write subgraph
1847   // and an optional post-write subgraph. The same variations are
1848   // introduced as for CMS with conditional card marking i.e. the
1849   // StoreP/N is swapped for a CompareAndSwapP/N with a following
1850   // SCMemProj, the trailing MemBarVolatile for a MemBarCPUOrder +
1851   // MemBarAcquire pair. There may be an extra If test introduced in
1852   // the CAS case, when the boolean result of the CAS is tested by the
1853   // caller. In that case an extra Region and AliasIdxBot Phi may be
1854   // introduced before the MergeMem
1855   //
1856   // So, the upshot is that in all cases the subgraph will include a
// *normal* memory subgraph between the leading membar and its child
1858   // membar: either a normal volatile put graph including a releasing
1859   // StoreX and terminating with a trailing volatile membar or card
1860   // mark volatile membar; or a normal CAS graph including a
1861   // CompareAndSwapX + SCMemProj pair and terminating with a card mark
1862   // volatile membar or a trailing cpu order and acquire membar
1863   // pair. If the child membar is not a (volatile) card mark membar
1864   // then it marks the end of the volatile put or CAS subgraph. If the
1865   // child is a card mark membar then the normal subgraph will form
1866   // part of a larger volatile put or CAS subgraph if and only if the
1867   // child feeds an AliasIdxBot Mem feed to a trailing barrier via a
1868   // MergeMem. That feed is either direct (for CMS) or via 2, 3 or 4
1869   // Phi nodes merging the leading barrier memory flow (for G1).
1870   //
1871   // The predicates controlling generation of instructions for store
1872   // and barrier nodes employ a few simple helper functions (described
1873   // below) which identify the presence or absence of all these
1874   // subgraph configurations and provide a means of traversing from
1875   // one node in the subgraph to another.
1876 
1877   // is_CAS(int opcode)
1878   //
1879   // return true if opcode is one of the possible CompareAndSwapX
1880   // values otherwise false.
1881 
1882   bool is_CAS(int opcode)
1883   {
1884     switch(opcode) {
1885       // We handle these
1886     case Op_CompareAndSwapI:
1887     case Op_CompareAndSwapL:
1888     case Op_CompareAndSwapP:
1889     case Op_CompareAndSwapN:
1890  // case Op_CompareAndSwapB:
1891  // case Op_CompareAndSwapS:
1892       return true;
1893       // These are TBD
1894     case Op_WeakCompareAndSwapB:
1895     case Op_WeakCompareAndSwapS:
1896     case Op_WeakCompareAndSwapI:
1897     case Op_WeakCompareAndSwapL:
1898     case Op_WeakCompareAndSwapP:
1899     case Op_WeakCompareAndSwapN:
1900     case Op_CompareAndExchangeB:
1901     case Op_CompareAndExchangeS:
1902     case Op_CompareAndExchangeI:
1903     case Op_CompareAndExchangeL:
1904     case Op_CompareAndExchangeP:
1905     case Op_CompareAndExchangeN:
1906       return false;
1907     default:
1908       return false;
1909     }
1910   }
1911 
1912   // helper to determine the maximum number of Phi nodes we may need to
1913   // traverse when searching from a card mark membar for the merge mem
1914   // feeding a trailing membar or vice versa
1915 
1916   int max_phis()
1917   {
1918     if (UseG1GC) {
1919       return 4;
1920     } else if (UseConcMarkSweepGC && UseCondCardMark) {
1921       return 1;
1922     } else {
1923       return 0;
1924     }
1925   }
1926 
1927   // leading_to_normal
1928   //
1929   // graph traversal helper which detects the normal case Mem feed
1930   // from a release membar (or, optionally, its cpuorder child) to a
1931   // dependent volatile or acquire membar i.e. it ensures that one of
1932   // the following 3 Mem flow subgraphs is present.
1933   //
1934   //   MemBarRelease
1935   //   MemBarCPUOrder {leading}
1936   //          |  \      . . .
1937   //          |  StoreN/P[mo_release]  . . .
1938   //          |   /
1939   //         MergeMem
1940   //          |
1941   //  {MemBarCPUOrder}
1942   //   MemBarVolatile {trailing or card mark}
1943   //
1944   //   MemBarRelease
1945   //   MemBarCPUOrder {leading}
1946   //          |  \      . . .
1947   //          |  CompareAndSwapX  . . .
1948   //          |   /
1949   //         MergeMem
1950   //          |
1951   //   MemBarVolatile {card mark}
1952   //
1953   //   MemBarRelease
1954   //   MemBarCPUOrder {leading}
1955   //          |  \      . . .
1956   //          |  CompareAndSwapX  . . .
1957   //          |   /
1958   //         MergeMem
1959   //          |
1960   //   MemBarCPUOrder
1961   //   MemBarAcquire {trailing}
1962   //
1963   // if the correct configuration is present returns the trailing
1964   // membar otherwise NULL.
1965   //
1966   // the input membar is expected to be either a cpuorder membar or a
1967   // release membar. in the latter case it should not have a cpu membar
1968   // child.
1969   //
1970   // the returned value may be a card mark or trailing membar
1971   //
1972 
// walk forward from a leading (release/cpuorder) membar looking for
// the normal Mem flow: leading -> {StoreX[mo_release] | CompareAndSwapX}
// -> MergeMem -> {trailing | card mark} membar. returns the membar at
// the bottom of that flow or NULL if the graph does not match.
MemBarNode *leading_to_normal(MemBarNode *leading)
{
  assert((leading->Opcode() == Op_MemBarRelease ||
          leading->Opcode() == Op_MemBarCPUOrder),
         "expecting a volatile or cpuroder membar!");

  // check the mem flow
  ProjNode *mem = leading->proj_out(TypeFunc::Memory);

  if (!mem) {
    return NULL;
  }

  Node *x = NULL;
  StoreNode * st = NULL;
  LoadStoreNode *cas = NULL;
  MergeMemNode *mm = NULL;

  // the leading membar's Mem projection must feed exactly one
  // MergeMem and exactly one releasing store OR one CAS -- note that
  // StoreCM is explicitly excluded since a card mark store is not
  // the releasing store we are looking for
  for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
    x = mem->fast_out(i);
    if (x->is_MergeMem()) {
      if (mm != NULL) {
        return NULL;
      }
      // two merge mems is one too many
      mm = x->as_MergeMem();
    } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
      // two releasing stores/CAS nodes is one too many
      if (st != NULL || cas != NULL) {
        return NULL;
      }
      st = x->as_Store();
    } else if (is_CAS(x->Opcode())) {
      if (st != NULL || cas != NULL) {
        return NULL;
      }
      cas = x->as_LoadStore();
    }
  }

  // must have a store or a cas
  if (!st && !cas) {
    return NULL;
  }

  // must have a merge
  if (!mm) {
    return NULL;
  }

  // identify the node whose output must reach the MergeMem: for a
  // CAS it is the intermediary SCMemProj, for a store it is the
  // store itself
  Node *feed = NULL;
  if (cas) {
    // look for an SCMemProj
    for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
      x = cas->fast_out(i);
      if (x->Opcode() == Op_SCMemProj) {
        feed = x;
        break;
      }
    }
    if (feed == NULL) {
      return NULL;
    }
  } else {
    feed = st;
  }
  // ensure the feed node feeds the existing mergemem;
  // n.b. on loop exit x holds the last out visited; the test below
  // relies on x == mm iff we broke out of the loop at the merge
  for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
    x = feed->fast_out(i);
    if (x == mm) {
      break;
    }
  }
  if (x != mm) {
    return NULL;
  }

  MemBarNode *mbar = NULL;
  // ensure the merge feeds to the expected type of membar
  for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
    x = mm->fast_out(i);
    if (x->is_MemBar()) {
      if (x->Opcode() == Op_MemBarCPUOrder) {
        // with a store any cpu order membar should precede a
        // trailing volatile membar. with a cas it should precede a
        // trailing acquire membar. in either case try to skip to
        // that next membar
        MemBarNode *y =  x->as_MemBar();
        y = child_membar(y);
        if (y != NULL) {
          // skip to this new membar to do the check
          x = y;
        }
        
      }
      if (x->Opcode() == Op_MemBarVolatile) {
        mbar = x->as_MemBar();
        // for a volatile store this can be either a trailing membar
        // or a card mark membar. for a cas it must be a card mark
        // membar
        assert(cas == NULL || is_card_mark_membar(mbar),
               "in CAS graph volatile membar must be a card mark");
      } else if (cas != NULL && x->Opcode() == Op_MemBarAcquire) {
        mbar = x->as_MemBar();
      }
      break;
    }
  }

  return mbar;
}
2084 
2085   // normal_to_leading
2086   //
2087   // graph traversal helper which detects the normal case Mem feed
2088   // from either a card mark or a trailing membar to a preceding
2089   // release membar (optionally its cpuorder child) i.e. it ensures
2090   // that one of the following 3 Mem flow subgraphs is present.
2091   //
2092   //   MemBarRelease
2093   //  {MemBarCPUOrder} {leading}
2094   //          |  \      . . .
2095   //          |  StoreN/P[mo_release]  . . .
2096   //          |   /
2097   //         MergeMem
2098   //          |
2099   //  {MemBarCPUOrder}
2100   //   MemBarVolatile {trailing or card mark}
2101   //
2102   //   MemBarRelease
2103   //   MemBarCPUOrder {leading}
2104   //          |  \      . . .
2105   //          |  CompareAndSwapX  . . .
2106   //          |   /
2107   //         MergeMem
2108   //          |
2109   //   MemBarVolatile {card mark}
2110   //
2111   //   MemBarRelease
2112   //   MemBarCPUOrder {leading}
2113   //          |  \      . . .
2114   //          |  CompareAndSwapX  . . .
2115   //          |   /
2116   //         MergeMem
2117   //          |
2118   //   MemBarCPUOrder
2119   //   MemBarAcquire {trailing}
2120   //
2121   // this predicate checks for the same flow as the previous predicate
2122   // but starting from the bottom rather than the top.
2123   //
// if the configuration is present returns the cpuorder membar for
2125   // preference or when absent the release membar otherwise NULL.
2126   //
2127   // n.b. the input membar is expected to be a MemBarVolatile but
2128   // need not be a card mark membar.
2129 
2130   MemBarNode *normal_to_leading(const MemBarNode *barrier)
2131   {
2132     // input must be a volatile membar
2133     assert((barrier->Opcode() == Op_MemBarVolatile ||
2134             barrier->Opcode() == Op_MemBarAcquire),
2135            "expecting a volatile or an acquire membar");
2136     bool barrier_is_acquire = barrier->Opcode() == Op_MemBarAcquire;
2137 
2138     // if we have an intervening cpu order membar then start the
2139     // search from it
2140     
2141     Node *x = parent_membar(barrier);
2142 
2143     if (x == NULL) {
2144       // stick with the original barrier
2145       x = (Node *)barrier;
2146     } else if (x->Opcode() != Op_MemBarCPUOrder) {
2147       // any other barrier means this is not the graph we want
2148       return NULL;
2149     }
2150 
2151     // the Mem feed to the membar should be a merge
2152     x = x ->in(TypeFunc::Memory);
2153     if (!x->is_MergeMem())
2154       return NULL;
2155 
2156     MergeMemNode *mm = x->as_MergeMem();
2157 
2158     // the merge should get its Bottom mem feed from the leading membar
2159     x = mm->in(Compile::AliasIdxBot);
2160 
2161     // ensure this is a non control projection
2162     if (!x->is_Proj() || x->is_CFG()) {
2163       return NULL;
2164     }
2165     // if it is fed by a membar that's the one we want
2166     x = x->in(0);
2167 
2168     if (!x->is_MemBar()) {
2169       return NULL;
2170     }
2171 
2172     MemBarNode *leading = x->as_MemBar();
2173     // reject invalid candidates
2174     if (!leading_membar(leading)) {
2175       return NULL;
2176     }
2177 
2178     // ok, we have a leading membar, now for the sanity clauses
2179 
2180     // the leading membar must feed Mem to a releasing store or CAS
2181     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2182     StoreNode *st = NULL;
2183     LoadStoreNode *cas = NULL;
2184     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2185       x = mem->fast_out(i);
2186       if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2187         // two stores or CASes is one too many
2188         if (st != NULL || cas != NULL) {
2189           return NULL;
2190         }
2191         st = x->as_Store();
2192       } else if (is_CAS(x->Opcode())) {
2193         if (st != NULL || cas != NULL) {
2194           return NULL;
2195         }
2196         cas = x->as_LoadStore();
2197       }
2198     }
2199 
2200     // we should not have both a store and a cas
2201     if (st == NULL & cas == NULL) {
2202       return NULL;
2203     }
2204     if (st == NULL) {
2205       // if we started from a volatile membar and found a CAS then the
2206       // original membar ought to be for a card mark
2207       assert((barrier_is_acquire || is_card_mark_membar(barrier)),
2208              "unexpected volatile barrier (i.e. not card mark) in CAS graph");
2209       // check that the CAS feeds the merge we used to get here via an
2210       // intermediary SCMemProj
2211       Node *scmemproj = NULL;
2212       for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
2213         x = cas->fast_out(i);
2214         if (x->Opcode() == Op_SCMemProj) {
2215           scmemproj = x;
2216           break;
2217         }
2218       }
2219       if (scmemproj == NULL) {
2220         return NULL;
2221       }
2222       for (DUIterator_Fast imax, i = scmemproj->fast_outs(imax); i < imax; i++) {
2223         x = scmemproj->fast_out(i);
2224         if (x == mm) {
2225           return leading;
2226         }
2227       }
2228     } else {
2229       // we should not have found a store if we started from an acquire
2230       assert(!barrier_is_acquire,
2231              "unexpected trailing acquire barrier in volatile store graph");
2232 
2233       // the store should feed the merge we used to get here
2234       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2235         if (st->fast_out(i) == mm) {
2236           return leading;
2237         }
2238       }
2239     }
2240 
2241     return NULL;
2242   }
2243 
2244   // card_mark_to_trailing
2245   //
2246   // graph traversal helper which detects extra, non-normal Mem feed
2247   // from a card mark volatile membar to a trailing membar i.e. it
2248   // ensures that one of the following three GC post-write Mem flow
2249   // subgraphs is present.
2250   //
2251   // 1)
2252   //     . . .
2253   //       |
2254   //   MemBarVolatile (card mark)
2255   //      |          |
2256   //      |        StoreCM
2257   //      |          |
2258   //      |        . . .
2259   //  Bot |  /
2260   //   MergeMem
2261   //      |
2262   //   {MemBarCPUOrder}            OR  MemBarCPUOrder
2263   //    MemBarVolatile {trailing}      MemBarAcquire {trailing}
2264   //                                 
2265   //
2266   // 2)
2267   //   MemBarRelease/CPUOrder (leading)
2268   //    |
2269   //    |
2270   //    |\       . . .
2271   //    | \        |
2272   //    |  \  MemBarVolatile (card mark)
2273   //    |   \   |     |
2274   //     \   \  |   StoreCM    . . .
2275   //      \   \ |
2276   //       \  Phi
2277   //        \ /
2278   //        Phi  . . .
2279   //     Bot |   /
2280   //       MergeMem
2281   //         |
2282   //   {MemBarCPUOrder}            OR  MemBarCPUOrder
2283   //    MemBarVolatile {trailing}      MemBarAcquire {trailing}
2284   //
2285   // 3)
2286   //   MemBarRelease/CPUOrder (leading)
2287   //    |
2288   //    |\
2289   //    | \
2290   //    |  \      . . .
2291   //    |   \       |
2292   //    |\   \  MemBarVolatile (card mark)
2293   //    | \   \   |     |
2294   //    |  \   \  |   StoreCM    . . .
2295   //    |   \   \ |
2296   //     \   \  Phi
2297   //      \   \ /
2298   //       \  Phi
2299   //        \ /
2300   //        Phi  . . .
2301   //     Bot |   /
2302   //       MergeMem
2303   //         |
2304   //         |
2305   //   {MemBarCPUOrder}            OR  MemBarCPUOrder
2306   //    MemBarVolatile {trailing}      MemBarAcquire {trailing}
2307   //
2308   // 4)
2309   //   MemBarRelease/CPUOrder (leading)
2310   //    |
2311   //    |\
2312   //    | \
2313   //    |  \
2314   //    |   \
2315   //    |\   \
2316   //    | \   \
2317   //    |  \   \        . . .
2318   //    |   \   \         |
2319   //    |\   \   \   MemBarVolatile (card mark)
2320   //    | \   \   \   /   |
2321   //    |  \   \   \ /  StoreCM    . . .
2322   //    |   \   \  Phi
2323   //     \   \   \ /
2324   //      \   \  Phi
2325   //       \   \ /
2326   //        \  Phi
2327   //         \ /
2328   //         Phi  . . .
2329   //      Bot |   /
2330   //       MergeMem
2331   //          |
2332   //          |
2333   //    MemBarCPUOrder
2334   //    MemBarAcquire {trailing}
2335   //
2336   // configuration 1 is only valid if UseConcMarkSweepGC &&
2337   // UseCondCardMark
2338   //
2339   // configuration 2, is only valid if UseConcMarkSweepGC &&
2340   // UseCondCardMark or if UseG1GC
2341   //
2342   // configurations 3 and 4 are only valid if UseG1GC.
2343   //
2344   // if a valid configuration is present returns the trailing membar
2345   // otherwise NULL.
2346   //
2347   // n.b. the supplied membar is expected to be a card mark
2348   // MemBarVolatile i.e. the caller must ensure the input node has the
2349   // correct operand and feeds Mem to a StoreCM node
2350 
// walk forward from a card mark membar, possibly through a chain of
// Bot-slice Phi nodes (up to max_phis() of them, as generated by G1
// or CMS+UseCondCardMark), to the MergeMem and thence to the
// trailing volatile or acquire membar. returns the trailing membar
// or NULL if the expected configuration is absent.
MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
{
  // input must be a card mark volatile membar
  assert(is_card_mark_membar(barrier), "expecting a card mark membar");

  Node *feed = barrier->proj_out(TypeFunc::Memory);
  Node *x;
  MergeMemNode *mm = NULL;

  const int MAX_PHIS = max_phis(); // max phis we will search through
  int phicount = 0;                // current search count

  bool retry_feed = true;
  while (retry_feed) {
    // see if we have a direct MergeMem feed
    for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
      x = feed->fast_out(i);
      // the correct Phi will be merging a Bot memory slice
      if (x->is_MergeMem()) {
        mm = x->as_MergeMem();
        break;
      }
    }
    if (mm) {
      retry_feed = false;
    } else if (phicount++ < MAX_PHIS) {
      // the barrier may feed indirectly via one or two Phi nodes
      PhiNode *phi = NULL;
      for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
        x = feed->fast_out(i);
        // the correct Phi will be merging a Bot memory slice
        if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
          phi = x->as_Phi();
          break;
        }
      }
      if (!phi) {
        return NULL;
      }
      // look for another merge below this phi
      feed = phi;
    } else {
      // couldn't find a merge
      return NULL;
    }
  }

  // sanity check this feed turns up as the expected slice
  assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");

  MemBarNode *trailing = NULL;
  // be sure we have a trailing membar fed by the merge
  for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
    x = mm->fast_out(i);
    if (x->is_MemBar()) {
      // if this is an intervening cpu order membar skip to the
      // following membar
      if (x->Opcode() == Op_MemBarCPUOrder) {
        MemBarNode *y =  x->as_MemBar();
        y = child_membar(y);
        if (y != NULL) {
          x = y;
        }
      }
      // accept either a trailing volatile membar (volatile store
      // graph) or a trailing acquire membar (CAS graph)
      if (x->Opcode() == Op_MemBarVolatile ||
          x->Opcode() == Op_MemBarAcquire) {
        trailing = x->as_MemBar();
      }
      break;
    }
  }

  return trailing;
}
2425 
2426   // trailing_to_card_mark
2427   //
2428   // graph traversal helper which detects extra, non-normal Mem feed
2429   // from a trailing volatile membar to a preceding card mark volatile
2430   // membar i.e. it identifies whether one of the three possible extra
2431   // GC post-write Mem flow subgraphs is present
2432   //
2433   // this predicate checks for the same flow as the previous predicate
2434   // but starting from the bottom rather than the top.
2435   //
2436   // if the configuration is present returns the card mark membar
2437   // otherwise NULL
2438   //
2439   // n.b. the supplied membar is expected to be a trailing
2440   // MemBarVolatile or MemBarAcquire i.e. the caller must ensure the
2441   // input node has the correct opcode
2442 
// walk backwards from a trailing (non-card-mark) membar, through its
// MergeMem's Bot slice and possibly a chain of Bot-slice Phi nodes
// (up to max_phis() of them, as generated by G1), to the Memory Proj
// of a preceding card mark membar. returns the card mark membar or
// NULL if the expected configuration is absent.
MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
{
  assert(trailing->Opcode() == Op_MemBarVolatile ||
         trailing->Opcode() == Op_MemBarAcquire,
         "expecting a volatile or acquire membar");
  assert(!is_card_mark_membar(trailing),
         "not expecting a card mark membar");

  Node *x = (Node *)trailing;

  // look for a preceding cpu order membar
  MemBarNode *y = parent_membar(x->as_MemBar());
  if (y != NULL) {
    // make sure it is a cpu order membar
    if (y->Opcode() != Op_MemBarCPUOrder) {
      // this is not the graph we were looking for
      return NULL;
    }
    // start the search from here
    x = y;
  }

  // the Mem feed to the membar should be a merge
  x = x->in(TypeFunc::Memory);
  if (!x->is_MergeMem()) {
    return NULL;
  }

  MergeMemNode *mm = x->as_MergeMem();

  x = mm->in(Compile::AliasIdxBot);
  // with G1 we may possibly see a Phi or two before we see a Memory
  // Proj from the card mark membar

  const int MAX_PHIS = max_phis(); // max phis we will search through
  int phicount = 0;                    // current search count

  bool retry_feed = !x->is_Proj();

  while (retry_feed) {
    if (x->is_Phi() && phicount++ < MAX_PHIS) {
      PhiNode *phi = x->as_Phi();
      ProjNode *proj = NULL;
      PhiNode *nextphi = NULL;
      bool found_leading = false;
      // inspect each of the Phi's inputs: we expect to see either a
      // Proj from a volatile membar (the card mark), another
      // Bot-slice Phi to recurse into, or a Proj from the leading
      // membar
      for (uint i = 1; i < phi->req(); i++) {
        x = phi->in(i);
        if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
          nextphi = x->as_Phi();
        } else if (x->is_Proj()) {
          int opcode = x->in(0)->Opcode();
          if (opcode == Op_MemBarVolatile) {
            proj = x->as_Proj();
          } else if (opcode == Op_MemBarRelease ||
                     opcode == Op_MemBarCPUOrder) {
            // probably a leading membar
            found_leading = true;
          }
        }
      }
      // if we found a correct looking proj then retry from there
      // otherwise we must see a leading and a phi or this is the
      // wrong config
      if (proj != NULL) {
        x = proj;
        retry_feed = false;
      } else if (found_leading && nextphi != NULL) {
        // retry from this phi to check phi2
        x = nextphi;
      } else {
        // not what we were looking for
        return NULL;
      }
    } else {
      return NULL;
    }
  }
  // the proj has to come from the card mark membar
  x = x->in(0);
  if (!x->is_MemBar()) {
    return NULL;
  }

  MemBarNode *card_mark_membar = x->as_MemBar();

  if (!is_card_mark_membar(card_mark_membar)) {
    return NULL;
  }

  return card_mark_membar;
}
2534 
2535   // trailing_to_leading
2536   //
2537   // graph traversal helper which checks the Mem flow up the graph
2538   // from a (non-card mark) trailing membar attempting to locate and
2539   // return an associated leading membar. it first looks for a
2540   // subgraph in the normal configuration (relying on helper
2541   // normal_to_leading). failing that it then looks for one of the
2542   // possible post-write card mark subgraphs linking the trailing node
  // to the card mark membar (relying on helper
2544   // trailing_to_card_mark), and then checks that the card mark membar
2545   // is fed by a leading membar (once again relying on auxiliary
2546   // predicate normal_to_leading).
2547   //
  // if the configuration is valid returns the cpuorder membar for
2549   // preference or when absent the release membar otherwise NULL.
2550   //
2551   // n.b. the input membar is expected to be either a volatile or
2552   // acquire membar but in the former case must *not* be a card mark
2553   // membar.
2554 
2555   MemBarNode *trailing_to_leading(const MemBarNode *trailing)
2556   {
2557     assert((trailing->Opcode() == Op_MemBarAcquire ||
2558             trailing->Opcode() == Op_MemBarVolatile),
2559            "expecting an acquire or volatile membar");
2560     assert((trailing->Opcode() != Op_MemBarVolatile ||
2561             !is_card_mark_membar(trailing)),
2562            "not expecting a card mark membar");
2563 
2564     MemBarNode *leading = normal_to_leading(trailing);
2565 
2566     if (leading) {
2567       return leading;
2568     }
2569 
2570     // there is no normal path from trailing to leading membar. see if
2571     // we can arrive via a card mark membar
2572 
2573     MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
2574 
2575     if (!card_mark_membar) {
2576       return NULL;
2577     }
2578 
2579     return normal_to_leading(card_mark_membar);
2580   }
2581 
2582   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
2583 
2584 bool unnecessary_acquire(const Node *barrier)
2585 {
2586   assert(barrier->is_MemBar(), "expecting a membar");
2587 
2588   if (UseBarriersForVolatile) {
2589     // we need to plant a dmb
2590     return false;
2591   }
2592 
2593   // a volatile read derived from bytecode (or also from an inlined
2594   // SHA field read via LibraryCallKit::load_field_from_object)
2595   // manifests as a LoadX[mo_acquire] followed by an acquire membar
2596   // with a bogus read dependency on it's preceding load. so in those
2597   // cases we will find the load node at the PARMS offset of the
2598   // acquire membar.  n.b. there may be an intervening DecodeN node.
2599 
2600   Node *x = barrier->lookup(TypeFunc::Parms);
2601   if (x) {
2602     // we are starting from an acquire and it has a fake dependency
2603     //
2604     // need to check for
2605     //
2606     //   LoadX[mo_acquire]
2607     //   {  |1   }
2608     //   {DecodeN}
2609     //      |Parms
2610     //   MemBarAcquire*
2611     //
2612     // where * tags node we were passed
2613     // and |k means input k
2614     if (x->is_DecodeNarrowPtr()) {
2615       x = x->in(1);
2616     }
2617 
2618     return (x->is_Load() && x->as_Load()->is_acquire());
2619   }
2620 
2621   // other option for unnecessary membar is that it is a trailing node
2622   // belonging to a CAS
2623 
2624   MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());
2625 
2626   return leading != NULL;
2627 }
2628 
// Returns true when this load should be emitted as an acquiring load
// i.e. when it is an acquire load that feeds a trailing MemBarAcquire
// (possibly through an intervening DecodeN).
bool needs_acquiring_load(const Node *n)
{
  assert(n->is_Load(), "expecting a load");
  if (UseBarriersForVolatile) {
    // we use a normal load and a dmb
    return false;
  }

  LoadNode *ld = n->as_Load();

  // only acquire loads are candidates
  if (!ld->is_acquire()) {
    return false;
  }

  // check if this load is feeding an acquire membar
  //
  //   LoadX[mo_acquire]
  //   {  |1   }
  //   {DecodeN}
  //      |Parms
  //   MemBarAcquire*
  //
  // where * tags node we were passed
  // and |k means input k

  Node *start = ld;
  Node *mbacq = NULL;

  // if we hit a DecodeNarrowPtr we reset the start node and restart
  // the search through the outputs
 restart:

  // scan the users of the current start node looking for the acquire
  // membar; when a DecodeN user is found (and no membar yet) switch
  // to searching the DecodeN's users instead
  for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
    Node *x = start->fast_out(i);
    if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
      mbacq = x;
    } else if (!mbacq &&
               (x->is_DecodeNarrowPtr() ||
                (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
      start = x;
      goto restart;
    }
  }

  if (mbacq) {
    return true;
  }

  return false;
}
2679 
2680 bool unnecessary_release(const Node *n)
2681 {
2682   assert((n->is_MemBar() &&
2683           n->Opcode() == Op_MemBarRelease),
2684          "expecting a release membar");
2685 
2686   if (UseBarriersForVolatile) {
2687     // we need to plant a dmb
2688     return false;
2689   }
2690 
2691   // if there is a dependent CPUOrder barrier then use that as the
2692   // leading
2693 
2694   MemBarNode *barrier = n->as_MemBar();
2695   // check for an intervening cpuorder membar
2696   MemBarNode *b = child_membar(barrier);
2697   if (b && b->Opcode() == Op_MemBarCPUOrder) {
2698     // ok, so start the check from the dependent cpuorder barrier
2699     barrier = b;
2700   }
2701 
2702   // must start with a normal feed
2703   MemBarNode *child_barrier = leading_to_normal(barrier);
2704 
2705   if (!child_barrier) {
2706     return false;
2707   }
2708 
2709   if (!is_card_mark_membar(child_barrier)) {
2710     // this is the trailing membar and we are done
2711     return true;
2712   }
2713 
2714   // must be sure this card mark feeds a trailing membar
2715   MemBarNode *trailing = card_mark_to_trailing(child_barrier);
2716   return (trailing != NULL);
2717 }
2718 
2719 bool unnecessary_volatile(const Node *n)
2720 {
2721   // assert n->is_MemBar();
2722   if (UseBarriersForVolatile) {
2723     // we need to plant a dmb
2724     return false;
2725   }
2726 
2727   MemBarNode *mbvol = n->as_MemBar();
2728 
2729   // first we check if this is part of a card mark. if so then we have
2730   // to generate a StoreLoad barrier
2731 
2732   if (is_card_mark_membar(mbvol)) {
2733       return false;
2734   }
2735 
2736   // ok, if it's not a card mark then we still need to check if it is
2737   // a trailing membar of a volatile put graph.
2738 
2739   return (trailing_to_leading(mbvol) != NULL);
2740 }
2741 
2742 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2743 
2744 bool needs_releasing_store(const Node *n)
2745 {
2746   // assert n->is_Store();
2747   if (UseBarriersForVolatile) {
2748     // we use a normal store and dmb combination
2749     return false;
2750   }
2751 
2752   StoreNode *st = n->as_Store();
2753 
2754   // the store must be marked as releasing
2755   if (!st->is_release()) {
2756     return false;
2757   }
2758 
2759   // the store must be fed by a membar
2760 
2761   Node *x = st->lookup(StoreNode::Memory);
2762 
2763   if (! x || !x->is_Proj()) {
2764     return false;
2765   }
2766 
2767   ProjNode *proj = x->as_Proj();
2768 
2769   x = proj->lookup(0);
2770 
2771   if (!x || !x->is_MemBar()) {
2772     return false;
2773   }
2774 
2775   MemBarNode *barrier = x->as_MemBar();
2776 
2777   // if the barrier is a release membar or a cpuorder mmebar fed by a
2778   // release membar then we need to check whether that forms part of a
2779   // volatile put graph.
2780 
2781   // reject invalid candidates
2782   if (!leading_membar(barrier)) {
2783     return false;
2784   }
2785 
2786   // does this lead a normal subgraph?
2787   MemBarNode *mbvol = leading_to_normal(barrier);
2788 
2789   if (!mbvol) {
2790     return false;
2791   }
2792 
2793   // all done unless this is a card mark
2794   if (!is_card_mark_membar(mbvol)) {
2795     return true;
2796   }
2797 
2798   // we found a card mark -- just make sure we have a trailing barrier
2799 
2800   return (card_mark_to_trailing(mbvol) != NULL);
2801 }
2802 
2803 // predicate controlling translation of CAS
2804 //
2805 // returns true if CAS needs to use an acquiring load otherwise false
2806 
bool needs_acquiring_load_exclusive(const Node *n)
{
  assert(is_CAS(n->Opcode()), "expecting a compare and swap");
  if (UseBarriersForVolatile) {
    // explicit dmbs will be planted, so a plain ldxr suffices
    return false;
  }

  // CAS nodes only ought to turn up in inlined unsafe CAS operations
#ifdef ASSERT
  // debug builds verify the full expected graph shape around the CAS:
  // a leading MemBarRelease + MemBarCPUOrder pair feeding the CAS's
  // Memory input, and a trailing MemBarAcquire reachable from it
  LoadStoreNode *st = n->as_LoadStore();

  // the store must be fed by a membar

  Node *x = st->lookup(StoreNode::Memory);

  assert (x && x->is_Proj(), "CAS not fed by memory proj!");

  ProjNode *proj = x->as_Proj();

  x = proj->lookup(0);

  assert (x && x->is_MemBar(), "CAS not fed by membar!");

  MemBarNode *barrier = x->as_MemBar();

  // the barrier must be a cpuorder membar fed by a release membar

  assert(barrier->Opcode() == Op_MemBarCPUOrder,
         "CAS not fed by cpuorder membar!");

  MemBarNode *b = parent_membar(barrier);
  assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
          "CAS not fed by cpuorder+release membar pair!");

  // does this lead a normal subgraph?
  MemBarNode *mbar = leading_to_normal(barrier);

  assert(mbar != NULL, "CAS not embedded in normal graph!");

  // if this is a card mark membar check we have a trailing acquire

  if (is_card_mark_membar(mbar)) {
    mbar = card_mark_to_trailing(mbar);
  }

  assert(mbar != NULL, "card mark membar for CAS not embedded in normal graph!");

  assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
#endif // ASSERT
  // so we can just return true here
  return true;
}
2859 
2860 // predicate controlling translation of StoreCM
2861 //
2862 // returns true if a StoreStore must precede the card write otherwise
2863 // false
2864 
2865 bool unnecessary_storestore(const Node *storecm)
2866 {
2867   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
2868 
2869   // we only ever need to generate a dmb ishst between an object put
2870   // and the associated card mark when we are using CMS without
2871   // conditional card marking
2872 
2873   if (!UseConcMarkSweepGC || UseCondCardMark) {
2874     return true;
2875   }
2876 
2877   // if we are implementing volatile puts using barriers then the
2878   // object put as an str so we must insert the dmb ishst
2879 
2880   if (UseBarriersForVolatile) {
2881     return false;
2882   }
2883 
2884   // we can omit the dmb ishst if this StoreCM is part of a volatile
2885   // put because in thta case the put will be implemented by stlr
2886   //
2887   // we need to check for a normal subgraph feeding this StoreCM.
2888   // that means the StoreCM must be fed Memory from a leading membar,
2889   // either a MemBarRelease or its dependent MemBarCPUOrder, and the
2890   // leading membar must be part of a normal subgraph
2891 
2892   Node *x = storecm->in(StoreNode::Memory);
2893 
2894   if (!x->is_Proj()) {
2895     return false;
2896   }
2897 
2898   x = x->in(0);
2899 
2900   if (!x->is_MemBar()) {
2901     return false;
2902   }
2903 
2904   MemBarNode *leading = x->as_MemBar();
2905 
2906   // reject invalid candidates
2907   if (!leading_membar(leading)) {
2908     return false;
2909   }
2910 
2911   // we can omit the StoreStore if it is the head of a normal subgraph
2912   return (leading_to_normal(leading) != NULL);
2913 }
2914 
2915 
2916 #define __ _masm.
2917 
2918 // advance declarations for helper functions to convert register
2919 // indices to register objects
2920 
2921 // the ad file has to provide implementations of certain methods
2922 // expected by the generic code
2923 //
2924 // REQUIRED FUNCTIONALITY
2925 
2926 //=============================================================================
2927 
2928 // !!!!! Special hack to get all types of calls to specify the byte offset
2929 //       from the start of the call to the point where the return address
2930 //       will point.
2931 
2932 int MachCallStaticJavaNode::ret_addr_offset()
2933 {
2934   // call should be a simple bl
2935   int off = 4;
2936   return off;
2937 }
2938 
int MachCallDynamicJavaNode::ret_addr_offset()
{
  // four 4-byte instructions precede the return address
  return 16; // movz, movk, movk, bl
}
2943 
2944 int MachCallRuntimeNode::ret_addr_offset() {
2945   // for generated stubs the call will be
2946   //   far_call(addr)
2947   // for real runtime callouts it will be six instructions
2948   // see aarch64_enc_java_to_runtime
2949   //   adr(rscratch2, retaddr)
2950   //   lea(rscratch1, RuntimeAddress(addr)
2951   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
2952   //   blrt rscratch1
2953   CodeBlob *cb = CodeCache::find_blob(_entry_point);
2954   if (cb) {
2955     return MacroAssembler::far_branch_size();
2956   } else {
2957     return 6 * NativeInstruction::instruction_size;
2958   }
2959 }
2960 
2961 // Indicate if the safepoint node needs the polling page as an input
2962 
2963 // the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
2965 // instruction itself. so we cannot plant a mov of the safepoint poll
2966 // address followed by a load. setting this to true means the mov is
2967 // scheduled as a prior instruction. that's better for scheduling
2968 // anyway.
2969 
bool SafePointNode::needs_polling_address_input()
{
  // the poll page address is supplied as an explicit input so it can
  // be materialized by a separately scheduled mov rather than being
  // folded into the poll load itself (the oop map must sit exactly at
  // the load instruction)
  return true;
}
2974 
2975 //=============================================================================
2976 
#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  // debug listing for the breakpoint pseudo-instruction
  st->print("BREAKPOINT");
}
#endif
2982 
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  // a single brk instruction traps into the debugger
  __ brk(0);
}

uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  // size is computed generically from the emitted code
  return MachNode::size(ra_);
}
2991 
2992 //=============================================================================
2993 
#ifndef PRODUCT
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    // debug listing showing how many padding bytes this nop provides
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif

  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
    MacroAssembler _masm(&cbuf);
    // emit _count nop instructions to provide the requested padding
    for (int i = 0; i < _count; i++) {
      __ nop();
    }
  }

  uint MachNopNode::size(PhaseRegAlloc*) const {
    // each nop is one fixed-width instruction
    return _count * NativeInstruction::instruction_size;
  }
3010 
3011 //=============================================================================
// the constant base produces no value in a register
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

// the constant table base needs no post-register-allocation expansion
// here, so postalloc_expand must never be reached
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  // nothing is emitted (see emit above), so the node occupies no bytes
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
#endif
3036 
#ifndef PRODUCT
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  // debug listing of the frame set-up for this method
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  if (framesize < ((1 << 9) + 2 * wordSize)) {
    // small frames: allocate with one immediate sub, then save rfp/lr
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    // large frames: push lr/rfp first, then drop sp by an amount
    // materialized in rscratch1
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
#endif
3058 
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  // probe the stack pages beyond sp before committing to the frame
  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  // record where the frame is fully established
  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
3094 
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachPrologNode::reloc() const
{
  // the prolog emits no relocatable values
  return 0;
}
3105 
3106 //=============================================================================
3107 
#ifndef PRODUCT
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  // debug listing of the frame tear-down for this method
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    // small frames: reload lr/rfp then pop with an immediate add
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    // large frames: the pop amount is materialized in rscratch1
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    // a safepoint poll (read of the polling page) precedes the return
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
#endif
3133 
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_reentry);
  }

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // safepoint poll before returning
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
3153 
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}

int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}

const Pipeline * MachEpilogNode::pipeline() const {
  // use the generic pipeline description
  return MachNode::pipeline_class();
}

// This method seems to be obsolete. It is declared in machnode.hpp
// and defined in all *.ad files, but it is never called. Should we
// get rid of it?
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  return 4;
}
3175 
3176 //=============================================================================
3177 
3178 // Figure out which register class each belongs in: rc_int, rc_float or
3179 // rc_stack.
3180 enum RC { rc_bad, rc_int, rc_float, rc_stack };
3181 
3182 static enum RC rc_class(OptoReg::Name reg) {
3183 
3184   if (reg == OptoReg::Bad) {
3185     return rc_bad;
3186   }
3187 
3188   // we have 30 int registers * 2 halves
3189   // (rscratch1 and rscratch2 are omitted)
3190 
3191   if (reg < 60) {
3192     return rc_int;
3193   }
3194 
3195   // we have 32 float register * 2 halves
3196   if (reg < 60 + 128) {
3197     return rc_float;
3198   }
3199 
3200   // Between float regs & stack is the flags regs.
3201   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
3202 
3203   return rc_stack;
3204 }
3205 
3206 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
3207   Compile* C = ra_->C;
3208 
3209   // Get registers to move.
3210   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
3211   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
3212   OptoReg::Name dst_hi = ra_->get_reg_second(this);
3213   OptoReg::Name dst_lo = ra_->get_reg_first(this);
3214 
3215   enum RC src_hi_rc = rc_class(src_hi);
3216   enum RC src_lo_rc = rc_class(src_lo);
3217   enum RC dst_hi_rc = rc_class(dst_hi);
3218   enum RC dst_lo_rc = rc_class(dst_lo);
3219 
3220   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
3221 
3222   if (src_hi != OptoReg::Bad) {
3223     assert((src_lo&1)==0 && src_lo+1==src_hi &&
3224            (dst_lo&1)==0 && dst_lo+1==dst_hi,
3225            "expected aligned-adjacent pairs");
3226   }
3227 
3228   if (src_lo == dst_lo && src_hi == dst_hi) {
3229     return 0;            // Self copy, no move.
3230   }
3231 
3232   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
3233               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
3234   int src_offset = ra_->reg2offset(src_lo);
3235   int dst_offset = ra_->reg2offset(dst_lo);
3236 
3237   if (bottom_type()->isa_vect() != NULL) {
3238     uint ireg = ideal_reg();
3239     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
3240     if (cbuf) {
3241       MacroAssembler _masm(cbuf);
3242       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
3243       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
3244         // stack->stack
3245         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
3246         if (ireg == Op_VecD) {
3247           __ unspill(rscratch1, true, src_offset);
3248           __ spill(rscratch1, true, dst_offset);
3249         } else {
3250           __ spill_copy128(src_offset, dst_offset);
3251         }
3252       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
3253         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3254                ireg == Op_VecD ? __ T8B : __ T16B,
3255                as_FloatRegister(Matcher::_regEncode[src_lo]));
3256       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
3257         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3258                        ireg == Op_VecD ? __ D : __ Q,
3259                        ra_->reg2offset(dst_lo));
3260       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
3261         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3262                        ireg == Op_VecD ? __ D : __ Q,
3263                        ra_->reg2offset(src_lo));
3264       } else {
3265         ShouldNotReachHere();
3266       }
3267     }
3268   } else if (cbuf) {
3269     MacroAssembler _masm(cbuf);
3270     switch (src_lo_rc) {
3271     case rc_int:
3272       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
3273         if (is64) {
3274             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
3275                    as_Register(Matcher::_regEncode[src_lo]));
3276         } else {
3277             MacroAssembler _masm(cbuf);
3278             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
3279                     as_Register(Matcher::_regEncode[src_lo]));
3280         }
3281       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
3282         if (is64) {
3283             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3284                      as_Register(Matcher::_regEncode[src_lo]));
3285         } else {
3286             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3287                      as_Register(Matcher::_regEncode[src_lo]));
3288         }
3289       } else {                    // gpr --> stack spill
3290         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3291         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
3292       }
3293       break;
3294     case rc_float:
3295       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
3296         if (is64) {
3297             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
3298                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3299         } else {
3300             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
3301                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3302         }
3303       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
3304           if (cbuf) {
3305             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3306                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3307         } else {
3308             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3309                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3310         }
3311       } else {                    // fpr --> stack spill
3312         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3313         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3314                  is64 ? __ D : __ S, dst_offset);
3315       }
3316       break;
3317     case rc_stack:
3318       if (dst_lo_rc == rc_int) {  // stack --> gpr load
3319         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
3320       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
3321         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3322                    is64 ? __ D : __ S, src_offset);
3323       } else {                    // stack --> stack copy
3324         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3325         __ unspill(rscratch1, is64, src_offset);
3326         __ spill(rscratch1, is64, dst_offset);
3327       }
3328       break;
3329     default:
3330       assert(false, "bad rc_class for spill");
3331       ShouldNotReachHere();
3332     }
3333   }
3334 
3335   if (st) {
3336     st->print("spill ");
3337     if (src_lo_rc == rc_stack) {
3338       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
3339     } else {
3340       st->print("%s -> ", Matcher::regName[src_lo]);
3341     }
3342     if (dst_lo_rc == rc_stack) {
3343       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
3344     } else {
3345       st->print("%s", Matcher::regName[dst_lo]);
3346     }
3347     if (bottom_type()->isa_vect() != NULL) {
3348       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
3349     } else {
3350       st->print("\t# spill size = %d", is64 ? 64:32);
3351     }
3352   }
3353 
3354   return 0;
3355 
3356 }
3357 
#ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  // without register allocation info only the node identities can be
  // shown; otherwise delegate to implementation for a full listing
  if (!ra_)
    st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
  else
    implementation(NULL, ra_, false, st);
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  // variable length; compute from the emitted code
  return MachNode::size(ra_);
}
3374 
3375 //=============================================================================
3376 
3377 #ifndef PRODUCT
3378 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3379   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3380   int reg = ra_->get_reg_first(this);
3381   st->print("add %s, rsp, #%d]\t# box lock",
3382             Matcher::regName[reg], offset);
3383 }
3384 #endif
3385 
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);

  // compute the stack address of the lock slot into the box register
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg    = ra_->get_encode(this);

  if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
    __ add(as_Register(reg), sp, offset);
  } else {
    // lock slot offsets are not expected to exceed the immediate range
    ShouldNotReachHere();
  }
}

uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  // emit produces exactly one 4-byte add instruction
  return 4;
}
3403 
3404 //=============================================================================
3405 
3406 #ifndef PRODUCT
3407 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
3408 {
3409   st->print_cr("# MachUEPNode");
3410   if (UseCompressedClassPointers) {
3411     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3412     if (Universe::narrow_klass_shift() != 0) {
3413       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
3414     }
3415   } else {
3416    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3417   }
3418   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
3419   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
3420 }
3421 #endif
3422 
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  // compare the receiver's klass against the inline cache expectation
  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  // mismatch: divert to the shared inline cache miss stub
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}

uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  // variable length sequence; compute from the emitted code
  return MachNode::size(ra_);
}
3441 
3442 // REQUIRED EMIT CODE
3443 
3444 //=============================================================================
3445 
// Emit exception handler code.
//
// Allocates a stub containing a single far jump to the shared C2
// exception blob.  Returns the offset of the handler within the code
// buffer, or 0 if the stub could not be allocated (CodeCache full).
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // mov rscratch1 #exception_blob_entry_point
  // br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  // Guard against the stub outgrowing its declared maximum size.
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
3465 
// Emit deopt handler code.
//
// Materializes the handler's own address in lr and then far-jumps to
// the shared deopt blob's unpack entry.  Returns the offset of the
// handler, or 0 if stub allocation failed (CodeCache full).
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // lr := address of this instruction.  NOTE(review): the unpack blob
  // presumably uses lr to locate the deoptimization point — confirm.
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
3486 
3487 // REQUIRED MATCHER CODE
3488 
3489 //=============================================================================
3490 
3491 const bool Matcher::match_rule_supported(int opcode) {
3492 
3493   switch (opcode) {
3494   default:
3495     break;
3496   }
3497 
3498   if (!has_match_rule(opcode)) {
3499     return false;
3500   }
3501 
3502   return true;  // Per default match rules are supported.
3503 }
3504 
3505 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
3506 
3507   // TODO
3508   // identify extra cases that we might want to provide match rules for
3509   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
3510   bool ret_value = match_rule_supported(opcode);
3511   // Add rules here.
3512 
3513   return ret_value;  // Per default match rules are supported.
3514 }
3515 
// No predicated (masked) vector support on this port.
const bool Matcher::has_predicated_vectors(void) {
  return false;
}

// Register-pressure threshold for float registers; the shared default
// is used unchanged.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}

// Not implemented on AArch64.
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
3529 
3530 // Is this branch offset short enough that a short branch can be used?
3531 //
3532 // NOTE: If the platform does not provide any short branch variants, then
3533 //       this method should return false for offset 0.
3534 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
3535   // The passed offset is relative to address of the branch.
3536 
3537   return (-32768 <= offset && offset < 32768);
3538 }
3539 
// The answer does not depend on the particular constant value.
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}

// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
3550 
// Vector width in bytes.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  // Cap at 16 bytes (one 128-bit SIMD register); MaxVectorSize may
  // restrict this further.
  int size = MIN2(16,(int)MaxVectorSize);
  // Minimum 2 values in vector
  if (size < 2*type2aelembytes(bt)) size = 0;
  // But never < 4: a width below 4 bytes disables vectorization (0).
  if (size < 4) size = 0;
  return size;
}
3560 
// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}

// Minimum number of elements per vector (never less than 2).
const int Matcher::min_vector_size(const BasicType bt) {
//  For the moment limit the vector size to 8 bytes
    int size = 8 / type2aelembytes(bt);
    if (size < 2) size = 2;
    return size;
}
3571 
// Vector ideal reg.
const uint Matcher::vector_ideal_reg(int len) {
  // len is the vector width in bytes: 8 -> VecD, 16 -> VecX.
  switch(len) {
    case  8: return Op_VecD;
    case 16: return Op_VecX;
  }
  ShouldNotReachHere();
  return 0;
}

// Vector shift counts are always carried in a full VecX register.
const uint Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecX;
}
3585 
// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// AArch64 supports misaligned vector store/load.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}
3609 
// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Only usable when narrow oops are unshifted.
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}

bool Matcher::const_oop_prefer_decode() {
  // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
  return Universe::narrow_oop_base() == NULL;
}

bool Matcher::const_klass_prefer_decode() {
  // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
  return Universe::narrow_klass_base() == NULL;
}
3646 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Never called on AArch64 (guarded by Unimplemented()).
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return false; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
3678 
3679 // Return whether or not this register is ever used as an argument.
3680 // This function is used on startup to build the trampoline stubs in
3681 // generateOptoStub.  Registers not mentioned will be killed by the VM
3682 // call in the trampoline, and arguments in those registers not be
3683 // available to the callee.
3684 bool Matcher::can_be_java_arg(int reg)
3685 {
3686   return
3687     reg ==  R0_num || reg == R0_H_num ||
3688     reg ==  R1_num || reg == R1_H_num ||
3689     reg ==  R2_num || reg == R2_H_num ||
3690     reg ==  R3_num || reg == R3_H_num ||
3691     reg ==  R4_num || reg == R4_H_num ||
3692     reg ==  R5_num || reg == R5_H_num ||
3693     reg ==  R6_num || reg == R6_H_num ||
3694     reg ==  R7_num || reg == R7_H_num ||
3695     reg ==  V0_num || reg == V0_H_num ||
3696     reg ==  V1_num || reg == V1_H_num ||
3697     reg ==  V2_num || reg == V2_H_num ||
3698     reg ==  V3_num || reg == V3_H_num ||
3699     reg ==  V4_num || reg == V4_H_num ||
3700     reg ==  V5_num || reg == V5_H_num ||
3701     reg ==  V6_num || reg == V6_H_num ||
3702     reg ==  V7_num || reg == V7_H_num;
3703 }
3704 
// Any register usable as a Java argument may also hold a spilled arg.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}
3709 
// No hand-coded assembler path for long division by constant.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}

// Register for DIVI projection of divmodI.
// The divmod projections are never requested on this port.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// SP is saved via the frame-pointer register mask across method
// handle invokes.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
3740 
3741 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
3742   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
3743     Node* u = addp->fast_out(i);
3744     if (u->is_Mem()) {
3745       int opsize = u->as_Mem()->memory_size();
3746       assert(opsize > 0, "unexpected memory operand size");
3747       if (u->as_Mem()->memory_size() != (1<<shift)) {
3748         return false;
3749       }
3750     }
3751   }
3752   return true;
3753 }
3754 
const bool Matcher::convi2l_type_required = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  // Try the simple (base + constant offset) form first.
  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
    return true;
  }

  Node *off = m->in(AddPNode::Offset);
  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
      // Are there other uses besides address expressions?
      !is_visited(off)) {
    // (base + (index << scale)): subsume the shift, and an inner
    // ConvI2L if present, into the addressing mode.
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(2), Visit);
    Node *conv = off->in(1);
    if (conv->Opcode() == Op_ConvI2L &&
        // Are there other uses besides address expressions?
        !is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Pre_Visit);
    } else {
      mstack.push(conv, Pre_Visit);
    }
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (off->Opcode() == Op_ConvI2L &&
             // Are there other uses besides address expressions?
             !is_visited(off)) {
    // (base + sign-extended int index): subsume the ConvI2L into the
    // addressing mode.
    address_visited.test_set(m->_idx); // Flag as address_visited
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(1), Pre_Visit);
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
3797 
// Platform hook for reshaping address subtrees; intentionally a no-op
// on AArch64.
void Compile::reshape_address(AddPNode* addp) {
}
3800 
3801 // helper for encoding java_to_runtime calls on sim
3802 //
3803 // this is needed to compute the extra arguments required when
3804 // planting a call to the simulator blrt instruction. the TypeFunc
3805 // can be queried to identify the counts for integral, and floating
3806 // arguments and the return type
3807 
3808 static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
3809 {
3810   int gps = 0;
3811   int fps = 0;
3812   const TypeTuple *domain = tf->domain();
3813   int max = domain->cnt();
3814   for (int i = TypeFunc::Parms; i < max; i++) {
3815     const Type *t = domain->field_at(i);
3816     switch(t->basic_type()) {
3817     case T_FLOAT:
3818     case T_DOUBLE:
3819       fps++;
3820     default:
3821       gps++;
3822     }
3823   }
3824   gpcnt = gps;
3825   fpcnt = fps;
3826   BasicType rt = tf->return_type();
3827   switch (rt) {
3828   case T_VOID:
3829     rtype = MacroAssembler::ret_type_void;
3830     break;
3831   default:
3832     rtype = MacroAssembler::ret_type_integral;
3833     break;
3834   case T_FLOAT:
3835     rtype = MacroAssembler::ret_type_float;
3836     break;
3837   case T_DOUBLE:
3838     rtype = MacroAssembler::ret_type_double;
3839     break;
3840   }
3841 }
3842 
// Emit a volatile load/store.  The addressing mode is restricted to a
// bare base register: index, scale, and displacement must all be
// absent, as enforced by the guarantees below.  SCRATCH is currently
// unused by the expansion.
//
// NOTE: deliberately NOT wrapped in do { } while (0): the _masm local
// declared here must remain in scope so that subsequent __ statements
// in the same enc_class (e.g. the sign-extends in aarch64_enc_ldarsbw)
// target the same assembler.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }
3851 
// Pointer-to-member types for MacroAssembler load/store emitters, used
// to pass the concrete instruction into the loadStore() helpers below.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
3856 
  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  static void loadStore(MacroAssembler masm, mem_insn insn,
                         Register reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
    case INDINDEXI2L:
    case INDINDEXI2LN:
      // 32-bit index: sign-extend it (sxtw) while scaling.
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    if (index == -1) {
      // No index register: plain (base + displacement) addressing.
      (masm.*insn)(reg, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
3887 
  // Float/double variant of loadStore(); only the scaled-index
  // patterns need sign extension here.
  static void loadStore(MacroAssembler masm, mem_float_insn insn,
                         FloatRegister reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

     if (index == -1) {
      (masm.*insn)(reg, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
3910 
  // Vector variant of loadStore(); T selects the SIMD register variant.
  // Index registers are always left-shift scaled here.
  static void loadStore(MacroAssembler masm, mem_vector_insn insn,
                         FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
                         int opcode, Register base, int index, int size, int disp)
  {
    if (index == -1) {
      (masm.*insn)(reg, T, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode");
      (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
    }
  }
3922 
3923 %}
3924 
3925 
3926 
3927 //----------ENCODING BLOCK-----------------------------------------------------
3928 // This block specifies the encoding classes used by the compiler to
3929 // output byte streams.  Encoding classes are parameterized macros
3930 // used by Machine Instruction Nodes in order to generate the bit
3931 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
3935 // which returns its register number when queried.  CONST_INTER causes
3936 // an operand to generate a function which returns the value of the
3937 // constant when queried.  MEMORY_INTER causes an operand to generate
3938 // four functions which return the Base Register, the Index Register,
3939 // the Scale Value, and the Offset Value of the operand when queried.
3940 // COND_INTER causes an operand to generate six functions which return
3941 // the encoding code (ie - encoding bits for the instruction)
3942 // associated with each basic boolean condition for a conditional
3943 // instruction.
3944 //
3945 // Instructions specify two basic values for encoding.  Again, a
3946 // function is available to check if the constant displacement is an
3947 // oop. They use the ins_encode keyword to specify their encoding
3948 // classes (which must be a sequence of enc_class names, and their
3949 // parameters, specified in the encoding block), and they use the
3950 // opcode keyword to specify, in order, their primary, secondary, and
3951 // tertiary opcode.  Only the opcode sections which a particular
3952 // instruction needs for encoding need to be specified.
3953 encode %{
3954   // Build emit functions for each basic byte or larger field in the
3955   // intel encoding scheme (opcode, rm, sib, immediate), and call them
3956   // from C++ code in the enc_class source block.  Emit functions will
3957   // live in the main source block for now.  In future, we can
3958   // generalize this by adding a syntax that specifies the sizes of
3959   // fields in an order, so that the adlc can build the emit functions
3960   // automagically
3961 
  // Catch-all for unimplemented encodings: reports "C2 catch all" if
  // such an instruction is ever emitted.
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}
3967 
3968   // BEGIN Non-volatile memory access
3969 
  // Non-volatile integer loads: sign/zero-extending byte, half-word,
  // word and double-word variants.  Each dispatches to the loadStore()
  // helper with the matching MacroAssembler emitter.
  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Float/double loads.
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector loads; the SIMD_RegVariant (S/D/Q) selects the access width.
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
4071 
  // Non-volatile integer stores; the *0 variants store the zero
  // register (zr) directly.
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // As strb0, but preceded by a StoreStore barrier.
  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Float/double stores.
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector stores; the SIMD_RegVariant (S/D/Q) selects the access width.
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
4164 
4165   // END Non-volatile memory access
4166 
4167   // volatile loads and stores
4168 
  // Volatile stores use store-release (stlr*) and volatile loads use
  // load-acquire (ldar*) via the MOV_VOLATILE macro; sub-word signed
  // loads sign-extend explicitly after the acquiring load.
  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
  %}

  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
  %}

  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}


  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}

  // Volatile FP loads go through rscratch1 and fmov, since there is no
  // load-acquire directly into a SIMD/FP register.
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
4259 
4260   enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
4261     Register src_reg = as_Register($src$$reg);
4262     // we sometimes get asked to store the stack pointer into the
4263     // current thread -- we cannot do that directly on AArch64
4264     if (src_reg == r31_sp) {
4265         MacroAssembler _masm(&cbuf);
4266       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
4267       __ mov(rscratch2, sp);
4268       src_reg = rscratch2;
4269     }
4270     MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4271                  rscratch1, stlr);
4272   %}
4273 
4274   enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
4275     {
4276       MacroAssembler _masm(&cbuf);
4277       FloatRegister src_reg = as_FloatRegister($src$$reg);
4278       __ fmovs(rscratch2, src_reg);
4279     }
4280     MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4281                  rscratch1, stlrw);
4282   %}
4283 
4284   enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
4285     {
4286       MacroAssembler _masm(&cbuf);
4287       FloatRegister src_reg = as_FloatRegister($src$$reg);
4288       __ fmovd(rscratch2, src_reg);
4289     }
4290     MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4291                  rscratch1, stlr);
4292   %}
4293 
  // synchronized read/update encodings

  // Load-acquire-exclusive doubleword.  The exclusive instructions
  // take only a plain base register, so any index register and/or
  // displacement is first folded into rscratch1 with lea.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        // base + disp: materialize the address in rscratch1
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // fold the displacement first, then the scaled index
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}

  // Store-release-exclusive doubleword.  Address formation mirrors
  // aarch64_enc_ldaxr above, but uses rscratch2 because rscratch1
  // receives the stlxr status result (0 on success).
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    // stlxr writes 0 to rscratch1 on success, non-zero on failure;
    // compare against zero so the condition flags report EQ on success
    __ cmpw(rscratch1, zr);
  %}
4354 
  // Compare-and-exchange encodings.  All variants delegate to the
  // MacroAssembler cmpxchg helper with size/acquire/release/weak
  // parameters; all require a bare base-register address (no index,
  // no displacement), which the guarantee enforces.

  // 64-bit (xword) CAS, release-only ordering
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit (word) CAS, release-only ordering
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 16-bit (halfword) CAS, release-only ordering
  enc_class aarch64_enc_cmpxchgs(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 8-bit (byte) CAS, release-only ordering
  enc_class aarch64_enc_cmpxchgb(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}


  // The only difference between aarch64_enc_cmpxchg and
  // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
  // CompareAndSwap sequence to serve as a barrier on acquiring a
  // lock.
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit acquire+release CAS (see note above)
  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}
4407 
4408 
  // auxiliary used for CompareAndSwapX to set result register:
  // res = 1 if the condition flags say EQ (CAS succeeded), else 0
  enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
    MacroAssembler _masm(&cbuf);
    Register res_reg = as_Register($res$$reg);
    __ cset(res_reg, Assembler::EQ);
  %}
4415 
  // prefetch encodings

  // Prefetch for write (PSTL1KEEP: prefetch for store, L1, retain).
  // Address formation parallels the memory-access encodings: a
  // combined index+displacement operand needs a lea into rscratch1.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
4436 
  /// mov encodings
4438 
  // Load a 32-bit integer constant.  Zero is special-cased as a move
  // from the zero register; other values go through the immediate-
  // materializing movw overload.
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}

  // Load a 64-bit integer constant; same zero special case as above.
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}
4460 
  // Load a pointer constant, dispatching on the constant's relocation
  // type: oop, metadata, or a plain (reloc-free) address.  NULL and
  // (address)1 are handled by the dedicated immP0/immP_1 encodings
  // below, hence ShouldNotReachHere here.
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        // addresses inside the first VM page are emitted as plain
        // immediates; anything else is built with adrp + add
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          __ mov(dst_reg, con);
        } else {
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}

  // Load the NULL pointer constant.
  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Load the pointer constant 1.
  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}

  // Load the address of the polling page with a poll_type relocation.
  // adrp yields the page base; the page is aligned, so the low-bits
  // offset must come back as zero.
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}

  // Load the card-table byte-map base via the MacroAssembler helper.
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}

  // Load a narrow (compressed) oop constant.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}

  // Load the narrow-oop NULL constant.
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Load a narrow (compressed) klass constant.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
4543 
  // arithmetic encodings

  // 32-bit add/subtract of an immediate.  The instruct's $primary
  // selector chooses the operation (add == 0, subtract == 1); the
  // immediate's sign is then normalized so the emitted instruction
  // always carries a non-negative immediate.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}

  // 64-bit add/subtract of an immediate; same $primary convention as
  // the 32-bit variant above.
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}

  // 32-bit signed division with Java semantics (corrected_idivl
  // handles the min-int / -1 overflow case); want_remainder == false
  enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 64-bit signed division (corrected_idivq); want_remainder == false.
  // NOTE(review): the formal parameters are declared iRegI although
  // the encoding emits the 64-bit divide -- presumably the operand
  // classes are only placeholders here; confirm against the instructs
  // that use this encoding.
  enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 32-bit signed remainder; want_remainder == true
  enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}

  // 64-bit signed remainder; want_remainder == true (see iRegI note
  // on aarch64_enc_div above)
  enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}
4605 
  // compare instruction encodings

  // 32-bit register-register compare
  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // 32-bit compare against an add/sub-encodable immediate: negative
  // immediates flip to addsw so the encoded value is non-negative
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}

  // 32-bit compare against an arbitrary immediate: materialize it in
  // rscratch1 first
  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
    __ cmpw(reg1, rscratch1);
  %}

  // 64-bit register-register compare
  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // 64-bit compare against a 12-bit add/sub immediate; negative values
  // flip to adds.  Long.MIN_VALUE is the one value with val == -val,
  // so it cannot be negated and must be materialized in a register.
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
      // aargh, Long.MIN_VALUE is a special case
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}

  // 64-bit compare against an arbitrary immediate: materialize it in
  // rscratch1 first
  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
    __ cmp(reg1, rscratch1);
  %}

  // pointer compare (64-bit)
  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // narrow-oop compare (32-bit)
  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // pointer null-test: compare against the zero register
  enc_class aarch64_enc_testp(iRegP src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmp(reg, zr);
  %}

  // narrow-oop null-test: 32-bit compare against the zero register
  enc_class aarch64_enc_testn(iRegN src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmpw(reg, zr);
  %}
4689 
  // unconditional branch to a label
  enc_class aarch64_enc_b(label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ b(*L);
  %}

  // conditional branch; the condition code comes from the cmpOp
  // operand's $cmpcode
  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  // unsigned-condition branch; identical emission, but the cmpOpU
  // operand maps Java unsigned comparisons to the right condition codes
  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
4707 
  // Slow-path subtype check via the shared MacroAssembler helper.
  // On the miss path, $primary selects whether the result register is
  // zeroed before falling through to the bound miss label.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     // NULL success label: on a hit the helper falls through past the
     // miss handling below
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
4725 
  // Java static call.  With no resolved _method this is a call to a
  // runtime wrapper; otherwise a relocated trampoline call is emitted
  // (optimized-virtual or static relocation) plus the to-interpreter
  // stub.  Either emission can fail when the code cache is full, in
  // which case the failure is recorded and emission aborts.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      call = __ trampoline_call(Address(addr, rspec), &cbuf);

      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Java dynamic (virtual/interface) call through an inline cache.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Post-call epilog; the stack-depth verification is unimplemented
  // on this port and traps if VerifyStackAtCalls is set.
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find magic cookie on stack
      __ call_Unimplemented();
    }
  %}
4770 
  // Call from compiled Java code into the runtime.
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blrt
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // target lives in the code cache: reachable trampoline call
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      // out-of-line runtime entry: indirect call with the C calling
      // convention described by gpcnt/fpcnt/rtype
      int gpcnt;
      int fpcnt;
      int rtype;
      getCallInfo(tf(), gpcnt, fpcnt, rtype);
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
      __ bind(retaddr);
      // pop the breadcrumb slot pushed above
      __ add(sp, sp, 2 * wordSize);
    }
  %}
4801 
  // Jump to the rethrow stub (far_jump handles the potentially
  // out-of-range target).
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}

  // Method return: branch to the link register.
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}

  // Tail call: indirect branch to the jump target register.
  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    __ br(target_reg);
  %}

  // Tail jump for exception forwarding.
  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
    // callee expects it in r3
    __ mov(r3, lr);
    __ br(target_reg);
  %}
4827 
  // Fast-path monitor enter.  Attempts, in order: biased locking
  // (when enabled), a thin-lock CAS of the box address into the
  // object's mark word, the recursive stack-lock case, and finally a
  // CAS on an already-inflated monitor's owner field.  On exit the
  // condition flags encode the outcome: EQ = locked, NE = must take
  // the slow path.
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Load markOop from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    // Always do locking in runtime.
    // (oop is never null here, so the compare leaves NE -> slow path)
    if (EmitSync & 0x01) {
      __ cmp(oop, zr);
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
    }

    // Handle existing monitor
    if ((EmitSync & 0x02) == 0) {
      // we can use AArch64's bit test and branch here but
      // markOopDesc does not define a bit index just the bit value
      // so assert in case the bit pos changes
#     define __monitor_value_log2 1
      assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
      __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
#     undef __monitor_value_log2
    }

    // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
    __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);

    // Load Compare Value application register.

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markOop with mark and if equal exchange scratch1
    // with object markOop.
    if (UseLSE) {
      // LSE path: single compare-and-swap-acquire instruction
      __ mov(tmp, disp_hdr);
      __ casal(Assembler::xword, tmp, box, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::EQ, cont);
    } else {
      // LL/SC path: ldaxr/stlxr retry loop
      Label retry_load;
      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldaxr(tmp, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, box, oop);
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    // Formerly:
    // __ cmpxchgptr(/*oldv=*/disp_hdr,
    //               /*newv=*/box,
    //               /*addr=*/oop,
    //               /*tmp=*/tmp,
    //               cont,
    //               /*fail*/NULL);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object and have now locked it; we continue at label cont.

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markOop of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      // The object's monitor m is unlocked iff m->owner == NULL,
      // otherwise m->owner may contain a thread or a stack address.
      //
      // Try to CAS m->owner from NULL to current thread.
      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
      __ mov(disp_hdr, zr);

      if (UseLSE) {
        __ mov(rscratch1, disp_hdr);
        __ casal(Assembler::xword, rscratch1, rthread, tmp);
        __ cmp(rscratch1, disp_hdr);
      } else {
        Label retry_load, fail;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(tmp), PSTL1STRM);
        __ bind(retry_load);
        __ ldaxr(rscratch1, tmp);
        __ cmp(disp_hdr, rscratch1);
        __ br(Assembler::NE, fail);
        // use stlxr to ensure update is immediately visible
        __ stlxr(rscratch1, rthread, tmp);
        __ cbnzw(rscratch1, retry_load);
        __ bind(fail);
      }

      // Label next;
      // __ cmpxchgptr(/*oldv=*/disp_hdr,
      //               /*newv=*/rthread,
      //               /*addr=*/tmp,
      //               /*tmp=*/rscratch1,
      //               /*succeed*/next,
      //               /*fail*/NULL);
      // __ bind(next);

      // store a non-null value into the box.
      __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));

      // PPC port checks the following invariants
      // #ifdef ASSERT
      // bne(flag, cont);
      // We have acquired the monitor, check some invariants.
      // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
      // Invariant 1: _recursions should be 0.
      // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
      // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
      //                        "monitor->_recursions should be 0", -1);
      // Invariant 2: OwnerIsThread shouldn't be 0.
      // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
      //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
      //                           "monitor->OwnerIsThread shouldn't be 0", -1);
      // #endif
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure

  %}
4982 
  // TODO
  // reimplement this with custom cmpxchgptr code
  // which avoids some of the unnecessary branching
  //
  // Fast-path monitor exit.  On exit the condition flags hold EQ for
  // success and NE for failure (see the comments at cont below); the
  // matching instruct presumably falls into a slow-path call when NE
  // is set, mirroring the fast_lock encoding above.
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    // NOTE: $tmp is bound to disp_hdr and $tmp2 to the scratch 'tmp'.
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      // Presumably branches to cont when the lock was biased -- see
      // MacroAssembler::biased_locking_exit (TODO confirm).
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont); // EQ => success


    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      // tmp := mark word; it is consumed below in the monitor path.
      // The monitor bit is tested on disp_hdr, i.e. on the displaced
      // header saved in the box at lock time -- NOTE(review): this
      // relies on the fast_lock encoding storing a mark with the
      // monitor bit set into the box when it inflates; confirm there.
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

      if (UseLSE) {
        // Compare-and-swap the mark word from box back to the displaced
        // header with release semantics; tmp receives the old mark.
        __ mov(tmp, box);
        __ casl(Assembler::xword, tmp, disp_hdr, oop);
        __ cmp(tmp, box); // EQ iff the CAS succeeded
      } else {
        Label retry_load;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(oop), PSTL1STRM);
        __ bind(retry_load);
        __ ldxr(tmp, oop);
        __ cmp(box, tmp);
        __ br(Assembler::NE, cas_failed);
        // use stlxr to ensure update is immediately visible
        __ stlxr(tmp, disp_hdr, oop);
        __ cbzw(tmp, cont); // store-exclusive succeeded (status == 0)
        __ b(retry_load);
      }

    // __ cmpxchgptr(/*compare_value=*/box,
    //               /*exchange_value=*/disp_hdr,
    //               /*where=*/oop,
    //               /*result=*/tmp,
    //               cont,
    //               /*cas_failed*/NULL);
    // The CAS above addresses the mark word through the oop directly,
    // which is only valid while the mark is at offset 0.
    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    __ bind(cas_failed);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont); // not owner, or recursive: NE => slow path

      // Only release the monitor if there are no waiters.
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      __ cmp(rscratch1, zr);
      __ cbnz(rscratch1, cont); // waiters present: NE from the cmp above
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(rscratch1, tmp); // rscratch1 is zero
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
5081 
5082 %}
5083 
5084 //----------FRAME--------------------------------------------------------------
5085 // Definition of frame structure and management information.
5086 //
5087 //  S T A C K   L A Y O U T    Allocators stack-slot number
5088 //                             |   (to get allocators register number
5089 //  G  Owned by    |        |  v    add OptoReg::stack0())
5090 //  r   CALLER     |        |
5091 //  o     |        +--------+      pad to even-align allocators stack-slot
5092 //  w     V        |  pad0  |        numbers; owned by CALLER
5093 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
5094 //  h     ^        |   in   |  5
5095 //        |        |  args  |  4   Holes in incoming args owned by SELF
5096 //  |     |        |        |  3
5097 //  |     |        +--------+
5098 //  V     |        | old out|      Empty on Intel, window on Sparc
5099 //        |    old |preserve|      Must be even aligned.
5100 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
5101 //        |        |   in   |  3   area for Intel ret address
5102 //     Owned by    |preserve|      Empty on Sparc.
5103 //       SELF      +--------+
5104 //        |        |  pad2  |  2   pad to align old SP
5105 //        |        +--------+  1
5106 //        |        | locks  |  0
5107 //        |        +--------+----> OptoReg::stack0(), even aligned
5108 //        |        |  pad1  | 11   pad to align new SP
5109 //        |        +--------+
5110 //        |        |        | 10
5111 //        |        | spills |  9   spills
5112 //        V        |        |  8   (pad0 slot for callee)
5113 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
5114 //        ^        |  out   |  7
5115 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
5116 //     Owned by    +--------+
5117 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
5118 //        |    new |preserve|      Must be even-aligned.
5119 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
5120 //        |        |        |
5121 //
5122 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
5123 //         known from SELF's arguments and the Java calling convention.
5124 //         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
5132 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
5133 //         even aligned with pad0 as needed.
5134 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
5135 //           (the latter is true on Intel but is it false on AArch64?)
5136 //         region 6-11 is even aligned; it may be padded out more so that
5137 //         the region from SP to FP meets the minimum stack alignment.
5138 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
5139 //         alignment.  Region 11, pad1, may be dynamically extended so that
5140 //         SP meets the minimum alignment.
5141 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  // NOTE(review): R31 presumably names SP in this AD's register file
  // (not the AArch64 frame pointer r29, used below for the
  // interpreter) -- confirm against the register definitions at the
  // top of the file.
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Low (first) half of the return register for each ideal reg type.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    // High half; OptoReg::Bad for types occupying a single slot.
    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                       // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
5245 
//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(1);        // Required cost attribute (default operand cost)

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(INSN_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
5263 
//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------

// Integer operands 32 bit
// 32 bit immediate
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift values for add/sub extension shift
operand immIExt()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant no greater than 4 (note: the predicate places no
// lower bound on the value)
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the 32 bit constant 31
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the 32 bit constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the 32 bit constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the 32 bit constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the 32 bit constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the 32 bit constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the 32 bit constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the 32 bit constant 63
operand immI_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the 32 bit constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the 32 bit constant 255 (0xff)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the 32 bit constant 65535 (0xffff)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5445 
// the 64 bit constant 255 (0xff)
operand immL_255()
%{
  predicate(n->get_long() == 255L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the 64 bit constant 65535 (0xffff)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// the 64 bit constant 4294967295 (0xffffffff)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask of the form 2^k - 1 with the top two bits clear
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of the form 2^k - 1 with the top two bits clear
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset (64 bit variant of immIU12)
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an access of size 4 (shift 2)
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an access of size 8 (shift 3)
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an access of size 16 (shift 4)
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit offset for scaled or unscaled immediate loads and stores
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit offset valid for an access of size 4 (shift 2)
operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit offset valid for an access of size 8 (shift 3)
operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit offset valid for an access of size 16 (shift 4)
operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5632 
// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer operands 64 bit
// 64 bit immediate
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor
// (matched as a 64 bit constant equal to frame_anchor_offset +
// last_Java_pc_offset)

operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5741 
// Pointer operands
// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map
  predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) &&
            (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float and Double operands
// Double Immediate
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double immediate valid as an FMOV encodable (packed) constant
// (see Assembler::operand_valid_for_float_immediate)
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float immediate valid as an FMOV encodable (packed) constant
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow pointer operands
// Narrow Pointer Immediate
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow klass pointer immediate
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5915 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register not Special
// NOTE(review): unlike its sibling operands this one declares no
// op_cost; confirm the ADLC default cost is intended here.
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  match(iRegL_R0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register Operands
// Pointer Register
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R0 only
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R5 only
operand iRegP_R5()
%{
  constraint(ALLOC_IN_RC(r5_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R10 only
operand iRegP_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6076 
// Long 64 bit Register R0 only
operand iRegL_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R2 only
operand iRegL_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R3 only
operand iRegL_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R11 only
operand iRegL_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register FP only
operand iRegP_FP()
%{
  constraint(ALLOC_IN_RC(fp_reg));
  match(RegP);
  // match(iRegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R0 only
operand iRegI_R0()
%{
  constraint(ALLOC_IN_RC(int_r0_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R2 only
operand iRegI_R2()
%{
  constraint(ALLOC_IN_RC(int_r2_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R3 only
operand iRegI_R3()
%{
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Register R4 only
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

6177 
// Pointer Register Operands
// Narrow Pointer Register (compressed oop, 32-bit encoding)
operand iRegN()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer pinned to R0
operand iRegN_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer pinned to R2
operand iRegN_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer pinned to R3
operand iRegN_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register not Special
// (note: the original comment said "Integer 64 bit Register not Special",
// but this operand matches RegN in a 32-bit register class)
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// heap base register -- used for encoding immN0

operand iRegIHeapbase()
%{
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6237 
// Float Register
// Float register operands
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 64-bit vector register operand
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 128-bit vector register operand
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double operands pinned to a specific FP/SIMD register (v0..v3);
// each vN_reg class holds one register.
operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6317 
6318 // Flags register, used as output of signed compare instructions
6319 
// note that on AArch64 we also use this register as the output for
// floating point compare instructions (CmpF CmpD). this ensures
6322 // that ordered inequality tests use GT, GE, LT or LE none of which
6323 // pass through cases where the result is unordered i.e. one or both
6324 // inputs to the compare is a NaN. this means that the ideal code can
6325 // replace e.g. a GT with an LE and not end up capturing the NaN case
6326 // (where the comparison should always fail). EQ and NE tests are
6327 // always generated in ideal code so that unordered folds into the NE
6328 // case, matching the behaviour of AArch64 NE.
6329 //
6330 // This differs from x86 where the outputs of FP compares use a
6331 // special FP flags registers and where compares based on this
6332 // register are distinguished into ordered inequalities (cmpOpUCF) and
6333 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
6334 // to explicitly handle the unordered case in branches. x86 also has
6335 // to include extra CMoveX rules to accept a cmpOpUCF input.
6336 
// Condition flags produced by signed (and FP, see comment above) compares.
operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of unsigned compare instructions
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}
6357 
// Special Registers

// Method Register
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Thread Register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg (original comment said
                                       // "link_reg" — copy/paste from lr_RegP)
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Link Register
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6399 
//----------Memory Operands----------------------------------------------------
// In the MEMORY_INTER descriptions below, index(0xffffffff) encodes
// "no index register" and scale/disp(0x0) encode "none".

// [base] — plain register-indirect addressing
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// [base + (sign-extended int index << scale)] — the predicate restricts the
// shift amount so it fits all memory uses of the address.
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// [base + (long index << scale)]
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// [base + sign-extended int index], no scaling
operand indIndexI2L(iRegP reg, iRegI ireg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// [base + long index], no scaling
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
6473 
// [base + immediate offset] addressing. The numeric suffix on the offset
// operand (4/8/16) presumably constrains the immediate to values encodable
// for accesses of that size — the immIOffset*/immLoffset* definitions are
// elsewhere in this file; confirm there.
operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI4(iRegP reg, immIOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI8(iRegP reg, immIOffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI16(iRegP reg, immIOffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Same as above with a long (64-bit) immediate offset.
operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL4(iRegP reg, immLoffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL8(iRegP reg, immLoffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL16(iRegP reg, immLoffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6585 
// Narrow-oop (compressed pointer) variants of the memory operands above.
// All require narrow_oop_shift() == 0 so the DecodeN is a no-op and the
// narrow register can be used directly as the base.
operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indIndexI2LN(iRegN reg, iRegI ireg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6690 
6691 
6692 
// AArch64 opto stubs need to write to the pc slot in the thread anchor
// (address = thread register + fixed pc-slot offset immL_pc_off)
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6707 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
// All use base 0x1e (SP) with the stack offset carried in disp.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
6782 
6783 // Operands for expressing Control Flow
6784 // NOTE: Label is a predefined operand which should not be redefined in
6785 //       the AD file. It is generically handled within the ADLC.
6786 
6787 //----------Conditional Branch Operands----------------------------------------
6788 // Comparison Op  - This is the operation of the comparison, and is limited to
6789 //                  the following set of codes:
6790 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6791 //
6792 // Other attributes of the comparison, such as unsignedness, are specified
6793 // by the comparison instruction that sets a condition code flags register.
6794 // That result is represented by a flags operand whose subtype is appropriate
6795 // to the unsignedness (etc.) of the comparison.
6796 //
6797 // Later, the instruction which matches both the Comparison Op (a Bool) and
6798 // the flags (produced by the Cmp) specifies the coding of the comparison op
6799 // by matching a specific subtype of Bool operand below, such as cmpOpU.
6800 
// used for signed integral comparisons and fp comparisons
// (the hex values are the AArch64 condition-code encodings used when
// emitting conditional instructions)

operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for unsigned integral comparisons

operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions
// (predicate limits matching to eq/ne tests)

operand cmpOpEqNe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);
  predicate(n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::eq);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions
// (predicate limits matching to lt/ge tests)

operand cmpOpLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain unsigned integral comparisons which can be
// converted to cbxx or tbxx instructions
// (predicate limits matching to eq/ne/lt/ge tests)

operand cmpOpUEqNeLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::eq
            || n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6914 
6915 // Special operand allowing long args to int ops to be truncated for free
6916 
// Special operand allowing long args to int ops to be truncated for free:
// matches (ConvL2I reg) so the truncation folds into the consuming
// 32-bit instruction. See the iRegIorL2I opclass comment below.
operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  // Terminating ';' added for consistency — every other operand in this
  // file writes interface(REG_INTER); (ADLC accepts both forms).
  interface(REG_INTER);
%}
6927 
// Memory opclasses for vector loads/stores of 4, 8 and 16 bytes; each
// accepts plain indirect, register-index, and the size-matched
// immediate-offset addressing forms.
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
6931 
6932 //----------OPERAND CLASSES----------------------------------------------------
6933 // Operand Classes are groups of operands that are used as to simplify
6934 // instruction definitions by not requiring the AD writer to specify
6935 // separate instructions for every form of operand when the
6936 // instruction accepts multiple operand types with the same basic
6937 // encoding and format. The classic case of this is memory operands.
6938 
// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address

opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);

// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. it allows the src to be either an iRegI or a (ConvL2I
// iRegL). in the latter case the l2i normally planted for a ConvL2I
// can be elided because the 32-bit instruction will just employ the
// lower 32 bits anyway.
//
// n.b. this does not elide all L2I conversions. if the truncated
// value is consumed by more than one operation then the ConvL2I
// cannot be bundled into the consuming nodes so an l2i gets planted
// (actually a movw $dst $src) and the downstream instructions consume
// the result of the l2i as an iRegI input. That's a shame since the
// movw is actually redundant but its not too costly.

opclass iRegIorL2I(iRegI, iRegL2I);
6959 
6960 //----------PIPELINE-----------------------------------------------------------
6961 // Rules which define the behavior of the target architectures pipeline.
6962 
// For specific pipelines, eg A53, define the stages of that pipeline
//pipe_desc(ISS, EX1, EX2, WR);
// Map the A53-style stage names onto the generic S0..S5 stages declared
// in the pipe_desc below.
#define ISS S0
#define EX1 S1
#define EX2 S2
#define WR  S3
6969 
6970 // Integer ALU reg operation
6971 pipeline %{
6972 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // Fixed size instructions
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
6985 
6986 // We don't use an actual pipeline model so don't care about resources
6987 // or description. we do use pipeline classes to introduce fixed
6988 // latencies
6989 
//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// INS0/INS1 are the two issue slots; INS01 means "either slot".
// ALU0/ALU1 likewise combine into ALU.
resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);
7000 
//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Define the pipeline as a generic 6 stage pipeline
// (S0..S3 are aliased as ISS/EX1/EX2/WR by the #defines above)
pipe_desc(S0, S1, S2, S3, S4, S5);
7006 
7007 //----------PIPELINE CLASSES---------------------------------------------------
7008 // Pipeline Classes describe the stages in which input and output are
7009 // referenced by the hardware pipeline.
7010 
// FP arithmetic pipelines: sources read early (S1/S2), result written
// at S5, issue on either slot (INS01), occupying the NEON/FP unit.

// FP two-source (dyadic) op, single precision
pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP two-source (dyadic) op, double precision
pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP one-source (unary) op, single precision
pipe_class fp_uop_s(vRegF dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP one-source (unary) op, double precision
pipe_class fp_uop_d(vRegD dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// double -> float conversion
pipe_class fp_d2f(vRegF dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> double conversion
pipe_class fp_f2d(vRegD dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
7066 
// FP <-> integer conversion pipelines; same timing shape as the FP ops
// above (src read at S1, dst written at S5, dual-issue, NEON/FP unit).

pipe_class fp_f2i(iRegINoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_l2f(vRegF dst, iRegL src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_d2i(iRegINoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// NOTE(review): src is iRegIorL2I here while fp_l2f above uses iRegL —
// confirm whether iRegL was intended for a long source.
pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
7138 
// FP divide: restricted to issue slot 0 (INS0) rather than either slot.
pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

// FP conditional select: also reads the flags register; shorter latency
// (result at S3).
pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP move-immediate: no register sources.
pipe_class fp_imm_s(vRegF dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class fp_imm_d(vRegD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP load-from-constant-pool: result available at S4.
pipe_class fp_load_constant_s(vRegF dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}

pipe_class fp_load_constant_d(vRegD dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}
7212 
// Vector pipelines. Throughout this section, 64-bit (vecD) forms issue on
// either slot (INS01) while 128-bit (vecX) forms are restricted to slot 0
// (INS0).

// vector multiply, 64-bit
pipe_class vmul64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// vector multiply, 128-bit
pipe_class vmul128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// vector multiply-accumulate, 64-bit: dst is also read (accumulator input)
pipe_class vmla64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// vector multiply-accumulate, 128-bit
pipe_class vmla128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// vector integer dyadic op, 64-bit
pipe_class vdop64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S4;
%}

// vector integer dyadic op, 128-bit
pipe_class vdop128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S4;
%}

// vector logical op, 64-bit
pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// vector logical op, 128-bit
pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
7294 
// vector shift by register, 64-bit
pipe_class vshift64(vecD dst, vecD src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// vector shift by register, 128-bit
pipe_class vshift128(vecX dst, vecX src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// vector shift by immediate, 64-bit (shift amount is not a pipeline input)
pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// vector shift by immediate, 128-bit
pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// vector FP dyadic op, 64-bit
pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// vector FP dyadic op, 128-bit
pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
7352 
7353 pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
7354 %{
7355   single_instruction;
7356   dst    : S5(write);
7357   src1   : S1(read);
7358   src2   : S1(read);
7359   INS0   : ISS;
7360   NEON_FP : S5;
7361 %}
7362 
7363 pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
7364 %{
7365   single_instruction;
7366   dst    : S5(write);
7367   src1   : S1(read);
7368   src2   : S1(read);
7369   INS0   : ISS;
7370   NEON_FP : S5;
7371 %}
7372 
7373 pipe_class vsqrt_fp128(vecX dst, vecX src)
7374 %{
7375   single_instruction;
7376   dst    : S5(write);
7377   src    : S1(read);
7378   INS0   : ISS;
7379   NEON_FP : S5;
7380 %}
7381 
// Vector FP unary op, 64-bit: source read in S1, result in S5.
pipe_class vunop_fp64(vecD dst, vecD src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector FP unary op, 128-bit; issue slot 0 only.
pipe_class vunop_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
7399 
7400 pipe_class vdup_reg_reg64(vecD dst, iRegI src)
7401 %{
7402   single_instruction;
7403   dst    : S3(write);
7404   src    : S1(read);
7405   INS01  : ISS;
7406   NEON_FP : S3;
7407 %}
7408 
7409 pipe_class vdup_reg_reg128(vecX dst, iRegI src)
7410 %{
7411   single_instruction;
7412   dst    : S3(write);
7413   src    : S1(read);
7414   INS01  : ISS;
7415   NEON_FP : S3;
7416 %}
7417 
7418 pipe_class vdup_reg_freg64(vecD dst, vRegF src)
7419 %{
7420   single_instruction;
7421   dst    : S3(write);
7422   src    : S1(read);
7423   INS01  : ISS;
7424   NEON_FP : S3;
7425 %}
7426 
7427 pipe_class vdup_reg_freg128(vecX dst, vRegF src)
7428 %{
7429   single_instruction;
7430   dst    : S3(write);
7431   src    : S1(read);
7432   INS01  : ISS;
7433   NEON_FP : S3;
7434 %}
7435 
7436 pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
7437 %{
7438   single_instruction;
7439   dst    : S3(write);
7440   src    : S1(read);
7441   INS01  : ISS;
7442   NEON_FP : S3;
7443 %}
7444 
// Vector move-immediate, 64-bit (no source operands).
pipe_class vmovi_reg_imm64(vecD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector move-immediate, 128-bit; issue slot 0 only.
pipe_class vmovi_reg_imm128(vecX dst)
%{
  single_instruction;
  dst    : S3(write);
  INS0   : ISS;
  NEON_FP : S3;
%}
7460 
// Vector load, 64-bit: address read at issue, result ready in S5.
pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector load, 128-bit.
pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7478 
// Vector store, 64-bit: address read at issue, data read in S2.
pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7487 
// Vector store, 128-bit: address read at issue, data read in S2.
// Fix: the source operand was declared vecD (copy-paste from the 64-bit
// class); every sibling 128-bit pipe class (vload_reg_mem128, vshift128,
// vdup_reg_reg128, ...) uses vecX, and this class serves 128-bit stores,
// so declare the operand vecX for consistency.
pipe_class vstore_reg_mem128(vecX src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7496 
//------- Integer ALU operations --------------------------

// Integer ALU reg-reg operation
// Operands needed in EX1, result generated in EX2
// Eg.  ADD     x0, x1, x2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : EX1(read);
  INS01  : ISS; // Dual issue as instruction 0 or 1
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with constant shift
// Shifted register must be available in LATE_ISS instead of EX1
// Eg.  ADD     x0, x1, x2, LSL #2
pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg operation with constant shift
// Eg.  LSL     x0, x1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with variable shift
// Both operands must be available in LATE_ISS instead of EX1
// Result is available in EX1 instead of EX2
// Eg.  LSLV    x0, x1, x2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX1(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX1;
%}

// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  // NOTE(review): header comment and dst say EX2, but the ALU resource
  // is held at EX1 — confirm which stage is intended.
  ALU    : EX1;
%}

// Integer ALU reg operation
// Eg.  NEG     x0, x1
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX2(write);
  src    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg immediate operation
// Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU immediate operation (no source operands)
// Eg.  MOV     x0, #N
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX1(write);
  INS01  : ISS;
  ALU    : EX1;
%}
7594 
//------- Compare operation -------------------------------

// Compare reg-reg; writes only the flags register.
// Eg.  CMP     x0, x1
pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  op2    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Compare reg-immediate
// Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}
7621 
//------- Conditional instructions ------------------------

// Conditional no operands (reads flags only)
// Eg.  CSINC   x0, zr, zr, <cond>
pipe_class icond_none(iRegI dst, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 2 operand
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src1   : EX1(read);
  src2   : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 1 operand
// EG.  CSEL    X0, X1, zr, <cond>
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
7659 
//------- Multiply pipeline operations --------------------

// Multiply reg-reg (32 bit)
// Eg.  MUL     w0, w1, w2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate (32 bit)
// Eg.  MADD    w0, w1, w2, w3
pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long multiply reg-reg (64 bit)
// Eg.  MUL     x0, x1, x2
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long multiply accumulate (64 bit)
// Eg.  MADD    x0, x1, x2, x3
pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}
7712 
//------- Divide pipeline operations --------------------

// 32-bit divide
// Eg.  SDIV    w0, w1, w2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

// 64-bit divide
// Eg.  SDIV    x0, x1, x2
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}
7738 
//------- Load pipeline operations ------------------------

// Load - prefetch (no destination register)
// Eg.  PFRM    <mem>
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, mem
// Eg.  LDR     x0, <mem>
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, reg (register-offset addressing)
// Eg.  LDR     x0, [sp, x1]
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}
7772 
//------- Store pipeline operations -----------------------

// Store - zr, mem (store of zero register)
// Eg.  STR     zr, <mem>
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, mem
// Eg.  STR     x0, <mem>
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, reg
// Eg. STR      x0, [sp, x1]
// NOTE(review): both operands are reads; 'dst' here appears to be the
// address-index register, not a destination — confirm naming intent.
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}
7806 
//------- Branch pipeline operations ----------------------

// Unconditional branch
pipe_class pipe_branch()
%{
  single_instruction;
  INS01  : ISS;
  BRANCH : EX1;
%}

// Conditional branch (reads flags)
pipe_class pipe_branch_cond(rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

// Compare & Branch (reads a register, not flags)
// EG.  CBZ/CBNZ
pipe_class pipe_cmp_branch(iRegI op1)
%{
  single_instruction;
  op1    : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}
7835 
//------- Synchronisation operations ----------------------

// Any operation requiring serialization.
// EG.  DMB/Atomic Ops/Load Acquire/Str Release
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Generic big/slow expanded idiom - also serialized
pipe_class pipe_slow()
%{
  instruction_count(10);
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}
7859 
// Empty pipeline class (zero latency; used for MachNop below).
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);
%}

// Default pipeline class.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);
%}

// Define the class for the Nop node.
define %{
   MachNop = pipe_class_empty;
%}
7899 
7900 %}
7901 //----------INSTRUCTIONS-------------------------------------------------------
7902 //
7903 // match      -- States which machine-independent subtree may be replaced
7904 //               by this instruction.
7905 // ins_cost   -- The estimated cost of this instruction is used by instruction
7906 //               selection to identify a minimum cost tree of machine
7907 //               instructions that matches a tree of machine-independent
7908 //               instructions.
7909 // format     -- A string providing the disassembly for this instruction.
7910 //               The value of an instruction's operand may be inserted
7911 //               by referring to it with a '$' prefix.
7912 // opcode     -- Three instruction opcodes may be provided.  These are referred
7913 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
7915 //               indicate the type of machine instruction, while secondary
7916 //               and tertiary are often used for prefix options or addressing
7917 //               modes.
7918 // ins_encode -- A list of encode classes with parameters. The encode class
7919 //               name must have been defined in an 'enc_class' specification
7920 //               in the encode section of the architecture description.
7921 
7922 // ============================================================================
7923 // Memory (Load/Store) Instructions
7924 
7925 // Load Instructions
7926 
// Load Byte (8 bit signed)
// Plain (non-acquiring) load; the predicate excludes volatile loads,
// which are matched by the ldar* rules in the volatile section below.
instruct loadB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsbw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit signed) into long
// For the ConvI2L forms the acquiring-load check is applied to the
// inner Load node (n->in(1)).
instruct loadB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned)
instruct loadUB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed)
instruct loadS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrshw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed) into long
instruct loadS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrsh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Char (16 bit unsigned)
instruct loadUS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8038 
// Load Integer (32 bit signed)
instruct loadI(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadI mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed) into long (sign-extending ldrsw)
instruct loadI2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrsw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit unsigned) into long
// The AndL with a 32-bit mask makes ldrw's implicit zero-extension
// give the required result directly.
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8080 
// Load Long (64 bit signed)
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  // Fix: format comment said "# int" (copy-paste from loadI); this is a
  // 64-bit load. Debug/disassembly text only — no encoding change.
  format %{ "ldr  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8094 
// Load Range (array length)
// No acquiring-load predicate: range loads are never volatile.
instruct loadRange(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Pointer
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Compressed Pointer
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Narrow Klass Pointer
instruct loadNKlass(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8163 
// Load Float
instruct loadF(vRegF dst, memory mem)
%{
  match(Set dst (LoadF mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrs  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_ldrs(dst, mem) );

  ins_pipe(pipe_class_memory);
%}

// Load Double
instruct loadD(vRegD dst, memory mem)
%{
  match(Set dst (LoadD mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrd  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_ldrd(dst, mem) );

  ins_pipe(pipe_class_memory);
%}
8191 
8192 
// Load Int Constant (materialize immediate into a 32-bit register)
instruct loadConI(iRegINoSp dst, immI src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# int" %}

  ins_encode( aarch64_enc_movw_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Long Constant
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}
8218 
// Load Pointer Constant
// May expand to several mov/movk instructions, hence the higher cost.

instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr\n\t"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Null Pointer Constant

instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# NULL ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
8248 
// Load Pointer Constant One

instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  // Fix: format comment said "# NULL ptr" (copy-paste from loadConP0);
  // this rule materializes the constant pointer value 1. Debug text only.
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
8262 
// Load Poll Page Constant (address of the safepoint polling page)

instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Poll Page Ptr" %}

  ins_encode(aarch64_enc_mov_poll_page(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Byte Map Base Constant (card table base)

instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Byte Map Base" %}

  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Pointer Constant

instruct loadConN(iRegNNoSp dst, immN con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{ "mov  $dst, $con\t# compressed ptr" %}

  ins_encode(aarch64_enc_mov_n(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Null Pointer Constant

instruct loadConN0(iRegNNoSp dst, immN0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed NULL ptr" %}

  ins_encode(aarch64_enc_mov_n0(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Klass Constant

instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_mov_nk(dst, con));

  ins_pipe(ialu_imm);
%}
8332 
// Load Packed Float Constant
// immFPacked constants can be encoded directly in an fmov immediate.

instruct loadConF_packed(vRegF dst, immFPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST * 4);
  format %{ "fmovs  $dst, $con"%}
  ins_encode %{
    // NOTE(review): fmovs is passed a (double) — presumably the
    // assembler's immediate-fmov API takes double; confirm.
    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
  %}

  ins_pipe(fp_imm_s);
%}

// Load Float Constant (general case: from the constant table)

instruct loadConF(vRegF dst, immF con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 4);

  format %{
    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
  %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_s);
%}
8363 
// Load Packed Double Constant
// immDPacked constants can be encoded directly in an fmov immediate.

instruct loadConD_packed(vRegD dst, immDPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST);
  format %{ "fmovd  $dst, $con"%}
  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
  %}

  ins_pipe(fp_imm_d);
%}
8376 
// Load Double Constant (general case: from the constant table)

instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  // Fix: format said "float=$con" (copy-paste from loadConF); this loads
  // a double constant. Debug/disassembly text only.
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
8393 
// Store Instructions

// Store CMS card-mark Immediate
// Selected only when the preceding StoreStore barrier is redundant
// (unnecessary_storestore); otherwise the _ordered form below is used.
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store CMS card-mark Immediate with intervening StoreStore
// needed when using CMS with no conditional card marking
instruct storeimmCM0_ordered(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST * 2);
  format %{ "dmb ishst"
      "\n\tstrb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0_ordered(mem));

  ins_pipe(istore_mem);
%}
8424 
// Store Byte
// Plain (non-releasing) store; volatile stores are matched by the
// stlr* rules elsewhere (see needs_releasing_store).
instruct storeB(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreB mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb(src, mem));

  ins_pipe(istore_reg_mem);
%}
8438 
8439 
8440 instruct storeimmB0(immI0 zero, memory mem)
8441 %{
8442   match(Set mem (StoreB mem zero));
8443   predicate(!needs_releasing_store(n));
8444 
8445   ins_cost(INSN_COST);
8446   format %{ "strb rscractch2, $mem\t# byte" %}
8447 
8448   ins_encode(aarch64_enc_strb0(mem));
8449 
8450   ins_pipe(istore_mem);
8451 %}
8452 
// Store Char/Short
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Char/Short immediate zero
instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}
8479 
// Store Integer

instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem(StoreI mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Integer immediate zero
instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem(StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}
8507 
// Store Long (64 bit signed)
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fix: format comment said "# int" (copy-paste from storeI); this is
  // a 64-bit store. Debug/disassembly text only.
  format %{ "str  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Long immediate zero
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fix: as above, "# int" -> "# long".
  format %{ "str  zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
8535 
// Store Pointer
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Null Pointer
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
8563 
// Store Compressed Pointer
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store compressed null by storing rheapbase, which holds zero when
// both the narrow-oop and narrow-klass bases are NULL.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}
8592 
// Store Float
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
8623 
// Store Compressed Klass Pointer
instruct storeNKlass(iRegN src, memory mem)
%{
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
8637 
// TODO
// implement storeImmD0 and storeDImmPacked

// prefetch instructions
// Must be safe to execute with invalid address (cannot fault).

instruct prefetchalloc( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}
8654 
//  ---------------- volatile loads and stores ----------------

// These rules use the acquiring load (ldar*) forms and therefore take an
// indirect (base-register only) memory operand: the AArch64 acquire
// instructions do not support offset or indexed addressing modes.

// Load Byte (8 bit signed)
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned)
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}
8708 
// Load Short (16 bit signed)
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned)
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
8746 
// Load Short/Char (16 bit signed) into long
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  // format fixed to name the sign-extending ldarsh, matching the
  // aarch64_enc_ldarsh encoding below (previously said "ldarh", the
  // zero-extending form, which would mislead anyone reading the
  // printed opto assembly)
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
8759 
// Load Integer (32 bit signed)
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit unsigned) into long
// Matches the (LoadI & 0xFFFFFFFF) zero-extension idiom; ldarw already
// zeroes the upper 32 bits, so no explicit mask is emitted.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
8785 
// Load Long (64 bit signed)
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  // format tag corrected from "# int" to "# long": this rule matches a
  // 64-bit LoadL and emits the 64-bit ldar
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
8798 
// Load Pointer
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Compressed Pointer
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Float
// NOTE(review): encoding name (fldars) suggests an acquiring load via a
// scratch gpr followed by a move to the fp register -- confirm against
// the aarch64_enc_fldars encoding block, which is outside this chunk.
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Load Double
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}
8850 
// Store Byte
// Volatile stores use the releasing stlr* forms; like the acquire loads
// above they require an indirect (base-register only) memory operand.
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}
8876 
// Store Integer

instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  // space added after "mem" for consistency with every sibling
  // volatile store rule (was "Set mem(StoreI ...)")
  match(Set mem (StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
8890 
// Store Long (64 bit signed)
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  // format tag corrected from "# int" to "# long": this rule matches a
  // 64-bit StoreL and emits the 64-bit stlr
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
8903 
// Store Pointer
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Pointer
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Float
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}

//  ---------------- end of volatile loads and stores ----------------
8960 
8961 // ============================================================================
8962 // BSWAP Instructions
8963 
8964 instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
8965   match(Set dst (ReverseBytesI src));
8966 
8967   ins_cost(INSN_COST);
8968   format %{ "revw  $dst, $src" %}
8969 
8970   ins_encode %{
8971     __ revw(as_Register($dst$$reg), as_Register($src$$reg));
8972   %}
8973 
8974   ins_pipe(ialu_reg);
8975 %}
8976 
8977 instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
8978   match(Set dst (ReverseBytesL src));
8979 
8980   ins_cost(INSN_COST);
8981   format %{ "rev  $dst, $src" %}
8982 
8983   ins_encode %{
8984     __ rev(as_Register($dst$$reg), as_Register($src$$reg));
8985   %}
8986 
8987   ins_pipe(ialu_reg);
8988 %}
8989 
8990 instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
8991   match(Set dst (ReverseBytesUS src));
8992 
8993   ins_cost(INSN_COST);
8994   format %{ "rev16w  $dst, $src" %}
8995 
8996   ins_encode %{
8997     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
8998   %}
8999 
9000   ins_pipe(ialu_reg);
9001 %}
9002 
9003 instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
9004   match(Set dst (ReverseBytesS src));
9005 
9006   ins_cost(INSN_COST);
9007   format %{ "rev16w  $dst, $src\n\t"
9008             "sbfmw $dst, $dst, #0, #15" %}
9009 
9010   ins_encode %{
9011     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
9012     __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
9013   %}
9014 
9015   ins_pipe(ialu_reg);
9016 %}
9017 
9018 // ============================================================================
9019 // Zero Count Instructions
9020 
9021 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
9022   match(Set dst (CountLeadingZerosI src));
9023 
9024   ins_cost(INSN_COST);
9025   format %{ "clzw  $dst, $src" %}
9026   ins_encode %{
9027     __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
9028   %}
9029 
9030   ins_pipe(ialu_reg);
9031 %}
9032 
9033 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
9034   match(Set dst (CountLeadingZerosL src));
9035 
9036   ins_cost(INSN_COST);
9037   format %{ "clz   $dst, $src" %}
9038   ins_encode %{
9039     __ clz(as_Register($dst$$reg), as_Register($src$$reg));
9040   %}
9041 
9042   ins_pipe(ialu_reg);
9043 %}
9044 
9045 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
9046   match(Set dst (CountTrailingZerosI src));
9047 
9048   ins_cost(INSN_COST * 2);
9049   format %{ "rbitw  $dst, $src\n\t"
9050             "clzw   $dst, $dst" %}
9051   ins_encode %{
9052     __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
9053     __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
9054   %}
9055 
9056   ins_pipe(ialu_reg);
9057 %}
9058 
9059 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
9060   match(Set dst (CountTrailingZerosL src));
9061 
9062   ins_cost(INSN_COST * 2);
9063   format %{ "rbit   $dst, $src\n\t"
9064             "clz    $dst, $dst" %}
9065   ins_encode %{
9066     __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
9067     __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
9068   %}
9069 
9070   ins_pipe(ialu_reg);
9071 %}
9072 
//---------- Population Count Instructions -------------------------------------
//

// Integer.bitCount via the SIMD cnt/addv sequence: move to a vector
// register, count bits per byte, then sum the byte counts.
instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // movw of a register onto itself clears bits 32-63 while leaving the
    // 32-bit int value unchanged, so $src is not clobbered as an int
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
9097 
// As popCountI but the int is loaded straight into the vector register
// with ldrs, avoiding the gpr round trip.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    // loadStore selects the addressing form from the memory operand
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
9119 
// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
9140 
// As popCountL but the long is loaded straight into the vector register
// with ldrd, avoiding the gpr round trip.
instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    // loadStore selects the addressing form from the memory operand
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
9162 
9163 // ============================================================================
9164 // MemBar Instruction
9165 
9166 instruct load_fence() %{
9167   match(LoadFence);
9168   ins_cost(VOLATILE_REF_COST);
9169 
9170   format %{ "load_fence" %}
9171 
9172   ins_encode %{
9173     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
9174   %}
9175   ins_pipe(pipe_serial);
9176 %}
9177 
9178 instruct unnecessary_membar_acquire() %{
9179   predicate(unnecessary_acquire(n));
9180   match(MemBarAcquire);
9181   ins_cost(0);
9182 
9183   format %{ "membar_acquire (elided)" %}
9184 
9185   ins_encode %{
9186     __ block_comment("membar_acquire (elided)");
9187   %}
9188 
9189   ins_pipe(pipe_class_empty);
9190 %}
9191 
9192 instruct membar_acquire() %{
9193   match(MemBarAcquire);
9194   ins_cost(VOLATILE_REF_COST);
9195 
9196   format %{ "membar_acquire" %}
9197 
9198   ins_encode %{
9199     __ block_comment("membar_acquire");
9200     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
9201   %}
9202 
9203   ins_pipe(pipe_serial);
9204 %}
9205 
9206 
// Lock acquire barriers are always elided: the CAS used for locking
// already provides the required ordering.
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}

instruct store_fence() %{
  match(StoreFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "store_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

// Elided release barrier: matches first (cost 0) when the predicate
// proves the following store already provides release semantics
// (e.g. it will be emitted as an stlr).
instruct unnecessary_membar_release() %{
  predicate(unnecessary_release(n));
  match(MemBarRelease);
  ins_cost(0);

  format %{ "membar_release (elided)" %}

  ins_encode %{
    __ block_comment("membar_release (elided)");
  %}
  ins_pipe(pipe_serial);
%}

instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release" %}

  ins_encode %{
    __ block_comment("membar_release");
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}
9257 
instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(VOLATILE_REF_COST);

  format %{ "MEMBAR-store-store" %}

  ins_encode %{
    __ membar(Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

// Lock release barriers are always elided: the CAS/stlxr used for
// unlocking already provides the required ordering.
instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_release_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// Elided volatile (StoreLoad) barrier when the predicate proves the
// surrounding accesses already order it away.
instruct unnecessary_membar_volatile() %{
  predicate(unnecessary_volatile(n));
  match(MemBarVolatile);
  ins_cost(0);

  format %{ "membar_volatile (elided)" %}

  ins_encode %{
    __ block_comment("membar_volatile (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// Full StoreLoad barrier -- the expensive one, hence the inflated cost
// so the elided rule above is always preferred when legal.
instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(VOLATILE_REF_COST*100);

  format %{ "membar_volatile" %}

  ins_encode %{
    __ block_comment("membar_volatile");
    __ membar(Assembler::StoreLoad);
  %}

  ins_pipe(pipe_serial);
%}
9310 
9311 // ============================================================================
9312 // Cast/Convert Instructions
9313 
9314 instruct castX2P(iRegPNoSp dst, iRegL src) %{
9315   match(Set dst (CastX2P src));
9316 
9317   ins_cost(INSN_COST);
9318   format %{ "mov $dst, $src\t# long -> ptr" %}
9319 
9320   ins_encode %{
9321     if ($dst$$reg != $src$$reg) {
9322       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
9323     }
9324   %}
9325 
9326   ins_pipe(ialu_reg);
9327 %}
9328 
9329 instruct castP2X(iRegLNoSp dst, iRegP src) %{
9330   match(Set dst (CastP2X src));
9331 
9332   ins_cost(INSN_COST);
9333   format %{ "mov $dst, $src\t# ptr -> long" %}
9334 
9335   ins_encode %{
9336     if ($dst$$reg != $src$$reg) {
9337       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
9338     }
9339   %}
9340 
9341   ins_pipe(ialu_reg);
9342 %}
9343 
9344 // Convert oop into int for vectors alignment masking
9345 instruct convP2I(iRegINoSp dst, iRegP src) %{
9346   match(Set dst (ConvL2I (CastP2X src)));
9347 
9348   ins_cost(INSN_COST);
9349   format %{ "movw $dst, $src\t# ptr -> int" %}
9350   ins_encode %{
9351     __ movw($dst$$Register, $src$$Register);
9352   %}
9353 
9354   ins_pipe(ialu_reg);
9355 %}
9356 
// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  // fixed format string: "$dst" was written as plain "dst", so the
  // destination register was never substituted in the printed assembly;
  // also name the 32-bit movw actually emitted below (was "mov")
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
9372 
9373 
// Convert oop pointer into compressed form
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  // general case: may need a null check inside encode_heap_oop, which
  // uses the flags -- hence KILL cr
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Known-not-null variant: no null check needed.
// NOTE(review): cr is declared but carries no effect() here -- confirm
// encode_heap_oop_not_null leaves the flags untouched.
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}
9427 
// n.b. AArch64 implementations of encode_klass_not_null and
// decode_klass_not_null do not modify the flags register so, unlike
// Intel, we don't kill CR as a side effect here

instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}

instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    // the single-register form handles in-place decode
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}
9465 
// The following casts exist only for the type system; they emit no code
// (size(0), empty encoding) and just retype their register in place.

instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
9496 
9497 // ============================================================================
9498 // Atomic operation instructions
9499 //
9500 // Intel and SPARC both implement Ideal Node LoadPLocked and
9501 // Store{PIL}Conditional instructions using a normal load for the
9502 // LoadPLocked and a CAS for the Store{PIL}Conditional.
9503 //
9504 // The ideal code appears only to use LoadPLocked/StorePLocked as a
9505 // pair to lock object allocations from Eden space when not using
9506 // TLABs.
9507 //
9508 // There does not appear to be a Load{IL}Locked Ideal Node and the
9509 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
9510 // and to use StoreIConditional only for 32-bit and StoreLConditional
9511 // only for 64-bit.
9512 //
9513 // We implement LoadPLocked and StorePLocked instructions using,
9514 // respectively the AArch64 hw load-exclusive and store-conditional
9515 // instructions. Whereas we must implement each of
9516 // Store{IL}Conditional using a CAS which employs a pair of
9517 // instructions comprising a load-exclusive followed by a
9518 // store-conditional.
9519 
9520 
9521 // Locked-load (linked load) of the current heap-top
9522 // used when updating the eden heap top
9523 // implemented using ldaxr on AArch64
9524 
9525 instruct loadPLocked(iRegPNoSp dst, indirect mem)
9526 %{
9527   match(Set dst (LoadPLocked mem));
9528 
9529   ins_cost(VOLATILE_REF_COST);
9530 
9531   format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}
9532 
9533   ins_encode(aarch64_enc_ldaxr(dst, mem));
9534 
9535   ins_pipe(pipe_serial);
9536 %}
9537 
// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flag (EQ) on success.
// implemented using stlxr on AArch64.
// Pairs with the ldaxr emitted by loadPLocked above; oldval is implied
// by the exclusive monitor, so only newval is used by the encoding.

instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
9562 
9563 
// storeLConditional is used by PhaseMacroExpand::expand_lock_node
// when attempting to rebias a lock towards the current thread.  We
// must use the acquire form of cmpxchg in order to guarantee acquire
// semantics in this case.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}

// storeIConditional also has acquire semantics, for no better reason
// than matching storeLConditional.  At the time of writing this
// comment storeIConditional was not used anywhere by AArch64.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
9602 
// standard CompareAndSwapX when we are using barriers
// these have higher priority than the rules selected by a predicate

// XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
// can't match them

instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgb $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgb(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgs $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgs(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9662 
9663 instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
9664 
9665   match(Set res (CompareAndSwapL mem (Binary oldval newval)));
9666   ins_cost(2 * VOLATILE_REF_COST);
9667 
9668   effect(KILL cr);
9669 
9670  format %{
9671     "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
9672     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9673  %}
9674 
9675  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
9676             aarch64_enc_cset_eq(res));
9677 
9678   ins_pipe(pipe_slow);
9679 %}
9680 
// Compare-and-swap a pointer (64-bit oop): $res <- 1 on success, 0 on
// failure.  Flags are clobbered by the embedded compare (KILL cr).
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  // Plain (non-acquiring) CAS pays for both a leading and trailing barrier.
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 // Same doubleword encoding as the long form; pointers are 64 bits here.
 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9698 
// Compare-and-swap a narrow (compressed, 32-bit) oop: $res <- 1 on
// success, 0 on failure.  Flags are clobbered (KILL cr).
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  // Plain (non-acquiring) CAS pays for both a leading and trailing barrier.
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 // Word-sized encoding: a narrow oop occupies 32 bits.
 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9716 
9717 // alternative CompareAndSwapX when we are eliding barriers
9718 
// Acquiring CAS for int.  Selected (via the predicate) when the matcher
// has proved the CAS needs acquiring load-exclusive semantics, so the
// leading barrier can be elided; cost is VOLATILE_REF_COST instead of
// twice that for the plain rule.
instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring CAS for long; see compareAndSwapIAcq for the rationale.
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring CAS for pointer; see compareAndSwapIAcq for the rationale.
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// Acquiring CAS for narrow (compressed) oop; see compareAndSwapIAcq.
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9794 
9795 
9796 // ---------------------------------------------------------------------
9797 
9798 
9799 // BEGIN This section of the file is automatically generated. Do not edit --------------
9800 
9801 // Sundry CAS operations.  Note that release is always true,
9802 // regardless of the memory ordering of the CAS.  This is because we
9803 // need the volatile case to be sequentially consistent but there is
9804 // no trailing StoreLoad barrier emitted by C2.  Unfortunately we
9805 // can't check the type of memory ordering here, so we always emit a
9806 // STLXR.
9807 
9808 // This section is generated from aarch64_ad_cas.m4
9809 
9810 
9811 
// Compare-and-exchange byte: returns the OLD value observed in memory
// (not a success boolean).  TEMP_DEF res keeps the result register
// disjoint from the inputs while the exclusive loop is live.
// NOTE(review): the format text says "weak" but the encoding below
// passes /*weak*/ false — text inherited from the m4 template.
instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    // Compare only the low byte: zero-extend oldval into rscratch2 for
    // the comparison, then sign-extend the byte result back into res.
    __ uxtbw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare-and-exchange short: returns the old value; halfword variant of
// compareAndExchangeB (uxthw/sxthw instead of uxtbw/sxtbw).
// NOTE(review): format text says "weak" but /*weak*/ false is passed.
instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ uxthw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare-and-exchange int: word-sized, no extension needed.
// NOTE(review): format text says "weak" but /*weak*/ false is passed.
instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare-and-exchange long: doubleword (xword) operand size.
// NOTE(review): format text says "weak" but /*weak*/ false is passed.
instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare-and-exchange narrow (compressed) oop: word-sized.
// NOTE(review): format text says "weak" but /*weak*/ false is passed.
instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare-and-exchange pointer: doubleword (xword) operand size.
// NOTE(review): format text says "weak" but /*weak*/ false is passed.
instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9905 
// Weak CAS byte: may fail spuriously (/*weak*/ true), so no result
// register is captured (noreg); success is reported as 0/1 in $res via
// csetw on the EQ flag.
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    // Zero-extend oldval so only the low byte participates in the compare.
    __ uxtbw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Weak CAS short: halfword variant of weakCompareAndSwapB.
instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ uxthw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Weak CAS int: word-sized, result is 0/1 success flag.
instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Weak CAS long: doubleword (xword), result is 0/1 success flag.
instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Weak CAS narrow (compressed) oop: word-sized, 0/1 success flag.
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// Weak CAS pointer: doubleword (xword), 0/1 success flag.
instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10009 
10010 // END This section of the file is automatically generated. Do not edit --------------
10011 // ---------------------------------------------------------------------
10012 
// Atomic exchange int: $prev <- *[$mem]; *[$mem] <- $newv.
instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetI mem newv));
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Atomic exchange long (doubleword xchg).
instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
  match(Set prev (GetAndSetL mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Atomic exchange narrow (compressed) oop: word-sized xchg.
instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetN mem newv));
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Atomic exchange pointer (doubleword xchg).
instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
  match(Set prev (GetAndSetP mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10048 
10049 
// Atomic fetch-and-add long (register increment): $newval <- old *[$mem],
// *[$mem] += $incr.
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// As above but the fetched value is unused; the lower cost (9 vs 10)
// makes the matcher prefer this rule when the predicate holds, and the
// old value is discarded (noreg).
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Atomic fetch-and-add long with an immediate increment (add/sub range).
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Immediate-increment form with unused result; cheaper, result discarded.
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Atomic fetch-and-add int (word-sized; register increment).
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Int form with unused result; cheaper, result discarded (noreg).
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Atomic fetch-and-add int with an immediate increment.
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Immediate-increment int form with unused result; cheaper.
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10133 
10134 // Manifest a CmpL result in an integer register.
10135 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
// Three-way long compare into an int register:
//   cmp sets the flags, csetw yields 0/1 for NE, cnegw negates on LT,
//   producing -1 (src1 < src2), 0 (equal) or +1 (src1 > src2).
// Flags are clobbered (KILL flags).
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    __ cmp($src1$$Register, $src2$$Register);
    __ csetw($dst$$Register, Assembler::NE);                 // dst = (src1 != src2) ? 1 : 0
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT); // dst = -dst if src1 < src2
  %}

  ins_pipe(pipe_class_default);
%}
10156 
// Three-way long compare against an add/sub-range immediate; same -1/0/+1
// result as cmpL3_reg_reg.  Flags are clobbered (KILL flags).
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    int32_t con = (int32_t)$src2$$constant;
    // A negative constant cannot be encoded as a subs immediate, so add
    // its magnitude instead: adds zr, src1, -con sets the same flags.
    // NOTE(review): assumes immLAddSub excludes INT_MIN so -con cannot
    // overflow — confirm against the operand definition.
    if (con < 0) {
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
10181 
10182 // ============================================================================
10183 // Conditional Move Instructions
10184 
// n.b. we have identical rules for both a signed compare op (cmpOp)
// and an unsigned compare op (cmpOpU). it would be nice if we could
// define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
// of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
10194 
// Conditional move int, signed compare: dst = cmp ? src2 : src1.
// n.b. csel's operand order is reversed relative to the rule's argument
// order — $src2 is selected when the condition holds.
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Conditional move int, unsigned compare; identical encoding, typed
// against cmpOpU/rFlagsRegU (see the comment block above the rules).
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10226 
10227 // special cases where one arg is zero
10228 
10229 // n.b. this is selected in preference to the rule above because it
10230 // avoids loading constant 0 into a source register
10231 
10232 // TODO
10233 // we ought only to be able to cull one of these variants as the ideal
10234 // transforms ought always to order the zero consistently (to left/right?)
10235 
// Conditional move int where the first value is the constant zero:
// dst = cmp ? src : 0 — zr replaces a loaded constant.
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare twin of cmovI_zero_reg.
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Conditional move int where the second value is zero:
// dst = cmp ? 0 : src.
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare twin of cmovI_reg_zero.
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10299 
10300 // special case for creating a boolean 0 or 1
10301 
10302 // n.b. this is selected in preference to the rule above because it
10303 // avoids loading constants 0 and 1 into a source register
10304 
// Boolean materialization: choose between constants 1 and 0 without
// loading either into a register — csincw dst, zr, zr, cond computes
// cond ? 0 : 1 in a single instruction.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}

// Unsigned-compare twin of cmovI_reg_zero_one.
instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
10342 
// Conditional move long, signed compare: dst = cmp ? src2 : src1
// (csel operand order is reversed — $src2 is taken when cmp holds).
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned-compare twin of cmovL_reg_reg.
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10374 
10375 // special cases where one arg is zero
10376 
// Conditional move long with zero second value: dst = cmp ? 0 : src.
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare twin of cmovL_reg_zero.
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Conditional move long with zero first value: dst = cmp ? src : 0.
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare twin of cmovL_zero_reg.
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10440 
// Conditional move pointer, signed compare: dst = cmp ? src2 : src1
// (csel operand order is reversed — $src2 is taken when cmp holds).
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned-compare twin of cmovP_reg_reg.
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10472 
10473 // special cases where one arg is zero
10474 
// Conditional move pointer with zero (null) second value:
// dst = cmp ? 0 : src.
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare twin of cmovP_reg_zero.
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Conditional move pointer with zero first value: dst = cmp ? src : 0.
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-compare twin of cmovP_zero_reg.
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10538 
// Conditional move narrow (compressed) oop, signed compare:
// dst = cmp ? src2 : src1; word-sized cselw since narrow oops are 32-bit.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10554 
// Conditional move, compressed pointer (narrow oop), unsigned compare:
// cselw selects $src2 when $cmp holds, otherwise $src1.
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  // Fixed format annotation: this is the unsigned variant (cmpOpU /
  // rFlagsRegU); the previous "# signed" text was a copy-paste error.
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10570 
10571 // special cases where one arg is zero
10572 
// Conditional move, compressed pointer vs zero, signed compare:
// cselw selects zr when $cmp holds, otherwise $src.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10588 
// Conditional move, compressed pointer vs zero, unsigned compare:
// cselw selects zr when $cmp holds, otherwise $src.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10604 
// Conditional move, zero vs compressed pointer, signed compare:
// cselw selects $src when $cmp holds, otherwise zr.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10620 
// Conditional move, zero vs compressed pointer, unsigned compare:
// cselw selects $src when $cmp holds, otherwise zr.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10636 
// Conditional move, single-precision float, signed compare:
// fcsels selects $src2 when the condition holds, otherwise $src1.
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
10654 
// Conditional move, single-precision float, unsigned compare:
// fcsels selects $src2 when the condition holds, otherwise $src1.
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
10672 
// Conditional move, double-precision float, signed compare:
// fcseld selects $src2 when the condition holds, otherwise $src1.
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // Fixed format annotation: this pattern is CMoveD/fcseld (double),
  // not float as the previous comment text claimed.
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10690 
// Conditional move, double-precision float, unsigned compare:
// fcseld selects $src2 when the condition holds, otherwise $src1.
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // Fixed format annotation: this pattern is CMoveD/fcseld (double),
  // not float as the previous comment text claimed.
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10708 
10709 // ============================================================================
10710 // Arithmetic Instructions
10711 //
10712 
10713 // Integer Addition
10714 
10715 // TODO
10716 // these currently employ operations which do not set CR and hence are
10717 // not flagged as killing CR but we would like to isolate the cases
10718 // where we want to set flags from those where we don't. need to work
10719 // out how to do that.
10720 
// Integer (32-bit) addition, register + register.
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10735 
// Integer (32-bit) addition, register + add/sub-encodable immediate.
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10749 
// Integer addition of the low word of a long plus an immediate:
// the ConvL2I is free because addw only reads the low 32 bits of $src1.
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10763 
10764 // Pointer Addition
// Pointer Addition
// Pointer plus 64-bit offset, register + register.
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10779 
// Pointer plus sign-extended int offset: the ConvI2L is folded into the
// add via the sxtw extend form, so no separate extend instruction is needed.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}
10794 
// Pointer plus shifted long offset: folds base + (index << scale) into a
// single lea with a scaled-register addressing mode.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10809 
// Pointer plus sign-extended, shifted int offset: folds
// base + ((long)index << scale) into one lea using the sxtw(scale) mode.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10824 
// Sign-extend-then-shift: (long)(int)src << scale collapses to a single
// sbfiz (signed bitfield insert in zeros).  The field width is capped at
// 32 bits (the significant bits of the int source); the shift amount is
// masked to 6 bits as in the other long shifts.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
10839 
10840 // Pointer Immediate Addition
10841 // n.b. this needs to be more expensive than using an indirect memory
10842 // operand
// Pointer plus add/sub-encodable immediate offset.
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10856 
10857 // Long Addition
// Long Addition
// 64-bit addition, register + register.
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10873 
// Long Immediate Addition. No constant pool entries required.
// 64-bit addition, register + add/sub-encodable immediate.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10888 
10889 // Integer Subtraction
// Integer Subtraction
// 32-bit subtraction, register - register.
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10904 
10905 // Immediate Subtraction
// Immediate Subtraction
// 32-bit subtraction, register - add/sub-encodable immediate.
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10919 
10920 // Long Subtraction
// Long Subtraction
// 64-bit subtraction, register - register.
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10936 
// Long Immediate Subtraction. No constant pool entries required.
// 64-bit subtraction, register - add/sub-encodable immediate.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  // Fixed format string: was "sub$dst, ..." (missing separator between
  // mnemonic and first operand in the disassembly text).
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10951 
10952 // Integer Negation (special case for sub)
10953 
// Integer negation, matched from (0 - src).
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
10967 
10968 // Long Negation
10969 
// Long negation, matched from (0L - src).
instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
10983 
10984 // Integer Multiply
10985 
// 32-bit integer multiply.
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
11000 
// Widening signed multiply: (long)a * (long)b of two ints collapses to a
// single smull, avoiding the two explicit sign extensions.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
11015 
11016 // Long Multiply
11017 
// Long Multiply
// 64-bit multiply (low 64 bits of the product).
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
11032 
// High half of a signed 64x64->128-bit multiply (smulh).
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  // Fixed format string: removed the stray trailing comma after $src2.
  format %{ "smulh   $dst, $src1, $src2\t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
11048 
11049 // Combined Integer Multiply & Add/Sub
11050 
// Fused 32-bit multiply-add: dst = src3 + src1 * src2.
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Fixed format mnemonic: the encoding emits the 32-bit maddw, not madd.
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
11066 
// Fused 32-bit multiply-subtract: dst = src3 - src1 * src2.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Fixed format mnemonic: the encoding emits the 32-bit msubw, not msub.
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
11082 
11083 // Combined Long Multiply & Add/Sub
11084 
// Combined Long Multiply & Add/Sub

// Fused 64-bit multiply-add: dst = src3 + src1 * src2.
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
11100 
// Fused 64-bit multiply-subtract: dst = src3 - src1 * src2.
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
11116 
11117 // Integer Divide
11118 
// 32-bit signed divide (sdivw via shared encoding class).
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
11128 
// Sign-bit extraction: (x >> 31) >>> 31 collapses to a single logical
// shift right by 31, yielding 0 or 1.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}
11138 
// Rounding adjustment src + ((src >> 31) >>> 31), i.e. add the sign bit
// of src, folded into a single addw with an LSR #31 shifted operand.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
11152 
11153 // Long Divide
11154 
// 64-bit signed divide (sdiv via shared encoding class).
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
11164 
// Sign-bit extraction, long: (x >> 63) >>> 63 collapses to a single
// logical shift right by 63, yielding 0 or 1.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
11174 
// Rounding adjustment src + ((src >> 63) >>> 63), i.e. add the sign bit
// of src, folded into a single add with an LSR #63 shifted operand.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immI_63 div1, immI_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  // Fixed format string: include the LSR modifier, matching the shifted
  // operand actually emitted (and the 32-bit div2Round format).
  format %{ "add $dst, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
11188 
11189 // Integer Remainder
11190 
// 32-bit signed remainder: sdivw into rscratch1 then msubw
// (dst = src1 - rscratch1 * src2), via the shared encoding class.
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  // Fixed format string: removed the stray unbalanced '(' after msubw.
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
11201 
11202 // Long Remainder
11203 
// 64-bit signed remainder: sdiv into rscratch1 then msub
// (dst = src1 - rscratch1 * src2), via the shared encoding class.
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  // Fixed format string: removed the stray unbalanced '(' after msub and
  // used "\n\t" continuation for consistency with modI.
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
11214 
11215 // Integer Shifts
11216 
11217 // Shift Left Register
// Shift Left Register
// 32-bit shift left by a register count (lslvw uses count mod 32).
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11232 
11233 // Shift Left Immediate
// Shift Left Immediate
// 32-bit shift left by constant; only the low 5 bits of the count are used.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11248 
11249 // Shift Right Logical Register
// Shift Right Logical Register
// 32-bit unsigned shift right by a register count.
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11264 
11265 // Shift Right Logical Immediate
// Shift Right Logical Immediate
// 32-bit unsigned shift right by constant; count masked to 5 bits.
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11280 
11281 // Shift Right Arithmetic Register
// Shift Right Arithmetic Register
// 32-bit signed shift right by a register count.
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11296 
11297 // Shift Right Arithmetic Immediate
// Shift Right Arithmetic Immediate
// 32-bit signed shift right by constant; count masked to 5 bits.
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11312 
11313 // Combined Int Mask and Right Shift (using UBFM)
11314 // TODO
11315 
11316 // Long Shifts
11317 
11318 // Shift Left Register
// Shift Left Register
// 64-bit shift left by a register count (lslv uses count mod 64).
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11333 
11334 // Shift Left Immediate
// Shift Left Immediate
// 64-bit shift left by constant; count masked to 6 bits.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11349 
11350 // Shift Right Logical Register
// Shift Right Logical Register
// 64-bit unsigned shift right by a register count.
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11365 
11366 // Shift Right Logical Immediate
// Shift Right Logical Immediate
// 64-bit unsigned shift right by constant; count masked to 6 bits.
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11381 
11382 // A special-case pattern for card table stores.
// A special-case pattern for card table stores.
// Logical shift right of a pointer reinterpreted as a long (CastP2X);
// the cast itself needs no code, only the lsr is emitted.
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11397 
11398 // Shift Right Arithmetic Register
// Shift Right Arithmetic Register
// 64-bit signed shift right by a register count.
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11413 
11414 // Shift Right Arithmetic Immediate
// Shift Right Arithmetic Immediate
// 64-bit signed shift right by constant; count masked to 6 bits.
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11429 
11430 // BEGIN This section of the file is automatically generated. Do not edit --------------
11431 
// (generated) Long bitwise NOT via XOR with -1: eon with zr gives
// dst = ~(src1 ^ 0) = ~src1.
instruct regL_not_reg(iRegLNoSp dst,
                         iRegL src1, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
// (generated) Int bitwise NOT via XOR with -1: eonw with zr gives
// dst = ~(src1 ^ 0) = ~src1.
instruct regI_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
11464 
// (generated) src1 & ~src2 folded into a single bicw (bit clear).
instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11481 
// (generated) src1 & ~src2 folded into a single bic (bit clear), long.
instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11498 
// (generated) src1 | ~src2 folded into a single ornw (OR NOT).
instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11515 
// (generated) src1 | ~src2 folded into a single orn (OR NOT), long.
instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11532 
// (generated) ~(src2 ^ src1) — matched in the canonical form
// (XorI -1 (XorI src2 src1)) — folded into a single eonw.
instruct XorI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11549 
// (generated) ~(src2 ^ src1) — matched in the canonical form
// (XorL -1 (XorL src2 src1)) — folded into a single eon, long.
instruct XorL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11566 
// ===== AND with shifted, inverted operand: BIC (bit clear) =====
// Matches (And src1 (Xor (shift src2 src3) -1)), i.e.
//   dst = src1 & ~(src2 <shift> src3)
// and emits a single BIC/BICW with a shifted-register second operand.
// The shift-count constant is masked to the operand width (& 0x1f for
// 32-bit, & 0x3f for 64-bit), mirroring the encoding's valid range.
// NOTE(review): rFlagsReg cr is declared but unused in every rule of
// this family — presumably a generator artifact; confirm.

// dst = src1 & ~(src2 >>> src3), 32-bit, logical shift right (LSR).
11567 instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
11568                          iRegIorL2I src1, iRegIorL2I src2,
11569                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11570   match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
11571   ins_cost(1.9 * INSN_COST);
11572   format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}
11573 
11574   ins_encode %{
11575     __ bicw(as_Register($dst$$reg),
11576               as_Register($src1$$reg),
11577               as_Register($src2$$reg),
11578               Assembler::LSR,
11579               $src3$$constant & 0x1f);
11580   %}
11581 
11582   ins_pipe(ialu_reg_reg_shift);
11583 %}
11584 
// dst = src1 & ~(src2 >>> src3), 64-bit, LSR.
11585 instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
11586                          iRegL src1, iRegL src2,
11587                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11588   match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
11589   ins_cost(1.9 * INSN_COST);
11590   format %{ "bic  $dst, $src1, $src2, LSR $src3" %}
11591 
11592   ins_encode %{
11593     __ bic(as_Register($dst$$reg),
11594               as_Register($src1$$reg),
11595               as_Register($src2$$reg),
11596               Assembler::LSR,
11597               $src3$$constant & 0x3f);
11598   %}
11599 
11600   ins_pipe(ialu_reg_reg_shift);
11601 %}
11602 
// dst = src1 & ~(src2 >> src3), 32-bit, arithmetic shift right (ASR).
11603 instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
11604                          iRegIorL2I src1, iRegIorL2I src2,
11605                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11606   match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
11607   ins_cost(1.9 * INSN_COST);
11608   format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}
11609 
11610   ins_encode %{
11611     __ bicw(as_Register($dst$$reg),
11612               as_Register($src1$$reg),
11613               as_Register($src2$$reg),
11614               Assembler::ASR,
11615               $src3$$constant & 0x1f);
11616   %}
11617 
11618   ins_pipe(ialu_reg_reg_shift);
11619 %}
11620 
// dst = src1 & ~(src2 >> src3), 64-bit, ASR.
11621 instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
11622                          iRegL src1, iRegL src2,
11623                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11624   match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
11625   ins_cost(1.9 * INSN_COST);
11626   format %{ "bic  $dst, $src1, $src2, ASR $src3" %}
11627 
11628   ins_encode %{
11629     __ bic(as_Register($dst$$reg),
11630               as_Register($src1$$reg),
11631               as_Register($src2$$reg),
11632               Assembler::ASR,
11633               $src3$$constant & 0x3f);
11634   %}
11635 
11636   ins_pipe(ialu_reg_reg_shift);
11637 %}
11638 
// dst = src1 & ~(src2 << src3), 32-bit, logical shift left (LSL).
11639 instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
11640                          iRegIorL2I src1, iRegIorL2I src2,
11641                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11642   match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
11643   ins_cost(1.9 * INSN_COST);
11644   format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}
11645 
11646   ins_encode %{
11647     __ bicw(as_Register($dst$$reg),
11648               as_Register($src1$$reg),
11649               as_Register($src2$$reg),
11650               Assembler::LSL,
11651               $src3$$constant & 0x1f);
11652   %}
11653 
11654   ins_pipe(ialu_reg_reg_shift);
11655 %}
11656 
// dst = src1 & ~(src2 << src3), 64-bit, LSL.
11657 instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
11658                          iRegL src1, iRegL src2,
11659                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11660   match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
11661   ins_cost(1.9 * INSN_COST);
11662   format %{ "bic  $dst, $src1, $src2, LSL $src3" %}
11663 
11664   ins_encode %{
11665     __ bic(as_Register($dst$$reg),
11666               as_Register($src1$$reg),
11667               as_Register($src2$$reg),
11668               Assembler::LSL,
11669               $src3$$constant & 0x3f);
11670   %}
11671 
11672   ins_pipe(ialu_reg_reg_shift);
11673 %}
11674 
// ===== XOR with shifted, inverted operand: EON =====
// Matches (Xor -1 (Xor (shift src2 src3) src1)), i.e.
//   dst = ~((src2 <shift> src3) ^ src1) == src1 ^ ~(src2 <shift> src3)
// and emits a single EON/EONW with a shifted-register second operand.
// Shift counts are masked to the operand width (0x1f / 0x3f).
// NOTE(review): rFlagsReg cr is declared but unused in every rule of
// this family — presumably a generator artifact; confirm.

// dst = src1 ^ ~(src2 >>> src3), 32-bit, LSR.
11675 instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
11676                          iRegIorL2I src1, iRegIorL2I src2,
11677                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11678   match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
11679   ins_cost(1.9 * INSN_COST);
11680   format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}
11681 
11682   ins_encode %{
11683     __ eonw(as_Register($dst$$reg),
11684               as_Register($src1$$reg),
11685               as_Register($src2$$reg),
11686               Assembler::LSR,
11687               $src3$$constant & 0x1f);
11688   %}
11689 
11690   ins_pipe(ialu_reg_reg_shift);
11691 %}
11692 
// dst = src1 ^ ~(src2 >>> src3), 64-bit, LSR.
11693 instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
11694                          iRegL src1, iRegL src2,
11695                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11696   match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
11697   ins_cost(1.9 * INSN_COST);
11698   format %{ "eon  $dst, $src1, $src2, LSR $src3" %}
11699 
11700   ins_encode %{
11701     __ eon(as_Register($dst$$reg),
11702               as_Register($src1$$reg),
11703               as_Register($src2$$reg),
11704               Assembler::LSR,
11705               $src3$$constant & 0x3f);
11706   %}
11707 
11708   ins_pipe(ialu_reg_reg_shift);
11709 %}
11710 
// dst = src1 ^ ~(src2 >> src3), 32-bit, ASR.
11711 instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
11712                          iRegIorL2I src1, iRegIorL2I src2,
11713                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11714   match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
11715   ins_cost(1.9 * INSN_COST);
11716   format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}
11717 
11718   ins_encode %{
11719     __ eonw(as_Register($dst$$reg),
11720               as_Register($src1$$reg),
11721               as_Register($src2$$reg),
11722               Assembler::ASR,
11723               $src3$$constant & 0x1f);
11724   %}
11725 
11726   ins_pipe(ialu_reg_reg_shift);
11727 %}
11728 
// dst = src1 ^ ~(src2 >> src3), 64-bit, ASR.
11729 instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
11730                          iRegL src1, iRegL src2,
11731                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11732   match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
11733   ins_cost(1.9 * INSN_COST);
11734   format %{ "eon  $dst, $src1, $src2, ASR $src3" %}
11735 
11736   ins_encode %{
11737     __ eon(as_Register($dst$$reg),
11738               as_Register($src1$$reg),
11739               as_Register($src2$$reg),
11740               Assembler::ASR,
11741               $src3$$constant & 0x3f);
11742   %}
11743 
11744   ins_pipe(ialu_reg_reg_shift);
11745 %}
11746 
// dst = src1 ^ ~(src2 << src3), 32-bit, LSL.
11747 instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
11748                          iRegIorL2I src1, iRegIorL2I src2,
11749                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11750   match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
11751   ins_cost(1.9 * INSN_COST);
11752   format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}
11753 
11754   ins_encode %{
11755     __ eonw(as_Register($dst$$reg),
11756               as_Register($src1$$reg),
11757               as_Register($src2$$reg),
11758               Assembler::LSL,
11759               $src3$$constant & 0x1f);
11760   %}
11761 
11762   ins_pipe(ialu_reg_reg_shift);
11763 %}
11764 
// dst = src1 ^ ~(src2 << src3), 64-bit, LSL.
11765 instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
11766                          iRegL src1, iRegL src2,
11767                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11768   match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
11769   ins_cost(1.9 * INSN_COST);
11770   format %{ "eon  $dst, $src1, $src2, LSL $src3" %}
11771 
11772   ins_encode %{
11773     __ eon(as_Register($dst$$reg),
11774               as_Register($src1$$reg),
11775               as_Register($src2$$reg),
11776               Assembler::LSL,
11777               $src3$$constant & 0x3f);
11778   %}
11779 
11780   ins_pipe(ialu_reg_reg_shift);
11781 %}
11782 
// ===== OR with shifted, inverted operand: ORN =====
// Matches (Or src1 (Xor (shift src2 src3) -1)), i.e.
//   dst = src1 | ~(src2 <shift> src3)
// and emits a single ORN/ORNW with a shifted-register second operand.
// Shift counts are masked to the operand width (0x1f / 0x3f).
// NOTE(review): rFlagsReg cr is declared but unused in every rule of
// this family — presumably a generator artifact; confirm.

// dst = src1 | ~(src2 >>> src3), 32-bit, LSR.
11783 instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
11784                          iRegIorL2I src1, iRegIorL2I src2,
11785                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11786   match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
11787   ins_cost(1.9 * INSN_COST);
11788   format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}
11789 
11790   ins_encode %{
11791     __ ornw(as_Register($dst$$reg),
11792               as_Register($src1$$reg),
11793               as_Register($src2$$reg),
11794               Assembler::LSR,
11795               $src3$$constant & 0x1f);
11796   %}
11797 
11798   ins_pipe(ialu_reg_reg_shift);
11799 %}
11800 
// dst = src1 | ~(src2 >>> src3), 64-bit, LSR.
11801 instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
11802                          iRegL src1, iRegL src2,
11803                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11804   match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
11805   ins_cost(1.9 * INSN_COST);
11806   format %{ "orn  $dst, $src1, $src2, LSR $src3" %}
11807 
11808   ins_encode %{
11809     __ orn(as_Register($dst$$reg),
11810               as_Register($src1$$reg),
11811               as_Register($src2$$reg),
11812               Assembler::LSR,
11813               $src3$$constant & 0x3f);
11814   %}
11815 
11816   ins_pipe(ialu_reg_reg_shift);
11817 %}
11818 
// dst = src1 | ~(src2 >> src3), 32-bit, ASR.
11819 instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
11820                          iRegIorL2I src1, iRegIorL2I src2,
11821                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11822   match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
11823   ins_cost(1.9 * INSN_COST);
11824   format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}
11825 
11826   ins_encode %{
11827     __ ornw(as_Register($dst$$reg),
11828               as_Register($src1$$reg),
11829               as_Register($src2$$reg),
11830               Assembler::ASR,
11831               $src3$$constant & 0x1f);
11832   %}
11833 
11834   ins_pipe(ialu_reg_reg_shift);
11835 %}
11836 
// dst = src1 | ~(src2 >> src3), 64-bit, ASR.
11837 instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
11838                          iRegL src1, iRegL src2,
11839                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11840   match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
11841   ins_cost(1.9 * INSN_COST);
11842   format %{ "orn  $dst, $src1, $src2, ASR $src3" %}
11843 
11844   ins_encode %{
11845     __ orn(as_Register($dst$$reg),
11846               as_Register($src1$$reg),
11847               as_Register($src2$$reg),
11848               Assembler::ASR,
11849               $src3$$constant & 0x3f);
11850   %}
11851 
11852   ins_pipe(ialu_reg_reg_shift);
11853 %}
11854 
// dst = src1 | ~(src2 << src3), 32-bit, LSL.
11855 instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
11856                          iRegIorL2I src1, iRegIorL2I src2,
11857                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11858   match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
11859   ins_cost(1.9 * INSN_COST);
11860   format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}
11861 
11862   ins_encode %{
11863     __ ornw(as_Register($dst$$reg),
11864               as_Register($src1$$reg),
11865               as_Register($src2$$reg),
11866               Assembler::LSL,
11867               $src3$$constant & 0x1f);
11868   %}
11869 
11870   ins_pipe(ialu_reg_reg_shift);
11871 %}
11872 
// dst = src1 | ~(src2 << src3), 64-bit, LSL.
11873 instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
11874                          iRegL src1, iRegL src2,
11875                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11876   match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
11877   ins_cost(1.9 * INSN_COST);
11878   format %{ "orn  $dst, $src1, $src2, LSL $src3" %}
11879 
11880   ins_encode %{
11881     __ orn(as_Register($dst$$reg),
11882               as_Register($src1$$reg),
11883               as_Register($src2$$reg),
11884               Assembler::LSL,
11885               $src3$$constant & 0x3f);
11886   %}
11887 
11888   ins_pipe(ialu_reg_reg_shift);
11889 %}
11890 
// ===== AND with shifted register operand =====
// dst = src1 & (src2 <shift> src3), folded into one ANDW/ANDR using the
// shifted-register form.  Shift-count immediates are masked to the
// operand width (& 0x1f for 32-bit, & 0x3f for 64-bit).
// NOTE(review): rFlagsReg cr is declared but unused in every rule of
// this family — presumably a generator artifact; confirm.

// dst = src1 & (src2 >>> src3), 32-bit, LSR.
11891 instruct AndI_reg_URShift_reg(iRegINoSp dst,
11892                          iRegIorL2I src1, iRegIorL2I src2,
11893                          immI src3, rFlagsReg cr) %{
11894   match(Set dst (AndI src1 (URShiftI src2 src3)));
11895 
11896   ins_cost(1.9 * INSN_COST);
11897   format %{ "andw  $dst, $src1, $src2, LSR $src3" %}
11898 
11899   ins_encode %{
11900     __ andw(as_Register($dst$$reg),
11901               as_Register($src1$$reg),
11902               as_Register($src2$$reg),
11903               Assembler::LSR,
11904               $src3$$constant & 0x1f);
11905   %}
11906 
11907   ins_pipe(ialu_reg_reg_shift);
11908 %}
11909 
// dst = src1 & (src2 >>> src3), 64-bit, LSR.
11910 instruct AndL_reg_URShift_reg(iRegLNoSp dst,
11911                          iRegL src1, iRegL src2,
11912                          immI src3, rFlagsReg cr) %{
11913   match(Set dst (AndL src1 (URShiftL src2 src3)));
11914 
11915   ins_cost(1.9 * INSN_COST);
11916   format %{ "andr  $dst, $src1, $src2, LSR $src3" %}
11917 
11918   ins_encode %{
11919     __ andr(as_Register($dst$$reg),
11920               as_Register($src1$$reg),
11921               as_Register($src2$$reg),
11922               Assembler::LSR,
11923               $src3$$constant & 0x3f);
11924   %}
11925 
11926   ins_pipe(ialu_reg_reg_shift);
11927 %}
11928 
// dst = src1 & (src2 >> src3), 32-bit, ASR.
11929 instruct AndI_reg_RShift_reg(iRegINoSp dst,
11930                          iRegIorL2I src1, iRegIorL2I src2,
11931                          immI src3, rFlagsReg cr) %{
11932   match(Set dst (AndI src1 (RShiftI src2 src3)));
11933 
11934   ins_cost(1.9 * INSN_COST);
11935   format %{ "andw  $dst, $src1, $src2, ASR $src3" %}
11936 
11937   ins_encode %{
11938     __ andw(as_Register($dst$$reg),
11939               as_Register($src1$$reg),
11940               as_Register($src2$$reg),
11941               Assembler::ASR,
11942               $src3$$constant & 0x1f);
11943   %}
11944 
11945   ins_pipe(ialu_reg_reg_shift);
11946 %}
11947 
// dst = src1 & (src2 >> src3), 64-bit, ASR.
11948 instruct AndL_reg_RShift_reg(iRegLNoSp dst,
11949                          iRegL src1, iRegL src2,
11950                          immI src3, rFlagsReg cr) %{
11951   match(Set dst (AndL src1 (RShiftL src2 src3)));
11952 
11953   ins_cost(1.9 * INSN_COST);
11954   format %{ "andr  $dst, $src1, $src2, ASR $src3" %}
11955 
11956   ins_encode %{
11957     __ andr(as_Register($dst$$reg),
11958               as_Register($src1$$reg),
11959               as_Register($src2$$reg),
11960               Assembler::ASR,
11961               $src3$$constant & 0x3f);
11962   %}
11963 
11964   ins_pipe(ialu_reg_reg_shift);
11965 %}
11966 
// dst = src1 & (src2 << src3), 32-bit, LSL.
11967 instruct AndI_reg_LShift_reg(iRegINoSp dst,
11968                          iRegIorL2I src1, iRegIorL2I src2,
11969                          immI src3, rFlagsReg cr) %{
11970   match(Set dst (AndI src1 (LShiftI src2 src3)));
11971 
11972   ins_cost(1.9 * INSN_COST);
11973   format %{ "andw  $dst, $src1, $src2, LSL $src3" %}
11974 
11975   ins_encode %{
11976     __ andw(as_Register($dst$$reg),
11977               as_Register($src1$$reg),
11978               as_Register($src2$$reg),
11979               Assembler::LSL,
11980               $src3$$constant & 0x1f);
11981   %}
11982 
11983   ins_pipe(ialu_reg_reg_shift);
11984 %}
11985 
// dst = src1 & (src2 << src3), 64-bit, LSL.
11986 instruct AndL_reg_LShift_reg(iRegLNoSp dst,
11987                          iRegL src1, iRegL src2,
11988                          immI src3, rFlagsReg cr) %{
11989   match(Set dst (AndL src1 (LShiftL src2 src3)));
11990 
11991   ins_cost(1.9 * INSN_COST);
11992   format %{ "andr  $dst, $src1, $src2, LSL $src3" %}
11993 
11994   ins_encode %{
11995     __ andr(as_Register($dst$$reg),
11996               as_Register($src1$$reg),
11997               as_Register($src2$$reg),
11998               Assembler::LSL,
11999               $src3$$constant & 0x3f);
12000   %}
12001 
12002   ins_pipe(ialu_reg_reg_shift);
12003 %}
12004 
// ===== XOR with shifted register operand =====
// dst = src1 ^ (src2 <shift> src3), folded into one EORW/EOR using the
// shifted-register form.  Shift-count immediates are masked to the
// operand width (& 0x1f for 32-bit, & 0x3f for 64-bit).
// NOTE(review): rFlagsReg cr is declared but unused in every rule of
// this family — presumably a generator artifact; confirm.

// dst = src1 ^ (src2 >>> src3), 32-bit, LSR.
12005 instruct XorI_reg_URShift_reg(iRegINoSp dst,
12006                          iRegIorL2I src1, iRegIorL2I src2,
12007                          immI src3, rFlagsReg cr) %{
12008   match(Set dst (XorI src1 (URShiftI src2 src3)));
12009 
12010   ins_cost(1.9 * INSN_COST);
12011   format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}
12012 
12013   ins_encode %{
12014     __ eorw(as_Register($dst$$reg),
12015               as_Register($src1$$reg),
12016               as_Register($src2$$reg),
12017               Assembler::LSR,
12018               $src3$$constant & 0x1f);
12019   %}
12020 
12021   ins_pipe(ialu_reg_reg_shift);
12022 %}
12023 
// dst = src1 ^ (src2 >>> src3), 64-bit, LSR.
12024 instruct XorL_reg_URShift_reg(iRegLNoSp dst,
12025                          iRegL src1, iRegL src2,
12026                          immI src3, rFlagsReg cr) %{
12027   match(Set dst (XorL src1 (URShiftL src2 src3)));
12028 
12029   ins_cost(1.9 * INSN_COST);
12030   format %{ "eor  $dst, $src1, $src2, LSR $src3" %}
12031 
12032   ins_encode %{
12033     __ eor(as_Register($dst$$reg),
12034               as_Register($src1$$reg),
12035               as_Register($src2$$reg),
12036               Assembler::LSR,
12037               $src3$$constant & 0x3f);
12038   %}
12039 
12040   ins_pipe(ialu_reg_reg_shift);
12041 %}
12042 
// dst = src1 ^ (src2 >> src3), 32-bit, ASR.
12043 instruct XorI_reg_RShift_reg(iRegINoSp dst,
12044                          iRegIorL2I src1, iRegIorL2I src2,
12045                          immI src3, rFlagsReg cr) %{
12046   match(Set dst (XorI src1 (RShiftI src2 src3)));
12047 
12048   ins_cost(1.9 * INSN_COST);
12049   format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}
12050 
12051   ins_encode %{
12052     __ eorw(as_Register($dst$$reg),
12053               as_Register($src1$$reg),
12054               as_Register($src2$$reg),
12055               Assembler::ASR,
12056               $src3$$constant & 0x1f);
12057   %}
12058 
12059   ins_pipe(ialu_reg_reg_shift);
12060 %}
12061 
// dst = src1 ^ (src2 >> src3), 64-bit, ASR.
12062 instruct XorL_reg_RShift_reg(iRegLNoSp dst,
12063                          iRegL src1, iRegL src2,
12064                          immI src3, rFlagsReg cr) %{
12065   match(Set dst (XorL src1 (RShiftL src2 src3)));
12066 
12067   ins_cost(1.9 * INSN_COST);
12068   format %{ "eor  $dst, $src1, $src2, ASR $src3" %}
12069 
12070   ins_encode %{
12071     __ eor(as_Register($dst$$reg),
12072               as_Register($src1$$reg),
12073               as_Register($src2$$reg),
12074               Assembler::ASR,
12075               $src3$$constant & 0x3f);
12076   %}
12077 
12078   ins_pipe(ialu_reg_reg_shift);
12079 %}
12080 
// dst = src1 ^ (src2 << src3), 32-bit, LSL.
12081 instruct XorI_reg_LShift_reg(iRegINoSp dst,
12082                          iRegIorL2I src1, iRegIorL2I src2,
12083                          immI src3, rFlagsReg cr) %{
12084   match(Set dst (XorI src1 (LShiftI src2 src3)));
12085 
12086   ins_cost(1.9 * INSN_COST);
12087   format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}
12088 
12089   ins_encode %{
12090     __ eorw(as_Register($dst$$reg),
12091               as_Register($src1$$reg),
12092               as_Register($src2$$reg),
12093               Assembler::LSL,
12094               $src3$$constant & 0x1f);
12095   %}
12096 
12097   ins_pipe(ialu_reg_reg_shift);
12098 %}
12099 
// dst = src1 ^ (src2 << src3), 64-bit, LSL.
12100 instruct XorL_reg_LShift_reg(iRegLNoSp dst,
12101                          iRegL src1, iRegL src2,
12102                          immI src3, rFlagsReg cr) %{
12103   match(Set dst (XorL src1 (LShiftL src2 src3)));
12104 
12105   ins_cost(1.9 * INSN_COST);
12106   format %{ "eor  $dst, $src1, $src2, LSL $src3" %}
12107 
12108   ins_encode %{
12109     __ eor(as_Register($dst$$reg),
12110               as_Register($src1$$reg),
12111               as_Register($src2$$reg),
12112               Assembler::LSL,
12113               $src3$$constant & 0x3f);
12114   %}
12115 
12116   ins_pipe(ialu_reg_reg_shift);
12117 %}
12118 
// ===== OR with shifted register operand =====
// dst = src1 | (src2 <shift> src3), folded into one ORRW/ORR using the
// shifted-register form.  Shift-count immediates are masked to the
// operand width (& 0x1f for 32-bit, & 0x3f for 64-bit).
// NOTE(review): rFlagsReg cr is declared but unused in every rule of
// this family — presumably a generator artifact; confirm.

// dst = src1 | (src2 >>> src3), 32-bit, LSR.
12119 instruct OrI_reg_URShift_reg(iRegINoSp dst,
12120                          iRegIorL2I src1, iRegIorL2I src2,
12121                          immI src3, rFlagsReg cr) %{
12122   match(Set dst (OrI src1 (URShiftI src2 src3)));
12123 
12124   ins_cost(1.9 * INSN_COST);
12125   format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}
12126 
12127   ins_encode %{
12128     __ orrw(as_Register($dst$$reg),
12129               as_Register($src1$$reg),
12130               as_Register($src2$$reg),
12131               Assembler::LSR,
12132               $src3$$constant & 0x1f);
12133   %}
12134 
12135   ins_pipe(ialu_reg_reg_shift);
12136 %}
12137 
// dst = src1 | (src2 >>> src3), 64-bit, LSR.
12138 instruct OrL_reg_URShift_reg(iRegLNoSp dst,
12139                          iRegL src1, iRegL src2,
12140                          immI src3, rFlagsReg cr) %{
12141   match(Set dst (OrL src1 (URShiftL src2 src3)));
12142 
12143   ins_cost(1.9 * INSN_COST);
12144   format %{ "orr  $dst, $src1, $src2, LSR $src3" %}
12145 
12146   ins_encode %{
12147     __ orr(as_Register($dst$$reg),
12148               as_Register($src1$$reg),
12149               as_Register($src2$$reg),
12150               Assembler::LSR,
12151               $src3$$constant & 0x3f);
12152   %}
12153 
12154   ins_pipe(ialu_reg_reg_shift);
12155 %}
12156 
// dst = src1 | (src2 >> src3), 32-bit, ASR.
12157 instruct OrI_reg_RShift_reg(iRegINoSp dst,
12158                          iRegIorL2I src1, iRegIorL2I src2,
12159                          immI src3, rFlagsReg cr) %{
12160   match(Set dst (OrI src1 (RShiftI src2 src3)));
12161 
12162   ins_cost(1.9 * INSN_COST);
12163   format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}
12164 
12165   ins_encode %{
12166     __ orrw(as_Register($dst$$reg),
12167               as_Register($src1$$reg),
12168               as_Register($src2$$reg),
12169               Assembler::ASR,
12170               $src3$$constant & 0x1f);
12171   %}
12172 
12173   ins_pipe(ialu_reg_reg_shift);
12174 %}
12175 
// dst = src1 | (src2 >> src3), 64-bit, ASR.
12176 instruct OrL_reg_RShift_reg(iRegLNoSp dst,
12177                          iRegL src1, iRegL src2,
12178                          immI src3, rFlagsReg cr) %{
12179   match(Set dst (OrL src1 (RShiftL src2 src3)));
12180 
12181   ins_cost(1.9 * INSN_COST);
12182   format %{ "orr  $dst, $src1, $src2, ASR $src3" %}
12183 
12184   ins_encode %{
12185     __ orr(as_Register($dst$$reg),
12186               as_Register($src1$$reg),
12187               as_Register($src2$$reg),
12188               Assembler::ASR,
12189               $src3$$constant & 0x3f);
12190   %}
12191 
12192   ins_pipe(ialu_reg_reg_shift);
12193 %}
12194 
// dst = src1 | (src2 << src3), 32-bit, LSL.
12195 instruct OrI_reg_LShift_reg(iRegINoSp dst,
12196                          iRegIorL2I src1, iRegIorL2I src2,
12197                          immI src3, rFlagsReg cr) %{
12198   match(Set dst (OrI src1 (LShiftI src2 src3)));
12199 
12200   ins_cost(1.9 * INSN_COST);
12201   format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}
12202 
12203   ins_encode %{
12204     __ orrw(as_Register($dst$$reg),
12205               as_Register($src1$$reg),
12206               as_Register($src2$$reg),
12207               Assembler::LSL,
12208               $src3$$constant & 0x1f);
12209   %}
12210 
12211   ins_pipe(ialu_reg_reg_shift);
12212 %}
12213 
// dst = src1 | (src2 << src3), 64-bit, LSL.
12214 instruct OrL_reg_LShift_reg(iRegLNoSp dst,
12215                          iRegL src1, iRegL src2,
12216                          immI src3, rFlagsReg cr) %{
12217   match(Set dst (OrL src1 (LShiftL src2 src3)));
12218 
12219   ins_cost(1.9 * INSN_COST);
12220   format %{ "orr  $dst, $src1, $src2, LSL $src3" %}
12221 
12222   ins_encode %{
12223     __ orr(as_Register($dst$$reg),
12224               as_Register($src1$$reg),
12225               as_Register($src2$$reg),
12226               Assembler::LSL,
12227               $src3$$constant & 0x3f);
12228   %}
12229 
12230   ins_pipe(ialu_reg_reg_shift);
12231 %}
12232 
// ===== ADD with shifted register operand =====
// dst = src1 + (src2 <shift> src3), folded into one ADDW/ADD using the
// shifted-register form.  Shift-count immediates are masked to the
// operand width (& 0x1f for 32-bit, & 0x3f for 64-bit).
// NOTE(review): rFlagsReg cr is declared but unused in every rule of
// this family — presumably a generator artifact; confirm.

// dst = src1 + (src2 >>> src3), 32-bit, LSR.
12233 instruct AddI_reg_URShift_reg(iRegINoSp dst,
12234                          iRegIorL2I src1, iRegIorL2I src2,
12235                          immI src3, rFlagsReg cr) %{
12236   match(Set dst (AddI src1 (URShiftI src2 src3)));
12237 
12238   ins_cost(1.9 * INSN_COST);
12239   format %{ "addw  $dst, $src1, $src2, LSR $src3" %}
12240 
12241   ins_encode %{
12242     __ addw(as_Register($dst$$reg),
12243               as_Register($src1$$reg),
12244               as_Register($src2$$reg),
12245               Assembler::LSR,
12246               $src3$$constant & 0x1f);
12247   %}
12248 
12249   ins_pipe(ialu_reg_reg_shift);
12250 %}
12251 
// dst = src1 + (src2 >>> src3), 64-bit, LSR.
12252 instruct AddL_reg_URShift_reg(iRegLNoSp dst,
12253                          iRegL src1, iRegL src2,
12254                          immI src3, rFlagsReg cr) %{
12255   match(Set dst (AddL src1 (URShiftL src2 src3)));
12256 
12257   ins_cost(1.9 * INSN_COST);
12258   format %{ "add  $dst, $src1, $src2, LSR $src3" %}
12259 
12260   ins_encode %{
12261     __ add(as_Register($dst$$reg),
12262               as_Register($src1$$reg),
12263               as_Register($src2$$reg),
12264               Assembler::LSR,
12265               $src3$$constant & 0x3f);
12266   %}
12267 
12268   ins_pipe(ialu_reg_reg_shift);
12269 %}
12270 
// dst = src1 + (src2 >> src3), 32-bit, ASR.
12271 instruct AddI_reg_RShift_reg(iRegINoSp dst,
12272                          iRegIorL2I src1, iRegIorL2I src2,
12273                          immI src3, rFlagsReg cr) %{
12274   match(Set dst (AddI src1 (RShiftI src2 src3)));
12275 
12276   ins_cost(1.9 * INSN_COST);
12277   format %{ "addw  $dst, $src1, $src2, ASR $src3" %}
12278 
12279   ins_encode %{
12280     __ addw(as_Register($dst$$reg),
12281               as_Register($src1$$reg),
12282               as_Register($src2$$reg),
12283               Assembler::ASR,
12284               $src3$$constant & 0x1f);
12285   %}
12286 
12287   ins_pipe(ialu_reg_reg_shift);
12288 %}
12289 
// dst = src1 + (src2 >> src3), 64-bit, ASR.
12290 instruct AddL_reg_RShift_reg(iRegLNoSp dst,
12291                          iRegL src1, iRegL src2,
12292                          immI src3, rFlagsReg cr) %{
12293   match(Set dst (AddL src1 (RShiftL src2 src3)));
12294 
12295   ins_cost(1.9 * INSN_COST);
12296   format %{ "add  $dst, $src1, $src2, ASR $src3" %}
12297 
12298   ins_encode %{
12299     __ add(as_Register($dst$$reg),
12300               as_Register($src1$$reg),
12301               as_Register($src2$$reg),
12302               Assembler::ASR,
12303               $src3$$constant & 0x3f);
12304   %}
12305 
12306   ins_pipe(ialu_reg_reg_shift);
12307 %}
12308 
// dst = src1 + (src2 << src3), 32-bit, LSL.
12309 instruct AddI_reg_LShift_reg(iRegINoSp dst,
12310                          iRegIorL2I src1, iRegIorL2I src2,
12311                          immI src3, rFlagsReg cr) %{
12312   match(Set dst (AddI src1 (LShiftI src2 src3)));
12313 
12314   ins_cost(1.9 * INSN_COST);
12315   format %{ "addw  $dst, $src1, $src2, LSL $src3" %}
12316 
12317   ins_encode %{
12318     __ addw(as_Register($dst$$reg),
12319               as_Register($src1$$reg),
12320               as_Register($src2$$reg),
12321               Assembler::LSL,
12322               $src3$$constant & 0x1f);
12323   %}
12324 
12325   ins_pipe(ialu_reg_reg_shift);
12326 %}
12327 
// dst = src1 + (src2 << src3), 64-bit, LSL.
12328 instruct AddL_reg_LShift_reg(iRegLNoSp dst,
12329                          iRegL src1, iRegL src2,
12330                          immI src3, rFlagsReg cr) %{
12331   match(Set dst (AddL src1 (LShiftL src2 src3)));
12332 
12333   ins_cost(1.9 * INSN_COST);
12334   format %{ "add  $dst, $src1, $src2, LSL $src3" %}
12335 
12336   ins_encode %{
12337     __ add(as_Register($dst$$reg),
12338               as_Register($src1$$reg),
12339               as_Register($src2$$reg),
12340               Assembler::LSL,
12341               $src3$$constant & 0x3f);
12342   %}
12343 
12344   ins_pipe(ialu_reg_reg_shift);
12345 %}
12346 
// ===== SUB with shifted register operand =====
// dst = src1 - (src2 <shift> src3), folded into one SUBW/SUB using the
// shifted-register form.  Shift-count immediates are masked to the
// operand width (& 0x1f for 32-bit, & 0x3f for 64-bit).
// NOTE(review): rFlagsReg cr is declared but unused in every rule of
// this family — presumably a generator artifact; confirm.

// dst = src1 - (src2 >>> src3), 32-bit, LSR.
12347 instruct SubI_reg_URShift_reg(iRegINoSp dst,
12348                          iRegIorL2I src1, iRegIorL2I src2,
12349                          immI src3, rFlagsReg cr) %{
12350   match(Set dst (SubI src1 (URShiftI src2 src3)));
12351 
12352   ins_cost(1.9 * INSN_COST);
12353   format %{ "subw  $dst, $src1, $src2, LSR $src3" %}
12354 
12355   ins_encode %{
12356     __ subw(as_Register($dst$$reg),
12357               as_Register($src1$$reg),
12358               as_Register($src2$$reg),
12359               Assembler::LSR,
12360               $src3$$constant & 0x1f);
12361   %}
12362 
12363   ins_pipe(ialu_reg_reg_shift);
12364 %}
12365 
// dst = src1 - (src2 >>> src3), 64-bit, LSR.
12366 instruct SubL_reg_URShift_reg(iRegLNoSp dst,
12367                          iRegL src1, iRegL src2,
12368                          immI src3, rFlagsReg cr) %{
12369   match(Set dst (SubL src1 (URShiftL src2 src3)));
12370 
12371   ins_cost(1.9 * INSN_COST);
12372   format %{ "sub  $dst, $src1, $src2, LSR $src3" %}
12373 
12374   ins_encode %{
12375     __ sub(as_Register($dst$$reg),
12376               as_Register($src1$$reg),
12377               as_Register($src2$$reg),
12378               Assembler::LSR,
12379               $src3$$constant & 0x3f);
12380   %}
12381 
12382   ins_pipe(ialu_reg_reg_shift);
12383 %}
12384 
// dst = src1 - (src2 >> src3), 32-bit, ASR.
12385 instruct SubI_reg_RShift_reg(iRegINoSp dst,
12386                          iRegIorL2I src1, iRegIorL2I src2,
12387                          immI src3, rFlagsReg cr) %{
12388   match(Set dst (SubI src1 (RShiftI src2 src3)));
12389 
12390   ins_cost(1.9 * INSN_COST);
12391   format %{ "subw  $dst, $src1, $src2, ASR $src3" %}
12392 
12393   ins_encode %{
12394     __ subw(as_Register($dst$$reg),
12395               as_Register($src1$$reg),
12396               as_Register($src2$$reg),
12397               Assembler::ASR,
12398               $src3$$constant & 0x1f);
12399   %}
12400 
12401   ins_pipe(ialu_reg_reg_shift);
12402 %}
12403 
// dst = src1 - (src2 >> src3), 64-bit, ASR.
12404 instruct SubL_reg_RShift_reg(iRegLNoSp dst,
12405                          iRegL src1, iRegL src2,
12406                          immI src3, rFlagsReg cr) %{
12407   match(Set dst (SubL src1 (RShiftL src2 src3)));
12408 
12409   ins_cost(1.9 * INSN_COST);
12410   format %{ "sub  $dst, $src1, $src2, ASR $src3" %}
12411 
12412   ins_encode %{
12413     __ sub(as_Register($dst$$reg),
12414               as_Register($src1$$reg),
12415               as_Register($src2$$reg),
12416               Assembler::ASR,
12417               $src3$$constant & 0x3f);
12418   %}
12419 
12420   ins_pipe(ialu_reg_reg_shift);
12421 %}
12422 
// dst = src1 - (src2 << src3), 32-bit, LSL.
12423 instruct SubI_reg_LShift_reg(iRegINoSp dst,
12424                          iRegIorL2I src1, iRegIorL2I src2,
12425                          immI src3, rFlagsReg cr) %{
12426   match(Set dst (SubI src1 (LShiftI src2 src3)));
12427 
12428   ins_cost(1.9 * INSN_COST);
12429   format %{ "subw  $dst, $src1, $src2, LSL $src3" %}
12430 
12431   ins_encode %{
12432     __ subw(as_Register($dst$$reg),
12433               as_Register($src1$$reg),
12434               as_Register($src2$$reg),
12435               Assembler::LSL,
12436               $src3$$constant & 0x1f);
12437   %}
12438 
12439   ins_pipe(ialu_reg_reg_shift);
12440 %}
12441 
// dst = src1 - (src2 << src3), 64-bit, LSL.
12442 instruct SubL_reg_LShift_reg(iRegLNoSp dst,
12443                          iRegL src1, iRegL src2,
12444                          immI src3, rFlagsReg cr) %{
12445   match(Set dst (SubL src1 (LShiftL src2 src3)));
12446 
12447   ins_cost(1.9 * INSN_COST);
12448   format %{ "sub  $dst, $src1, $src2, LSL $src3" %}
12449 
12450   ins_encode %{
12451     __ sub(as_Register($dst$$reg),
12452               as_Register($src1$$reg),
12453               as_Register($src2$$reg),
12454               Assembler::LSL,
12455               $src3$$constant & 0x3f);
12456   %}
12457 
12458   ins_pipe(ialu_reg_reg_shift);
12459 %}
12460 
12461 
12462 
12463 // Shift Left followed by Shift Right.
12464 // This idiom is used by the compiler for the i2b bytecode etc.
// (src << lshift) >> rshift (arithmetic right shift) collapses into a
// single signed bitfield move: SBFM dst, src, immr, imms with
//   immr = (rshift - lshift) & 63   and   imms = 63 - lshift
// as computed in the encoding below.  The predicate rejects shift
// counts > 63, which the encoding cannot express (n->in(2) is the
// right-shift count, n->in(1)->in(2) the left-shift count).
12465 instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
12466 %{
12467   match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
12468   // Make sure we are not going to exceed what sbfm can do.
12469   predicate((unsigned int)n->in(2)->get_int() <= 63
12470             && (unsigned int)n->in(1)->in(2)->get_int() <= 63);
12471 
12472   ins_cost(INSN_COST * 2);
12473   format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
12474   ins_encode %{
12475     int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
12476     int s = 63 - lshift;
12477     int r = (rshift - lshift) & 63;
12478     __ sbfm(as_Register($dst$$reg),
12479             as_Register($src$$reg),
12480             r, s);
12481   %}
12482 
12483   ins_pipe(ialu_reg_shift);
12484 %}
12485 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of sbfmL: (src << lshift) >> rshift (arithmetic) becomes
// SBFMW with immr = (rshift - lshift) mod 32 and imms = 31 - lshift.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // imms: highest source bit that survives the left shift.
    int s = 31 - lshift;
    // immr: net right rotation, reduced mod 32.
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
12508 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Unsigned counterpart of sbfmL: (src << lshift) >>> rshift becomes
// UBFM with immr = (rshift - lshift) mod 64 and imms = 63 - lshift.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // imms: highest source bit that survives the left shift.
    int s = 63 - lshift;
    // immr: net right rotation, reduced mod 64.
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
12531 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit unsigned variant: (src << lshift) >>> rshift becomes UBFMW with
// immr = (rshift - lshift) mod 32 and imms = 31 - lshift.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // imms: highest source bit that survives the left shift.
    int s = 31 - lshift;
    // immr: net right rotation, reduced mod 32.
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
12554 // Bitfield extract with shift & mask
12555 
// Unsigned bitfield extract (32-bit): dst = (src >>> rshift) & mask.
// immI_bitmask guarantees mask is a contiguous run of low-order ones, so
// mask+1 is a power of two and exact_log2 gives the extracted field width.
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// Unsigned bitfield extract (64-bit): dst = (src >>> rshift) & mask.
// immL_bitmask guarantees mask is a contiguous run of low-order ones, so
// mask+1 is a power of two and its log2 gives the extracted field width.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    // mask is a 64-bit quantity, so use the long variant of exact_log2,
    // consistent with the exact_log2_long use in ubfizL's predicate.
    int width = exact_log2_long(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12586 
// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// The ConvI2L is free here: ubfx zero-extends into the 64-bit dst, which
// is exactly the I2L of a non-negative (masked) int value.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    // mask is int-ranged (immI_bitmask), so mask+1 fits exact_log2.
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12604 
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immI_bitmask guarantees it.
// dst = (src & mask) << lshift, where mask is a contiguous low-bits run:
// a single unsigned bitfield-insert-in-zero.
instruct ubfizwI(iRegINoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftI (AndI src mask) lshift));
  // The inserted field (width + lshift) must fit within 32 bits.
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2(n->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= (31+1));

  ins_cost(INSN_COST);
  format %{ "ubfizw $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfizw(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immL_bitmask guarantees it.
// dst = (src & mask) << lshift, where mask is a contiguous low-bits run:
// a single unsigned bitfield-insert-in-zero.
instruct ubfizL(iRegLNoSp dst, iRegL src, immI lshift, immL_bitmask mask)
%{
  match(Set dst (LShiftL (AndL src mask) lshift));
  // The inserted field (width + lshift) must fit within 64 bits.
  predicate((unsigned int)n->in(2)->get_int() <= 63 &&
    (exact_log2_long(n->in(1)->in(2)->get_long()+1) + (unsigned int)n->in(2)->get_int()) <= (63+1));

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    // mask is a 64-bit quantity: use exact_log2_long, matching the
    // predicate above (the int variant was an inconsistency).
    int width = exact_log2_long(mask+1);
    __ ubfiz(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12643 
// If there is a convert I to L block between and AndI and a LShiftL, we can also match ubfiz
// dst = ((long)(src & mask)) << lshift: the mask is non-negative
// (immI_bitmask), so the I2L is a zero-extension that ubfiz provides.
instruct ubfizIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftL (ConvI2L(AndI src mask)) lshift));
  // The inserted field (width + lshift) must stay within the low 32 bits.
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2((unsigned int)n->in(1)->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= 32);

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfiz(as_Register($dst$$reg),
             as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12662 
12663 // Rotations
12664 
// Long rotate expressed as (src1 << lshift) | (src2 >>> rshift) where the
// two shift counts sum to 64 (checked by the predicate): maps to EXTR.
instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  // lshift + rshift must be a multiple of 64 (i.e. they sum to 64 mod 64).
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12679 
// Int rotate expressed as (src1 << lshift) | (src2 >>> rshift) where the
// two shift counts sum to 32 (checked by the predicate): maps to EXTRW.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  // lshift + rshift must be a multiple of 32 (i.e. they sum to 32 mod 32).
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  // Print the 32-bit mnemonic: the encoding below emits extrw, not extr.
  format %{ "extrw $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12694 
// Same rotation idiom as extrOrL, but with the halves combined by AddL
// instead of OrL; with disjoint bit ranges Add and Or are equivalent.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  // lshift + rshift must be a multiple of 64.
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12709 
// Same rotation idiom as extrOrI, but with the halves combined by AddI
// instead of OrI; with disjoint bit ranges Add and Or are equivalent.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  // lshift + rshift must be a multiple of 32.
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  // Print the 32-bit mnemonic: the encoding below emits extrw, not extr.
  format %{ "extrw $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12724 
12725 
// rol expander
// AArch64 has no rotate-left instruction; rol(x, s) == ror(x, -s), so
// negate the shift count into rscratch1 and use RORV.
instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    // rscratch1 = 0 - shift; RORV uses the count mod 64.
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
12741 
// rol expander
// 32-bit variant: rol(x, s) == ror(x, -s) via RORVW with a negated count.
instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    // rscratch1 = 0 - shift; RORVW uses the count mod 32.
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
12757 
// Variable rotate-left idioms: (x << s) | (x >>> (C - s)) with C = 64/32
// or C = 0 (equivalent because shift counts are taken mod the type width).
// All four expand to the rol expanders above.
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
12793 
// ror expander
// Rotate right by a register amount: maps directly onto RORV.
instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
12808 
// ror expander
// 32-bit rotate right by a register amount: maps directly onto RORVW.
instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
12823 
// Variable rotate-right idioms: (x >>> s) | (x << (C - s)) with C = 64/32
// or C = 0 (equivalent because shift counts are taken mod the type width).
// All four expand to the ror expanders above.
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
12859 
// Add/subtract (extended)
// Fold a ConvI2L into ADD/SUB's sign-extending (sxtw) operand form.

instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};

instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
12887 
12888 
// Add with an extended int operand: a (src2 << k) >> k pair (k = 16 or 24)
// is a sign extension from half/byte, and the unsigned-shift pair a zero
// extension; both fold into ADD's extended-register operand form.

instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
12927 
// Long versions of the extended-operand adds: (src2 << k) >> k with
// k = 48/32/56 sign-extends from half/word/byte; the unsigned pair
// zero-extends. All fold into ADD's extended-register operand form.

instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
12979 
12980 
// Add with a zero-extended operand written as an AND with a low-bits mask
// (0xff / 0xffff / 0xffffffff): folds into ADD's uxtb/uxth/uxtw operand form.

instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
13045 
// Subtract with a zero-extended operand written as an AND with a low-bits
// mask (0xff / 0xffff / 0xffffffff): folds into SUB's uxtb/uxth/uxtw form.

instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
13110 
13111 
// Add/subtract with a sign-extended-then-shifted operand: the inner
// (src2 << k) >> k pair sign-extends from byte/half/word, and the outer
// LShiftL (lshift2, constrained by immIExt) becomes the extend's shift
// amount in ADD/SUB's extended-register operand form.

instruct AddExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13189 
// Int versions of the sign-extended-then-shifted add/sub: the inner
// (src2 << k) >> k pair (k = 24 or 16) sign-extends from byte/half, and
// the outer LShiftI becomes the extend's shift amount in ADDW/SUBW.

instruct AddExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13241 
13242 
// Fold ConvI2L plus a constant left shift (immIExt-constrained) into
// ADD/SUB's sxtw extended-register-with-shift operand form.

instruct AddExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};

instruct SubExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};
13268 
13269 
// Add/subtract with a zero-extended-then-shifted long operand: the AND
// with a low-bits mask (0xff / 0xffff / 0xffffffff) is a zero extension,
// and the constant LShiftL becomes the extend's shift amount.

instruct AddExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13347 
13348 instruct AddExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
13349 %{
13350   match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
13351   ins_cost(1.9 * INSN_COST);
13352   format %{ "addw  $dst, $src1, $src2, uxtb #lshift" %}
13353 
13354    ins_encode %{
13355      __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
13356             as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
13357    %}
13358   ins_pipe(ialu_reg_reg_shift);
13359 %}
13360 
13361 instruct AddExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
13362 %{
13363   match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
13364   ins_cost(1.9 * INSN_COST);
13365   format %{ "addw  $dst, $src1, $src2, uxth #lshift" %}
13366 
13367    ins_encode %{
13368      __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
13369             as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
13370    %}
13371   ins_pipe(ialu_reg_reg_shift);
13372 %}
13373 
13374 instruct SubExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
13375 %{
13376   match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
13377   ins_cost(1.9 * INSN_COST);
13378   format %{ "subw  $dst, $src1, $src2, uxtb #lshift" %}
13379 
13380    ins_encode %{
13381      __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
13382             as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
13383    %}
13384   ins_pipe(ialu_reg_reg_shift);
13385 %}
13386 
13387 instruct SubExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
13388 %{
13389   match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
13390   ins_cost(1.9 * INSN_COST);
13391   format %{ "subw  $dst, $src1, $src2, uxth #lshift" %}
13392 
13393    ins_encode %{
13394      __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
13395             as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
13396    %}
13397   ins_pipe(ialu_reg_reg_shift);
13398 %}
13399 // END This section of the file is automatically generated. Do not edit --------------
13400 
13401 // ============================================================================
13402 // Floating Point Arithmetic Instructions
13403 
// Single-precision float add: dst = src1 + src2 (fadds).
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double-precision float add: dst = src1 + src2 (faddd).
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Single-precision float subtract: dst = src1 - src2 (fsubs).
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double-precision float subtract: dst = src1 - src2 (fsubd).
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Single-precision float multiply: dst = src1 * src2 (fmuls).
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double-precision float multiply: dst = src1 * src2 (fmuld).
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
13493 
// Fused multiply-add rules, all guarded by predicate(UseFMA).
// The Fma* ideal nodes carry the addend first: FmaF src3 (Binary src1 src2).

// src1 * src2 + src3 (fmadds)
instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary src1 src2)));

  format %{ "fmadds   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 + src3 (fmaddd)
instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary src1 src2)));

  format %{ "fmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmaddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 + src3 (fmsubs).  Two match rules because the negation can
// appear on either multiplicand: (-a)*b == a*(-b).
instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));

  format %{ "fmsubs   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubs(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 + src3 (fmsubd), negation on either multiplicand.
instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));

  format %{ "fmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 - src3 (fnmadds), negation on either multiplicand.
instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));

  format %{ "fnmadds  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmadds(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 - src3 (fnmaddd), negation on either multiplicand.
instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));

  format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmaddd(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13599 
// src1 * src2 - src3 (fnmsubs), guarded by UseFMA.
// Fix: dropped the 'immF0 zero' operand — it appeared in neither the match
// rule nor the encoding (a leftover from an earlier match form) and only
// cluttered the operand list.
instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));

  format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmsubs(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13616 
// src1 * src2 - src3 (double), guarded by UseFMA.
// Fix: dropped the 'immD0 zero' operand — unused in both the match rule and
// the encoding, mirroring the cleanup in mnsubF_reg_reg.
instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));

  format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
  // n.b. the assembler entry point is named fnmsub (insn name should be fnmsubd)
    __ fnmsub(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13634 
13635 
// Single-precision float divide: dst = src1 / src2 (fdivs).
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}

// Double-precision float divide: dst = src1 / src2 (fdivd).
// Higher cost than divF reflects the longer double-precision divide latency.
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
13665 
// Single-precision float negate: dst = -src.
// Fix: format said "fneg" but the encoding emits fnegs; debug format now
// matches the emitted instruction (and the sibling negD's "fnegd").
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
13679 
// Double-precision float negate: dst = -src (fnegd).
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}

// Single-precision absolute value: dst = |src| (fabss).
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}

// Double-precision absolute value: dst = |src| (fabsd).
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
13719 
// Double-precision square root: dst = sqrt(src) (fsqrtd).
// Fix: pipe class was fp_div_s (single-precision divide unit); a double
// sqrt belongs on the double-precision divide/sqrt pipe, fp_div_d.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_d);
%}
13732 
// Single-precision square root, matched as the ConvD2F(SqrtD(ConvF2D src))
// pattern the ideal graph produces for Math.sqrt on floats; emits fsqrts.
// Fix: pipe class was fp_div_d (double-precision divide unit); a single
// sqrt belongs on fp_div_s.  (This and sqrtD_reg had their pipes swapped.)
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_s);
%}
13745 
13746 // ============================================================================
13747 // Logical Instructions
13748 
13749 // Integer Logical Instructions
13750 
13751 // And Instructions
13752 
13753 
13754 instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
13755   match(Set dst (AndI src1 src2));
13756 
13757   format %{ "andw  $dst, $src1, $src2\t# int" %}
13758 
13759   ins_cost(INSN_COST);
13760   ins_encode %{
13761     __ andw(as_Register($dst$$reg),
13762             as_Register($src1$$reg),
13763             as_Register($src2$$reg));
13764   %}
13765 
13766   ins_pipe(ialu_reg_reg);
13767 %}
13768 
// 32-bit bitwise AND with a logical immediate: dst = src1 & imm.
// Fix: format said "andsw" (the flag-setting form) but the encoding emits a
// plain andw; debug output now names the instruction actually emitted.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13783 
// Or Instructions

// 32-bit bitwise OR: dst = src1 | src2 (orrw).
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 32-bit bitwise OR with a logical immediate: dst = src1 | imm (orrw).
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// 32-bit bitwise XOR: dst = src1 ^ src2 (eorw).
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 32-bit bitwise XOR with a logical immediate: dst = src1 ^ imm (eorw).
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13847 
// Long Logical Instructions
// Fix: all six format strings below said "\t# int" although these are the
// 64-bit (long) forms; debug output now says "# long", matching the 32-bit
// group's use of "# int".

// 64-bit bitwise AND: dst = src1 & src2 (andr).
// NOTE(review): cr is declared but unused in match/effect — confirm.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 64-bit bitwise AND with a logical immediate: dst = src1 & imm.
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Or Instructions

// 64-bit bitwise OR: dst = src1 | src2 (orr).
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 64-bit bitwise OR with a logical immediate: dst = src1 | imm (orr).
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// 64-bit bitwise XOR: dst = src1 ^ src2 (eor).
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 64-bit bitwise XOR with a logical immediate: dst = src1 ^ imm (eor).
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13944 
// Sign-extend int to long: sbfm with imms covering bits 0..31 acts as sxtw.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// this pattern occurs in bigmath arithmetic
// Zero-extend int to long: matches (ConvI2L src) & 0xffffffff and emits
// ubfm over bits 0..31 (i.e. uxtw).
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Long to int: a 32-bit register move keeps the low word (movw).
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// int -> boolean (0/1): compare against zero and cset on NE; clobbers flags.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// pointer -> boolean (0/1): 64-bit compare against zero, cset NE; clobbers flags.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
14019 
// double -> float narrowing conversion.
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2f);
%}

// float -> double widening conversion.
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}

// float -> int, round toward zero (fcvtzsw).
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}

// float -> long, round toward zero (fcvtzs).
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}

// int -> float signed conversion (scvtfws).
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}

// long -> float signed conversion (scvtfs).
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}

// double -> int, round toward zero (fcvtzdw).
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}

// double -> long, round toward zero (fcvtzd).
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}

// int -> double signed conversion (scvtfwd).
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}

// long -> double signed conversion (scvtfd).
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
14149 
// stack <-> reg and reg <-> reg shuffles with no conversion

// Reinterpret a float stack slot as an int register (ldrw from sp+disp).
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Reinterpret an int stack slot as a float register (ldrs from sp+disp).
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Reinterpret a double stack slot as a long register (ldr from sp+disp).
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Reinterpret a long stack slot as a double register (ldrd from sp+disp).
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Store a float register into an int stack slot (strs to sp+disp).
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Store an int register into a float stack slot (strw to sp+disp).
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
14259 
// Store a double register into a long stack slot (strd to sp+disp).
// Fix: format printed the operands reversed ("strd $dst, $src") although the
// encoding stores $src to the $dst stack slot; now matches the sibling
// MoveF2I_reg_stack / MoveL2D_reg_stack format convention.
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
14277 
// Store a long register into a double stack slot (str to sp+disp).
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}

// Bit-copy a float register into an int register (fmovs gpr <- fpr).
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}

// Bit-copy an int register into a float register (fmovs fpr <- gpr).
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}

// Bit-copy a double register into a long register (fmovd gpr <- fpr).
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}

// Bit-copy a long register into a double register (fmovd fpr <- gpr).
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
14367 
14368 // ============================================================================
14369 // clearing of an array
14370 
14371 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
14372 %{
14373   match(Set dummy (ClearArray cnt base));
14374   effect(USE_KILL cnt, USE_KILL base);
14375 
14376   ins_cost(4 * INSN_COST);
14377   format %{ "ClearArray $cnt, $base" %}
14378 
14379   ins_encode %{
14380     __ zero_words($base$$Register, $cnt$$Register);
14381   %}
14382 
14383   ins_pipe(pipe_class_memory);
14384 %}
14385 
14386 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
14387 %{
14388   predicate((u_int64_t)n->in(2)->get_long()
14389             < (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
14390   match(Set dummy (ClearArray cnt base));
14391   effect(USE_KILL base);
14392 
14393   ins_cost(4 * INSN_COST);
14394   format %{ "ClearArray $cnt, $base" %}
14395 
14396   ins_encode %{
14397     __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
14398   %}
14399 
14400   ins_pipe(pipe_class_memory);
14401 %}
14402 
14403 // ============================================================================
14404 // Overflow Math Instructions
14405 
14406 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
14407 %{
14408   match(Set cr (OverflowAddI op1 op2));
14409 
14410   format %{ "cmnw  $op1, $op2\t# overflow check int" %}
14411   ins_cost(INSN_COST);
14412   ins_encode %{
14413     __ cmnw($op1$$Register, $op2$$Register);
14414   %}
14415 
14416   ins_pipe(icmp_reg_reg);
14417 %}
14418 
14419 instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
14420 %{
14421   match(Set cr (OverflowAddI op1 op2));
14422 
14423   format %{ "cmnw  $op1, $op2\t# overflow check int" %}
14424   ins_cost(INSN_COST);
14425   ins_encode %{
14426     __ cmnw($op1$$Register, $op2$$constant);
14427   %}
14428 
14429   ins_pipe(icmp_reg_imm);
14430 %}
14431 
14432 instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
14433 %{
14434   match(Set cr (OverflowAddL op1 op2));
14435 
14436   format %{ "cmn   $op1, $op2\t# overflow check long" %}
14437   ins_cost(INSN_COST);
14438   ins_encode %{
14439     __ cmn($op1$$Register, $op2$$Register);
14440   %}
14441 
14442   ins_pipe(icmp_reg_reg);
14443 %}
14444 
14445 instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
14446 %{
14447   match(Set cr (OverflowAddL op1 op2));
14448 
14449   format %{ "cmn   $op1, $op2\t# overflow check long" %}
14450   ins_cost(INSN_COST);
14451   ins_encode %{
14452     __ cmn($op1$$Register, $op2$$constant);
14453   %}
14454 
14455   ins_pipe(icmp_reg_imm);
14456 %}
14457 
14458 instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
14459 %{
14460   match(Set cr (OverflowSubI op1 op2));
14461 
14462   format %{ "cmpw  $op1, $op2\t# overflow check int" %}
14463   ins_cost(INSN_COST);
14464   ins_encode %{
14465     __ cmpw($op1$$Register, $op2$$Register);
14466   %}
14467 
14468   ins_pipe(icmp_reg_reg);
14469 %}
14470 
14471 instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
14472 %{
14473   match(Set cr (OverflowSubI op1 op2));
14474 
14475   format %{ "cmpw  $op1, $op2\t# overflow check int" %}
14476   ins_cost(INSN_COST);
14477   ins_encode %{
14478     __ cmpw($op1$$Register, $op2$$constant);
14479   %}
14480 
14481   ins_pipe(icmp_reg_imm);
14482 %}
14483 
14484 instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
14485 %{
14486   match(Set cr (OverflowSubL op1 op2));
14487 
14488   format %{ "cmp   $op1, $op2\t# overflow check long" %}
14489   ins_cost(INSN_COST);
14490   ins_encode %{
14491     __ cmp($op1$$Register, $op2$$Register);
14492   %}
14493 
14494   ins_pipe(icmp_reg_reg);
14495 %}
14496 
14497 instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
14498 %{
14499   match(Set cr (OverflowSubL op1 op2));
14500 
14501   format %{ "cmp   $op1, $op2\t# overflow check long" %}
14502   ins_cost(INSN_COST);
14503   ins_encode %{
14504     __ cmp($op1$$Register, $op2$$constant);
14505   %}
14506 
14507   ins_pipe(icmp_reg_imm);
14508 %}
14509 
14510 instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
14511 %{
14512   match(Set cr (OverflowSubI zero op1));
14513 
14514   format %{ "cmpw  zr, $op1\t# overflow check int" %}
14515   ins_cost(INSN_COST);
14516   ins_encode %{
14517     __ cmpw(zr, $op1$$Register);
14518   %}
14519 
14520   ins_pipe(icmp_reg_imm);
14521 %}
14522 
14523 instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
14524 %{
14525   match(Set cr (OverflowSubL zero op1));
14526 
14527   format %{ "cmp   zr, $op1\t# overflow check long" %}
14528   ins_cost(INSN_COST);
14529   ins_encode %{
14530     __ cmp(zr, $op1$$Register);
14531   %}
14532 
14533   ins_pipe(icmp_reg_imm);
14534 %}
14535 
// Int multiply overflow.  smull produces the full 64-bit product; the
// product fits in 32 bits iff it equals its own sign extension from bit 31
// (the subs with sxtw tests exactly that, yielding NE on overflow).  Because
// there is no direct way to copy NE into V, the tail sequence materializes
// it: develop 0x80000000 only in the NE case, then "cmpw rscratch1, 1"
// overflows (sets VS) exactly when rscratch1 is 0x80000000.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused int multiply-overflow check + branch.  The predicate restricts the
// test to overflow/no_overflow, so $cmp encodes VS or VC; the NE/EQ result
// of the sign-extension compare is branched on directly, skipping the flag
// materialization tail of overflowMulI_reg.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    // VS (overflow) maps to NE, VC (no overflow) to EQ.
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}

// Long multiply overflow.  mul gives the low 64 bits, smulh the high 64;
// the 128-bit product fits in 64 bits iff the high half is a pure sign
// extension of the low half (compared against low >> 63 arithmetically).
// The same movw/cselw/cmpw tail as overflowMulI_reg converts NE into VS.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused long multiply-overflow check + branch; same NE/EQ shortcut as the
// int branch variant above.
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
14625 
14626 // ============================================================================
14627 // Compare Instructions
14628 
14629 instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
14630 %{
14631   match(Set cr (CmpI op1 op2));
14632 
14633   effect(DEF cr, USE op1, USE op2);
14634 
14635   ins_cost(INSN_COST);
14636   format %{ "cmpw  $op1, $op2" %}
14637 
14638   ins_encode(aarch64_enc_cmpw(op1, op2));
14639 
14640   ins_pipe(icmp_reg_reg);
14641 %}
14642 
14643 instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
14644 %{
14645   match(Set cr (CmpI op1 zero));
14646 
14647   effect(DEF cr, USE op1);
14648 
14649   ins_cost(INSN_COST);
14650   format %{ "cmpw $op1, 0" %}
14651 
14652   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
14653 
14654   ins_pipe(icmp_reg_imm);
14655 %}
14656 
14657 instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
14658 %{
14659   match(Set cr (CmpI op1 op2));
14660 
14661   effect(DEF cr, USE op1);
14662 
14663   ins_cost(INSN_COST);
14664   format %{ "cmpw  $op1, $op2" %}
14665 
14666   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
14667 
14668   ins_pipe(icmp_reg_imm);
14669 %}
14670 
14671 instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
14672 %{
14673   match(Set cr (CmpI op1 op2));
14674 
14675   effect(DEF cr, USE op1);
14676 
14677   ins_cost(INSN_COST * 2);
14678   format %{ "cmpw  $op1, $op2" %}
14679 
14680   ins_encode(aarch64_enc_cmpw_imm(op1, op2));
14681 
14682   ins_pipe(icmp_reg_imm);
14683 %}
14684 
// Unsigned compare Instructions; really, same as signed compare
// except it should only be used to feed an If or a CMovI which takes a
// cmpOpU.

// Unsigned int compare, register-register (rFlagsRegU marks the flags as
// set for unsigned interpretation).
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned int compare against the constant zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an arbitrary immediate (double cost, as for
// the signed form).
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14744 
// Signed long compare, register-register.
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed long compare against zero; printed as "tst" but encoded via the
// add/sub-immediate compare encoder with a zero operand.
instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an arbitrary immediate (double cost).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14800 
// Unsigned long compare, register-register; same encodings as CmpL but the
// rFlagsRegU result feeds unsigned condition consumers (cmpOpU).
instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned long compare against zero.
instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpUL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare against an add/sub-encodable immediate.
instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare against an arbitrary immediate (double cost).
instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14856 
// Pointer compare, register-register; pointers compare unsigned.
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compressed-oop (narrow pointer) compare, register-register.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Pointer null test (compare against the null constant).
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Compressed-oop null test.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
14912 
// FP comparisons
//
// n.b. CmpF/CmpD set a normal flags reg which then gets compared
// using normal cmpOp. See declaration of rFlagsReg for details.
//
// NOTE(review): the `0.0D` literal suffix used in the zero-compare encodings
// below is not standard C++; newer JDK sources use plain 0.0 — confirm the
// supported toolchains accept it.

// Single-precision compare, register-register.
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Single-precision compare against the constant +0.0.
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}
// FROM HERE

// Double-precision compare, register-register.
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Double-precision compare against the constant +0.0.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}
14974 
// Three-way float compare (CmpF3): produce -1/0/+1 in an integer register.
// After fcmps: csinv yields 0 on EQ, otherwise -1 (~0); csneg then keeps -1
// for LT (less or unordered, matching Java fcmpl semantics) and negates it
// to +1 otherwise.
// NOTE(review): the `done` label is declared and bound but never branched
// to — appears to be dead.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}

  ins_pipe(pipe_class_default);

%}

// Three-way double compare (CmpD3); same conditional-select sequence as
// compF3_reg_reg but using fcmpd.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);

%}

// Three-way float compare against the constant +0.0.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0D);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}

  ins_pipe(pipe_class_default);

%}

// Three-way double compare against the constant +0.0.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0D);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);

%}
15082 
// CmpLTMask: dst = (p < q) ? -1 : 0 (all-ones mask when p < q signed).
// csetw puts 0/1 in dst, then subtracting from zr turns 1 into -1.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Special case against zero: an arithmetic shift right by 31 replicates
// the sign bit, giving -1 when src < 0 and 0 otherwise in one instruction.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
15119 
15120 // ============================================================================
15121 // Max and Min
15122 
15123 instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
15124 %{
15125   match(Set dst (MinI src1 src2));
15126 
15127   effect(DEF dst, USE src1, USE src2, KILL cr);
15128   size(8);
15129 
15130   ins_cost(INSN_COST * 3);
15131   format %{
15132     "cmpw $src1 $src2\t signed int\n\t"
15133     "cselw $dst, $src1, $src2 lt\t"
15134   %}
15135 
15136   ins_encode %{
15137     __ cmpw(as_Register($src1$$reg),
15138             as_Register($src2$$reg));
15139     __ cselw(as_Register($dst$$reg),
15140              as_Register($src1$$reg),
15141              as_Register($src2$$reg),
15142              Assembler::LT);
15143   %}
15144 
15145   ins_pipe(ialu_reg_reg);
15146 %}
15147 // FROM HERE
15148 
15149 instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
15150 %{
15151   match(Set dst (MaxI src1 src2));
15152 
15153   effect(DEF dst, USE src1, USE src2, KILL cr);
15154   size(8);
15155 
15156   ins_cost(INSN_COST * 3);
15157   format %{
15158     "cmpw $src1 $src2\t signed int\n\t"
15159     "cselw $dst, $src1, $src2 gt\t"
15160   %}
15161 
15162   ins_encode %{
15163     __ cmpw(as_Register($src1$$reg),
15164             as_Register($src2$$reg));
15165     __ cselw(as_Register($dst$$reg),
15166              as_Register($src1$$reg),
15167              as_Register($src2$$reg),
15168              Assembler::GT);
15169   %}
15170 
15171   ins_pipe(ialu_reg_reg);
15172 %}
15173 
15174 // ============================================================================
15175 // Branch Instructions
15176 
15177 // Direct Branch.
15178 instruct branch(label lbl)
15179 %{
15180   match(Goto);
15181 
15182   effect(USE lbl);
15183 
15184   ins_cost(BRANCH_COST);
15185   format %{ "b  $lbl" %}
15186 
15187   ins_encode(aarch64_enc_b(lbl));
15188 
15189   ins_pipe(pipe_branch);
15190 %}
15191 
15192 // Conditional Near Branch
15193 instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
15194 %{
15195   // Same match rule as `branchConFar'.
15196   match(If cmp cr);
15197 
15198   effect(USE lbl);
15199 
15200   ins_cost(BRANCH_COST);
15201   // If set to 1 this indicates that the current instruction is a
15202   // short variant of a long branch. This avoids using this
15203   // instruction in first-pass matching. It will then only be used in
15204   // the `Shorten_branches' pass.
15205   // ins_short_branch(1);
15206   format %{ "b$cmp  $lbl" %}
15207 
15208   ins_encode(aarch64_enc_br_con(cmp, lbl));
15209 
15210   ins_pipe(pipe_branch_cond);
15211 %}
15212 
15213 // Conditional Near Branch Unsigned
15214 instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
15215 %{
15216   // Same match rule as `branchConFar'.
15217   match(If cmp cr);
15218 
15219   effect(USE lbl);
15220 
15221   ins_cost(BRANCH_COST);
15222   // If set to 1 this indicates that the current instruction is a
15223   // short variant of a long branch. This avoids using this
15224   // instruction in first-pass matching. It will then only be used in
15225   // the `Shorten_branches' pass.
15226   // ins_short_branch(1);
15227   format %{ "b$cmp  $lbl\t# unsigned" %}
15228 
15229   ins_encode(aarch64_enc_br_conU(cmp, lbl));
15230 
15231   ins_pipe(pipe_branch_cond);
15232 %}
15233 
// Make use of CBZ and CBNZ.  These instructions, as well as being
// shorter than (cmp; branch), have the additional benefit of not
// killing the flags.

// Int compare-with-zero fused into CBZW/CBNZW (EQ/NE only).
instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Long compare-with-zero fused into CBZ/CBNZ.
instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Pointer null-check fused into CBZ/CBNZ.
instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Compressed-oop null-check fused into CBZW/CBNZW (narrow oops are 32-bit).
instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Null-check of a DecodeN'd narrow oop: testing the compressed form for
// zero is sufficient, so the decode is elided and CBZW/CBNZW is used on
// the narrow register directly.
instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Unsigned int compare-with-zero fused into CBZW/CBNZW.  Unsigned <= 0 is
// equivalent to == 0 (and unsigned > 0 to != 0), so the EQ/LS conditions
// both select the zero branch.
instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Unsigned long compare-with-zero fused into CBZ/CBNZ (same EQ/LS folding).
instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpUL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15356 
15357 // Test bit and Branch
15358 
15359 // Patterns for short (< 32KiB) variants
15360 instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
15361   match(If cmp (CmpL op1 op2));
15362   effect(USE labl);
15363 
15364   ins_cost(BRANCH_COST);
15365   format %{ "cb$cmp   $op1, $labl # long" %}
15366   ins_encode %{
15367     Label* L = $labl$$label;
15368     Assembler::Condition cond =
15369       ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
15370     __ tbr(cond, $op1$$Register, 63, *L);
15371   %}
15372   ins_pipe(pipe_cmp_branch);
15373   ins_short_branch(1);
15374 %}
15375 
15376 instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
15377   match(If cmp (CmpI op1 op2));
15378   effect(USE labl);
15379 
15380   ins_cost(BRANCH_COST);
15381   format %{ "cb$cmp   $op1, $labl # int" %}
15382   ins_encode %{
15383     Label* L = $labl$$label;
15384     Assembler::Condition cond =
15385       ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
15386     __ tbr(cond, $op1$$Register, 31, *L);
15387   %}
15388   ins_pipe(pipe_cmp_branch);
15389   ins_short_branch(1);
15390 %}
15391 
15392 instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
15393   match(If cmp (CmpL (AndL op1 op2) op3));
15394   predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
15395   effect(USE labl);
15396 
15397   ins_cost(BRANCH_COST);
15398   format %{ "tb$cmp   $op1, $op2, $labl" %}
15399   ins_encode %{
15400     Label* L = $labl$$label;
15401     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
15402     int bit = exact_log2($op2$$constant);
15403     __ tbr(cond, $op1$$Register, bit, *L);
15404   %}
15405   ins_pipe(pipe_cmp_branch);
15406   ins_short_branch(1);
15407 %}
15408 
15409 instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
15410   match(If cmp (CmpI (AndI op1 op2) op3));
15411   predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
15412   effect(USE labl);
15413 
15414   ins_cost(BRANCH_COST);
15415   format %{ "tb$cmp   $op1, $op2, $labl" %}
15416   ins_encode %{
15417     Label* L = $labl$$label;
15418     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
15419     int bit = exact_log2($op2$$constant);
15420     __ tbr(cond, $op1$$Register, bit, *L);
15421   %}
15422   ins_pipe(pipe_cmp_branch);
15423   ins_short_branch(1);
15424 %}
15425 
15426 // And far variants
15427 instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
15428   match(If cmp (CmpL op1 op2));
15429   effect(USE labl);
15430 
15431   ins_cost(BRANCH_COST);
15432   format %{ "cb$cmp   $op1, $labl # long" %}
15433   ins_encode %{
15434     Label* L = $labl$$label;
15435     Assembler::Condition cond =
15436       ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
15437     __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
15438   %}
15439   ins_pipe(pipe_cmp_branch);
15440 %}
15441 
// Far variant: branch on the sign of an int compared against zero.
// Same idea as the long form, but testing sign bit 31.
instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    // LT (negative, sign bit set) -> NE; GE -> EQ.
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15456 
// Far variant of the long single-bit test-and-branch:
// (op1 & op2) == 0 / != 0 with op2 a power of two, emitted via tbr with
// far=true so out-of-range targets are reachable.
instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  // Only a single-bit mask can be tested with tbz/tbnz.
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15472 
// Far variant of the int single-bit test-and-branch:
// (op1 & op2) == 0 / != 0 with op2 a power of two, emitted via tbr with
// far=true so out-of-range targets are reachable.
instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  // Only a single-bit mask can be tested with tbz/tbnz.
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15488 
15489 // Test bits
15490 
// Set flags from (op1 & op2) compared against zero, with no result
// register: emitted as a 64-bit tst.  Applies only when op2 can be
// encoded as an AArch64 bitmask ("logical") immediate.
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15503 
// Set flags from (op1 & op2) compared against zero, 32-bit form (tstw).
// Applies only when op2 is encodable as a 32-bit logical immediate.
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15516 
// Register-register form of the long bit test: flags from op1 & op2.
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
15527 
// Register-register form of the int bit test: flags from op1 & op2 (tstw).
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
15538 
15539 
15540 // Conditional Far Branch
15541 // Conditional Far Branch Unsigned
15542 // TODO: fixme
15543 
15544 // counted loop end branch near
// Conditional branch ending a counted loop, branching on the flags
// produced by the loop-exit compare.  Near (short-range) variant.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}
15560 
15561 // counted loop end branch near Unsigned
// Counted-loop-end branch using an unsigned condition code
// (cmpOpU/rFlagsRegU).  Near (short-range) variant.
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
15577 
15578 // counted loop end branch far
15579 // counted loop end branch far unsigned
15580 // TODO: fixme
15581 
15582 // ============================================================================
15583 // inlined locking and unlocking
15584 
// Inlined fast-path monitor enter for 'object' using lock record 'box'.
// The outcome is reported in the flags register (Set cr); tmp and tmp2
// are scratch registers clobbered by the encoding.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
15599 
// Inlined fast-path monitor exit, the counterpart to cmpFastLock.
// Outcome in flags; tmp and tmp2 are scratch.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
15612 
15613 
15614 // ============================================================================
15615 // Safepoint Instructions
15616 
15617 // TODO
15618 // provide a near and far version of this code
15619 
// Safepoint poll: load a word from the polling page and discard it
// (into zr).  The load is tagged with relocInfo::poll_type so the VM
// can identify the poll site when the page is protected.
instruct safePoint(iRegP poll)
%{
  match(SafePoint poll);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
15632 
15633 
15634 // ============================================================================
15635 // Procedure Call/Return Instructions
15636 
15637 // Call Java Static Instruction
15638 
// Direct call to a statically-bound Java method (no inline-cache
// dispatch), followed by the standard call epilog.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
15654 
15655 // TO HERE
15656 
15657 // Call Java Dynamic Instruction
// Dynamically-dispatched (virtual/interface) Java call via the
// inline-cache call encoding, followed by the standard call epilog.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
15673 
15674 // Call Runtime Instruction
15675 
// Call from compiled Java code into the VM runtime.
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15690 
15691 // Call Runtime Instruction
15692 
// Call to a runtime leaf routine (no safepoint, no Java frame walking);
// uses the same java-to-runtime call encoding.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15707 
15708 // Call Runtime Instruction
15709 
// Call to a runtime leaf routine that does not use floating point
// state (CallLeafNoFP ideal node); same encoding as CallLeafDirect.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15724 
15725 // Tail Call; Jump from runtime stub to Java code.
15726 // Also known as an 'interprocedural jump'.
15727 // Target of jump will eventually return to caller.
15728 // TailJump below removes the return address.
// Indirect tail call: jump (not call) through jump_target, with
// method_oop carrying the method for the callee.  Control eventually
// returns to this frame's caller.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}
15741 
// Indirect tail jump used for exception dispatch: jump through
// jump_target with the exception oop pinned in r0 (iRegP_R0 ex_oop).
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
15754 
15755 // Create exception oop: created by stack-crawling runtime code.
15756 // Created exception is now available to this handler, and is setup
15757 // just prior to jumping to this handler. No code emitted.
15758 // TODO check
15759 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
// Bind the incoming exception oop (already placed in r0 by the runtime
// prior to jumping here) to a register operand.  Emits no code (size 0).
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
15772 
15773 // Rethrow exception: The exception oop will come in the first
15774 // argument position. Then JUMP (not call) to the rethrow stub code.
// Rethrow the pending exception (oop arrives in the first argument
// register): an unconditional jump to the rethrow stub, not a call.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
15785 
15786 
15787 // Return Instruction
15788 // epilog node loads ret address into lr as part of frame pop
// Method return.  The epilog has already restored the return address
// into lr as part of the frame pop; this just emits ret.
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}
15799 
15800 // Die now.
// Halt: emit a trapping debug instruction for paths that must never
// execute.  The immediate distinguishes this trap from the
// zombie-not-entrant SIGILL pattern.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // +1 so NativeInstruction::is_sigill_zombie_not_entrant() doesn't
    // return true
    __ dpcs1(0xdead + 1);
  %}

  ins_pipe(pipe_class_default);
%}
15815 
15816 // ============================================================================
15817 // Partial Subtype Check
15818 //
// Search the superklass array for an instance of the superklass.  Set a hidden
15820 // internal cache on a hit (cache is checked with exposed code in
15821 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
15822 // encoding ALSO sets flags.
15823 
// Slow-path subtype check of 'sub' against 'super'; result register is
// set per the opcode below.  Inputs/outputs are pinned to specific
// registers (r4/r0/r2/r5) by the encoding; flags and temp are clobbered.
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}
15838 
// Fused form for "PartialSubtypeCheck(sub, super) cmp 0": only the
// flags are consumed, so the result register need not be zeroed on a
// hit (opcode 0x0); result and temp are clobbered as scratch.
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
15853 
// String.compareTo intrinsic, UU encoding: both strings are UTF-16.
// No vector temps are needed (fnoreg, fnoreg).
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      fnoreg, fnoreg, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
15871 
// String.compareTo intrinsic, LL encoding: both strings are Latin-1
// (one byte per char).  No vector temps are needed.
instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      fnoreg, fnoreg, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
15888 
// String.compareTo intrinsic, UL encoding: str1 is UTF-16 and str2 is
// Latin-1.  The mixed-width compare uses two vector scratch registers.
instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
15905 
// String.compareTo intrinsic, LU encoding: str1 is Latin-1 and str2 is
// UTF-16.  The mixed-width compare uses two vector scratch registers.
instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
15922 
// String.indexOf intrinsic with a variable-length needle, UU encoding
// (both strings UTF-16).  The -1 argument means "needle length is in
// the cnt2 register, not a compile-time constant".
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
15941 
// String.indexOf intrinsic with a variable-length needle, LL encoding
// (both strings Latin-1).  -1 = needle length is in the cnt2 register.
instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
15960 
// String.indexOf intrinsic with a variable-length needle, UL encoding
// (UTF-16 haystack, Latin-1 needle).  -1 = needle length in cnt2.
instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
15979 
// String.indexOf intrinsic with a variable-length needle, LU encoding
// (Latin-1 haystack, UTF-16 needle).  -1 = needle length in cnt2.
instruct string_indexofLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
15998 
// String.indexOf with a compile-time-constant needle length (immI_le_4
// operand), UU encoding.  The constant length is passed as icnt2 and zr
// replaces the cnt2 register.
instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
16019 
// String.indexOf with a compile-time-constant needle length (immI_le_4
// operand), LL encoding.  Constant length passed as icnt2; zr for cnt2.
instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
16040 
// String.indexOf with a constant single-char needle (immI_1 operand),
// UL encoding.  Constant length passed as icnt2; zr for cnt2.
instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
16061 
// String.indexOf with a constant single-char needle (immI_1 operand),
// LU encoding.  Constant length passed as icnt2; zr for cnt2.
instruct string_indexof_conLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
16082 
// StringUTF16.indexOfChar intrinsic: find a single char value 'ch'
// (in a register) in a UTF-16 char[] of length cnt1.
instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
                              iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                              iRegINoSp tmp3, rFlagsReg cr)
%{
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}

  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
                           $result$$Register, $tmp1$$Register, $tmp2$$Register,
                           $tmp3$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
16100 
// String.equals intrinsic, LL encoding (Latin-1): the trailing 1 is
// the element size in bytes.
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}
16116 
// String.equals intrinsic, UU encoding (UTF-16): the trailing 2 is
// the element size in bytes.
instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
16132 
// Arrays.equals intrinsic for byte[] (LL encoding): the trailing 1 is
// the element size in bytes.
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 1);
    %}
  ins_pipe(pipe_class_memory);
%}
16149 
// Arrays.equals intrinsic for char[] (UU encoding): the trailing 2 is
// the element size in bytes.
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
16166 
// StringCoding.hasNegatives intrinsic: test whether any of the first
// 'len' bytes of ary1 has its sign bit set (i.e. is non-ASCII).
instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(USE_KILL ary1, USE_KILL len, KILL cr);
  format %{ "has negatives byte[] $ary1,$len -> $result" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
16177 
16178 // fast char[] to byte[] compression
// Compact-strings compression: copy 'len' chars from char[] src into
// byte[] dst, using four vector temps; result reports the outcome
// (exact semantics defined by MacroAssembler::char_array_compress).
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                         vRegD_V0 tmp1, vRegD_V1 tmp2,
                         vRegD_V2 tmp3, vRegD_V3 tmp4,
                         iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
                           $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
16196 
16197 // fast byte[] to char[] inflation
// Compact-strings inflation: widen 'len' bytes from byte[] src into
// char[] dst.  No value result (Universe dummy); src/dst/len registers
// and the temps are clobbered.
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
                        vRegD tmp1, vRegD tmp2, vRegD tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
16211 
16212 // encode char[] to byte[] in ISO_8859_1
// ISO-8859-1 encode intrinsic: narrow 'len' chars from src into byte
// array dst; result reports the number encoded (semantics defined by
// MacroAssembler::encode_iso_array).  Uses four vector temps.
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
16230 
16231 // ============================================================================
16232 // This name is KNOWN by the ADLC and cannot be changed.
16233 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
16234 // for this guy.
// ThreadLocal: the dedicated thread register (thread_RegP) already
// holds Thread::current(), so this is a zero-size, zero-cost no-op
// that merely binds the value to an operand.
instruct tlsLoadP(thread_RegP dst)
%{
  match(Set dst (ThreadLocal));

  ins_cost(0);

  format %{ " -- \t// $dst=Thread::current(), empty" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
16249 
16250 // ====================VECTOR INSTRUCTIONS=====================================
16251 
16252 // Load vector (32 bits)
// Load a 32-bit vector into the low half of a D register (ldr s-form).
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}
16262 
16263 // Load vector (64 bits)
16264 instruct loadV8(vecD dst, vmem8 mem)
16265 %{
16266   predicate(n->as_LoadVector()->memory_size() == 8);
16267   match(Set dst (LoadVector mem));
16268   ins_cost(4 * INSN_COST);
16269   format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
16270   ins_encode( aarch64_enc_ldrvD(dst, mem) );
16271   ins_pipe(vload_reg_mem64);
16272 %}
16273 
16274 // Load Vector (128 bits)
16275 instruct loadV16(vecX dst, vmem16 mem)
16276 %{
16277   predicate(n->as_LoadVector()->memory_size() == 16);
16278   match(Set dst (LoadVector mem));
16279   ins_cost(4 * INSN_COST);
16280   format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
16281   ins_encode( aarch64_enc_ldrvQ(dst, mem) );
16282   ins_pipe(vload_reg_mem128);
16283 %}
16284 
16285 // Store Vector (32 bits)
16286 instruct storeV4(vecD src, vmem4 mem)
16287 %{
16288   predicate(n->as_StoreVector()->memory_size() == 4);
16289   match(Set mem (StoreVector mem src));
16290   ins_cost(4 * INSN_COST);
16291   format %{ "strs   $mem,$src\t# vector (32 bits)" %}
16292   ins_encode( aarch64_enc_strvS(src, mem) );
16293   ins_pipe(vstore_reg_mem64);
16294 %}
16295 
16296 // Store Vector (64 bits)
16297 instruct storeV8(vecD src, vmem8 mem)
16298 %{
16299   predicate(n->as_StoreVector()->memory_size() == 8);
16300   match(Set mem (StoreVector mem src));
16301   ins_cost(4 * INSN_COST);
16302   format %{ "strd   $mem,$src\t# vector (64 bits)" %}
16303   ins_encode( aarch64_enc_strvD(src, mem) );
16304   ins_pipe(vstore_reg_mem64);
16305 %}
16306 
16307 // Store Vector (128 bits)
16308 instruct storeV16(vecX src, vmem16 mem)
16309 %{
16310   predicate(n->as_StoreVector()->memory_size() == 16);
16311   match(Set mem (StoreVector mem src));
16312   ins_cost(4 * INSN_COST);
16313   format %{ "strq   $mem,$src\t# vector (128 bits)" %}
16314   ins_encode( aarch64_enc_strvQ(src, mem) );
16315   ins_pipe(vstore_reg_mem128);
16316 %}
16317 
// Replicate a scalar into every lane of a vector ("dup"/"movi").
// Register-source forms use DUP; immediate forms materialize the
// constant directly. For B/H lanes only the low 8/16 bits of the
// immediate are meaningful, hence the masking below.

// Byte from GP register into all lanes of a 64-bit vector. The
// predicate also accepts length 4: a 4B vector still occupies a vecD.
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// Byte from GP register into all 16 lanes of a 128-bit vector.
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Byte immediate into all lanes (64-bit vector); low 8 bits used.
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

// Byte immediate into all lanes (128-bit vector); low 8 bits used.
instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

// Short from GP register into 4 halfword lanes (64-bit vector);
// length 2 also matches, still held in a vecD.
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// Short from GP register into 8 halfword lanes (128-bit vector).
instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Short immediate into 4 halfword lanes; low 16 bits used.
instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

// Short immediate into 8 halfword lanes; low 16 bits used.
instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

// Int from GP register into 2 word lanes (64-bit vector).
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// Int from GP register into 4 word lanes (128-bit vector).
instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Int immediate into 2 word lanes; full 32-bit constant, no masking.
instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

// Int immediate into 4 word lanes; full 32-bit constant, no masking.
instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

// Long from GP register into both doubleword lanes.
instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Zero a 128-bit vector by XORing the register with itself instead of
// a movi. NOTE(review): this matches ReplicateI (not ReplicateL) of
// constant zero — confirm that is the intended ideal-graph shape.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}

// Float from FP register into 2 word lanes (64-bit vector).
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}

// Float from FP register into 4 word lanes (128-bit vector).
instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}

// Double from FP register into both doubleword lanes.
instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
16530 
16531 // ====================REDUCTION ARITHMETIC====================================
16532 
// Add-reduce a 2-lane int vector into a scalar:
// dst = src1 + src2[0] + src2[1]. Each lane is extracted to a GP
// register with umov and accumulated with 32-bit adds.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16551 
// Add-reduce a 4-lane int vector into a scalar. The SIMD addv does
// the horizontal add of all four S lanes; lane 0 of the result is
// then moved to a GP register and added to src1.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16569 
// Multiply-reduce a 2-lane int vector into a scalar:
// dst = src1 * src2[0] * src2[1]. Each lane is extracted to a GP
// register with umov and folded in with 32-bit multiplies.
// (Fixed: the format string previously ended with a stray "\n\t",
// which printed a dangling empty continuation line in PrintAssembly
// listings.)
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16588 
// Multiply-reduce a 4-lane int vector into a scalar:
// dst = src1 * src2[0] * src2[1] * src2[2] * src2[3].
// The high doubleword of src2 is copied onto the low half of $tmp
// (ins) and multiplied lane-wise against src2 (mulv T2S), leaving two
// partial products in $tmp; those are then extracted and combined in
// GP registers.
// (Fixed: the format string previously ended with a stray "\n\t",
// which printed a dangling empty continuation line in PrintAssembly
// listings.)
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "ins   $tmp, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16613 
// Add-reduce a 2-lane float vector: dst = (src1 + src2[0]) + src2[1].
// The adds happen one lane at a time, in lane order — FP addition is
// not associative, so the sequential order presumably matters for the
// reduction's semantics. ins copies lane 1 of src2 into lane 0 of tmp.
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16633 
// Add-reduce a 4-lane float vector:
// dst = (((src1 + src2[0]) + src2[1]) + src2[2]) + src2[3].
// Each ins copies lane i of src2 into lane 0 of tmp so the scalar
// fadds can consume it; lanes are folded in strict order (FP add is
// not associative).
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16665 
// Multiply-reduce a 2-lane float vector:
// dst = (src1 * src2[0]) * src2[1]. ins copies lane 1 of src2 into
// lane 0 of tmp; lanes are folded sequentially (FP multiply is not
// associative).
// (Fixed: the format comment previously read "add reduction4f" —
// this is a 2-lane MUL reduction.)
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16685 
// Multiply-reduce a 4-lane float vector:
// dst = (((src1 * src2[0]) * src2[1]) * src2[2]) * src2[3].
// Each ins copies lane i of src2 into lane 0 of tmp; lanes are folded
// in strict order (FP multiply is not associative).
// (Fixed: the format comment previously read "add reduction4f" —
// this is a MUL reduction.)
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16717 
// Add-reduce a 2-lane double vector: dst = (src1 + src2[0]) + src2[1].
// ins copies lane 1 of src2 into lane 0 of tmp; lanes are folded in
// order (FP add is not associative).
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16737 
// Multiply-reduce a 2-lane double vector:
// dst = (src1 * src2[0]) * src2[1]. ins copies lane 1 of src2 into
// lane 0 of tmp; lanes are folded in order (FP multiply is not
// associative).
// (Fixed: the format comment previously read "add reduction2d" —
// this is a MUL reduction.)
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
  %}
  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16757 
16758 // ====================VECTOR ARITHMETIC=======================================
16759 
16760 // --------------------------------- ADD --------------------------------------
16761 
// Lane-wise vector add. Integer forms use addv, FP forms fadd; the
// arrangement (T8B/T4H/T2S/...) fixes lane width. 64-bit forms with
// "length == N || length == M" predicates also cover the shorter
// vector, which still occupies a vecD register.

// Byte add, 8 (or 4) lanes, 64-bit vector.
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Byte add, 16 lanes, 128-bit vector.
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Short add, 4 (or 2) lanes, 64-bit vector.
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Short add, 8 lanes, 128-bit vector.
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Int add, 2 lanes, 64-bit vector.
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Int add, 4 lanes, 128-bit vector.
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Long add, 2 lanes, 128-bit vector.
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Float add, 2 lanes, 64-bit vector.
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// Float add, 4 lanes, 128-bit vector.
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16889 
// Double add, 2 lanes, 128-bit vector.
// (Fixed: added the explicit length-2 predicate carried by every
// sibling 2D rule — vsub2D, vmul2D — for consistency; with 128-bit
// NEON vectors, 2 is the only possible AddVD length, so matching is
// unchanged.)
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16902 
16903 // --------------------------------- SUB --------------------------------------
16904 
// Lane-wise vector subtract (dst = src1 - src2). Integer forms use
// subv, FP forms fsub; same size/predicate scheme as the ADD rules.

// Byte subtract, 8 (or 4) lanes, 64-bit vector.
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Byte subtract, 16 lanes, 128-bit vector.
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Short subtract, 4 (or 2) lanes, 64-bit vector.
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Short subtract, 8 lanes, 128-bit vector.
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Int subtract, 2 lanes, 64-bit vector.
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Int subtract, 4 lanes, 128-bit vector.
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Long subtract, 2 lanes, 128-bit vector.
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Float subtract, 2 lanes, 64-bit vector.
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// Float subtract, 4 lanes, 128-bit vector.
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}

// Double subtract, 2 lanes, 128-bit vector.
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
17046 
17047 // --------------------------------- MUL --------------------------------------
17048 
// Lane-wise vector multiply. Integer forms use mulv, FP forms fmul.
// Note there is no MulVB/MulVL rule here — byte and long lane
// multiplies are presumably not supported on this path.

// Short multiply, 4 (or 2) lanes, 64-bit vector.
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// Short multiply, 8 lanes, 128-bit vector.
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// Int multiply, 2 lanes, 64-bit vector.
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// Int multiply, 4 lanes, 128-bit vector.
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// Float multiply, 2 lanes, 64-bit vector.
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// Float multiply, 4 lanes, 128-bit vector.
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// Double multiply, 2 lanes, 128-bit vector.
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17147 
17148 // --------------------------------- MLA --------------------------------------
17149 
17150 instruct vmla4S(vecD dst, vecD src1, vecD src2)
17151 %{
17152   predicate(n->as_Vector()->length() == 2 ||
17153             n->as_Vector()->length() == 4);
17154   match(Set dst (AddVS dst (MulVS src1 src2)));
17155   ins_cost(INSN_COST);
17156   format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
17157   ins_encode %{
17158     __ mlav(as_FloatRegister($dst$$reg), __ T4H,
17159             as_FloatRegister($src1$$reg),
17160             as_FloatRegister($src2$$reg));
17161   %}
17162   ins_pipe(vmla64);
17163 %}
17164 
17165 instruct vmla8S(vecX dst, vecX src1, vecX src2)
17166 %{
17167   predicate(n->as_Vector()->length() == 8);
17168   match(Set dst (AddVS dst (MulVS src1 src2)));
17169   ins_cost(INSN_COST);
17170   format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
17171   ins_encode %{
17172     __ mlav(as_FloatRegister($dst$$reg), __ T8H,
17173             as_FloatRegister($src1$$reg),
17174             as_FloatRegister($src2$$reg));
17175   %}
17176   ins_pipe(vmla128);
17177 %}
17178 
17179 instruct vmla2I(vecD dst, vecD src1, vecD src2)
17180 %{
17181   predicate(n->as_Vector()->length() == 2);
17182   match(Set dst (AddVI dst (MulVI src1 src2)));
17183   ins_cost(INSN_COST);
17184   format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
17185   ins_encode %{
17186     __ mlav(as_FloatRegister($dst$$reg), __ T2S,
17187             as_FloatRegister($src1$$reg),
17188             as_FloatRegister($src2$$reg));
17189   %}
17190   ins_pipe(vmla64);
17191 %}
17192 
17193 instruct vmla4I(vecX dst, vecX src1, vecX src2)
17194 %{
17195   predicate(n->as_Vector()->length() == 4);
17196   match(Set dst (AddVI dst (MulVI src1 src2)));
17197   ins_cost(INSN_COST);
17198   format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
17199   ins_encode %{
17200     __ mlav(as_FloatRegister($dst$$reg), __ T4S,
17201             as_FloatRegister($src1$$reg),
17202             as_FloatRegister($src2$$reg));
17203   %}
17204   ins_pipe(vmla128);
17205 %}
17206 
17207 // dst + src1 * src2
17208 instruct vmla2F(vecD dst, vecD src1, vecD src2) %{
17209   predicate(UseFMA && n->as_Vector()->length() == 2);
17210   match(Set dst (FmaVF  dst (Binary src1 src2)));
17211   format %{ "fmla  $dst,$src1,$src2\t# vector (2S)" %}
17212   ins_cost(INSN_COST);
17213   ins_encode %{
17214     __ fmla(as_FloatRegister($dst$$reg), __ T2S,
17215             as_FloatRegister($src1$$reg),
17216             as_FloatRegister($src2$$reg));
17217   %}
17218   ins_pipe(vmuldiv_fp64);
17219 %}
17220 
17221 // dst + src1 * src2
17222 instruct vmla4F(vecX dst, vecX src1, vecX src2) %{
17223   predicate(UseFMA && n->as_Vector()->length() == 4);
17224   match(Set dst (FmaVF  dst (Binary src1 src2)));
17225   format %{ "fmla  $dst,$src1,$src2\t# vector (4S)" %}
17226   ins_cost(INSN_COST);
17227   ins_encode %{
17228     __ fmla(as_FloatRegister($dst$$reg), __ T4S,
17229             as_FloatRegister($src1$$reg),
17230             as_FloatRegister($src2$$reg));
17231   %}
17232   ins_pipe(vmuldiv_fp128);
17233 %}
17234 
17235 // dst + src1 * src2
17236 instruct vmla2D(vecX dst, vecX src1, vecX src2) %{
17237   predicate(UseFMA && n->as_Vector()->length() == 2);
17238   match(Set dst (FmaVD  dst (Binary src1 src2)));
17239   format %{ "fmla  $dst,$src1,$src2\t# vector (2D)" %}
17240   ins_cost(INSN_COST);
17241   ins_encode %{
17242     __ fmla(as_FloatRegister($dst$$reg), __ T2D,
17243             as_FloatRegister($src1$$reg),
17244             as_FloatRegister($src2$$reg));
17245   %}
17246   ins_pipe(vmuldiv_fp128);
17247 %}
17248 
17249 // --------------------------------- MLS --------------------------------------
17250 
17251 instruct vmls4S(vecD dst, vecD src1, vecD src2)
17252 %{
17253   predicate(n->as_Vector()->length() == 2 ||
17254             n->as_Vector()->length() == 4);
17255   match(Set dst (SubVS dst (MulVS src1 src2)));
17256   ins_cost(INSN_COST);
17257   format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
17258   ins_encode %{
17259     __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
17260             as_FloatRegister($src1$$reg),
17261             as_FloatRegister($src2$$reg));
17262   %}
17263   ins_pipe(vmla64);
17264 %}
17265 
17266 instruct vmls8S(vecX dst, vecX src1, vecX src2)
17267 %{
17268   predicate(n->as_Vector()->length() == 8);
17269   match(Set dst (SubVS dst (MulVS src1 src2)));
17270   ins_cost(INSN_COST);
17271   format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
17272   ins_encode %{
17273     __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
17274             as_FloatRegister($src1$$reg),
17275             as_FloatRegister($src2$$reg));
17276   %}
17277   ins_pipe(vmla128);
17278 %}
17279 
17280 instruct vmls2I(vecD dst, vecD src1, vecD src2)
17281 %{
17282   predicate(n->as_Vector()->length() == 2);
17283   match(Set dst (SubVI dst (MulVI src1 src2)));
17284   ins_cost(INSN_COST);
17285   format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
17286   ins_encode %{
17287     __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
17288             as_FloatRegister($src1$$reg),
17289             as_FloatRegister($src2$$reg));
17290   %}
17291   ins_pipe(vmla64);
17292 %}
17293 
17294 instruct vmls4I(vecX dst, vecX src1, vecX src2)
17295 %{
17296   predicate(n->as_Vector()->length() == 4);
17297   match(Set dst (SubVI dst (MulVI src1 src2)));
17298   ins_cost(INSN_COST);
17299   format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
17300   ins_encode %{
17301     __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
17302             as_FloatRegister($src1$$reg),
17303             as_FloatRegister($src2$$reg));
17304   %}
17305   ins_pipe(vmla128);
17306 %}
17307 
17308 // dst - src1 * src2
17309 instruct vmls2F(vecD dst, vecD src1, vecD src2) %{
17310   predicate(UseFMA && n->as_Vector()->length() == 2);
17311   match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
17312   match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
17313   format %{ "fmls  $dst,$src1,$src2\t# vector (2S)" %}
17314   ins_cost(INSN_COST);
17315   ins_encode %{
17316     __ fmls(as_FloatRegister($dst$$reg), __ T2S,
17317             as_FloatRegister($src1$$reg),
17318             as_FloatRegister($src2$$reg));
17319   %}
17320   ins_pipe(vmuldiv_fp64);
17321 %}
17322 
17323 // dst - src1 * src2
17324 instruct vmls4F(vecX dst, vecX src1, vecX src2) %{
17325   predicate(UseFMA && n->as_Vector()->length() == 4);
17326   match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
17327   match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
17328   format %{ "fmls  $dst,$src1,$src2\t# vector (4S)" %}
17329   ins_cost(INSN_COST);
17330   ins_encode %{
17331     __ fmls(as_FloatRegister($dst$$reg), __ T4S,
17332             as_FloatRegister($src1$$reg),
17333             as_FloatRegister($src2$$reg));
17334   %}
17335   ins_pipe(vmuldiv_fp128);
17336 %}
17337 
17338 // dst - src1 * src2
17339 instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
17340   predicate(UseFMA && n->as_Vector()->length() == 2);
17341   match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
17342   match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
17343   format %{ "fmls  $dst,$src1,$src2\t# vector (2D)" %}
17344   ins_cost(INSN_COST);
17345   ins_encode %{
17346     __ fmls(as_FloatRegister($dst$$reg), __ T2D,
17347             as_FloatRegister($src1$$reg),
17348             as_FloatRegister($src2$$reg));
17349   %}
17350   ins_pipe(vmuldiv_fp128);
17351 %}
17352 
17353 // --------------------------------- DIV --------------------------------------
17354 
17355 instruct vdiv2F(vecD dst, vecD src1, vecD src2)
17356 %{
17357   predicate(n->as_Vector()->length() == 2);
17358   match(Set dst (DivVF src1 src2));
17359   ins_cost(INSN_COST);
17360   format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
17361   ins_encode %{
17362     __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
17363             as_FloatRegister($src1$$reg),
17364             as_FloatRegister($src2$$reg));
17365   %}
17366   ins_pipe(vmuldiv_fp64);
17367 %}
17368 
17369 instruct vdiv4F(vecX dst, vecX src1, vecX src2)
17370 %{
17371   predicate(n->as_Vector()->length() == 4);
17372   match(Set dst (DivVF src1 src2));
17373   ins_cost(INSN_COST);
17374   format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
17375   ins_encode %{
17376     __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
17377             as_FloatRegister($src1$$reg),
17378             as_FloatRegister($src2$$reg));
17379   %}
17380   ins_pipe(vmuldiv_fp128);
17381 %}
17382 
17383 instruct vdiv2D(vecX dst, vecX src1, vecX src2)
17384 %{
17385   predicate(n->as_Vector()->length() == 2);
17386   match(Set dst (DivVD src1 src2));
17387   ins_cost(INSN_COST);
17388   format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
17389   ins_encode %{
17390     __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
17391             as_FloatRegister($src1$$reg),
17392             as_FloatRegister($src2$$reg));
17393   %}
17394   ins_pipe(vmuldiv_fp128);
17395 %}
17396 
17397 // --------------------------------- SQRT -------------------------------------
17398 
17399 instruct vsqrt2D(vecX dst, vecX src)
17400 %{
17401   predicate(n->as_Vector()->length() == 2);
17402   match(Set dst (SqrtVD src));
17403   format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
17404   ins_encode %{
17405     __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
17406              as_FloatRegister($src$$reg));
17407   %}
17408   ins_pipe(vsqrt_fp128);
17409 %}
17410 
17411 // --------------------------------- ABS --------------------------------------
17412 
17413 instruct vabs2F(vecD dst, vecD src)
17414 %{
17415   predicate(n->as_Vector()->length() == 2);
17416   match(Set dst (AbsVF src));
17417   ins_cost(INSN_COST * 3);
17418   format %{ "fabs  $dst,$src\t# vector (2S)" %}
17419   ins_encode %{
17420     __ fabs(as_FloatRegister($dst$$reg), __ T2S,
17421             as_FloatRegister($src$$reg));
17422   %}
17423   ins_pipe(vunop_fp64);
17424 %}
17425 
17426 instruct vabs4F(vecX dst, vecX src)
17427 %{
17428   predicate(n->as_Vector()->length() == 4);
17429   match(Set dst (AbsVF src));
17430   ins_cost(INSN_COST * 3);
17431   format %{ "fabs  $dst,$src\t# vector (4S)" %}
17432   ins_encode %{
17433     __ fabs(as_FloatRegister($dst$$reg), __ T4S,
17434             as_FloatRegister($src$$reg));
17435   %}
17436   ins_pipe(vunop_fp128);
17437 %}
17438 
17439 instruct vabs2D(vecX dst, vecX src)
17440 %{
17441   predicate(n->as_Vector()->length() == 2);
17442   match(Set dst (AbsVD src));
17443   ins_cost(INSN_COST * 3);
17444   format %{ "fabs  $dst,$src\t# vector (2D)" %}
17445   ins_encode %{
17446     __ fabs(as_FloatRegister($dst$$reg), __ T2D,
17447             as_FloatRegister($src$$reg));
17448   %}
17449   ins_pipe(vunop_fp128);
17450 %}
17451 
17452 // --------------------------------- NEG --------------------------------------
17453 
17454 instruct vneg2F(vecD dst, vecD src)
17455 %{
17456   predicate(n->as_Vector()->length() == 2);
17457   match(Set dst (NegVF src));
17458   ins_cost(INSN_COST * 3);
17459   format %{ "fneg  $dst,$src\t# vector (2S)" %}
17460   ins_encode %{
17461     __ fneg(as_FloatRegister($dst$$reg), __ T2S,
17462             as_FloatRegister($src$$reg));
17463   %}
17464   ins_pipe(vunop_fp64);
17465 %}
17466 
17467 instruct vneg4F(vecX dst, vecX src)
17468 %{
17469   predicate(n->as_Vector()->length() == 4);
17470   match(Set dst (NegVF src));
17471   ins_cost(INSN_COST * 3);
17472   format %{ "fneg  $dst,$src\t# vector (4S)" %}
17473   ins_encode %{
17474     __ fneg(as_FloatRegister($dst$$reg), __ T4S,
17475             as_FloatRegister($src$$reg));
17476   %}
17477   ins_pipe(vunop_fp128);
17478 %}
17479 
17480 instruct vneg2D(vecX dst, vecX src)
17481 %{
17482   predicate(n->as_Vector()->length() == 2);
17483   match(Set dst (NegVD src));
17484   ins_cost(INSN_COST * 3);
17485   format %{ "fneg  $dst,$src\t# vector (2D)" %}
17486   ins_encode %{
17487     __ fneg(as_FloatRegister($dst$$reg), __ T2D,
17488             as_FloatRegister($src$$reg));
17489   %}
17490   ins_pipe(vunop_fp128);
17491 %}
17492 
17493 // --------------------------------- AND --------------------------------------
17494 
17495 instruct vand8B(vecD dst, vecD src1, vecD src2)
17496 %{
17497   predicate(n->as_Vector()->length_in_bytes() == 4 ||
17498             n->as_Vector()->length_in_bytes() == 8);
17499   match(Set dst (AndV src1 src2));
17500   ins_cost(INSN_COST);
17501   format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
17502   ins_encode %{
17503     __ andr(as_FloatRegister($dst$$reg), __ T8B,
17504             as_FloatRegister($src1$$reg),
17505             as_FloatRegister($src2$$reg));
17506   %}
17507   ins_pipe(vlogical64);
17508 %}
17509 
17510 instruct vand16B(vecX dst, vecX src1, vecX src2)
17511 %{
17512   predicate(n->as_Vector()->length_in_bytes() == 16);
17513   match(Set dst (AndV src1 src2));
17514   ins_cost(INSN_COST);
17515   format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
17516   ins_encode %{
17517     __ andr(as_FloatRegister($dst$$reg), __ T16B,
17518             as_FloatRegister($src1$$reg),
17519             as_FloatRegister($src2$$reg));
17520   %}
17521   ins_pipe(vlogical128);
17522 %}
17523 
17524 // --------------------------------- OR ---------------------------------------
17525 
17526 instruct vor8B(vecD dst, vecD src1, vecD src2)
17527 %{
17528   predicate(n->as_Vector()->length_in_bytes() == 4 ||
17529             n->as_Vector()->length_in_bytes() == 8);
17530   match(Set dst (OrV src1 src2));
17531   ins_cost(INSN_COST);
17532   format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
17533   ins_encode %{
17534     __ orr(as_FloatRegister($dst$$reg), __ T8B,
17535             as_FloatRegister($src1$$reg),
17536             as_FloatRegister($src2$$reg));
17537   %}
17538   ins_pipe(vlogical64);
17539 %}
17540 
// Bitwise OR, 128-bit vector (ORR).
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17554 
17555 // --------------------------------- XOR --------------------------------------
17556 
17557 instruct vxor8B(vecD dst, vecD src1, vecD src2)
17558 %{
17559   predicate(n->as_Vector()->length_in_bytes() == 4 ||
17560             n->as_Vector()->length_in_bytes() == 8);
17561   match(Set dst (XorV src1 src2));
17562   ins_cost(INSN_COST);
17563   format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
17564   ins_encode %{
17565     __ eor(as_FloatRegister($dst$$reg), __ T8B,
17566             as_FloatRegister($src1$$reg),
17567             as_FloatRegister($src2$$reg));
17568   %}
17569   ins_pipe(vlogical64);
17570 %}
17571 
17572 instruct vxor16B(vecX dst, vecX src1, vecX src2)
17573 %{
17574   predicate(n->as_Vector()->length_in_bytes() == 16);
17575   match(Set dst (XorV src1 src2));
17576   ins_cost(INSN_COST);
17577   format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
17578   ins_encode %{
17579     __ eor(as_FloatRegister($dst$$reg), __ T16B,
17580             as_FloatRegister($src1$$reg),
17581             as_FloatRegister($src2$$reg));
17582   %}
17583   ins_pipe(vlogical128);
17584 %}
17585 
17586 // ------------------------------ Shift ---------------------------------------
17587 
17588 instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
17589   match(Set dst (LShiftCntV cnt));
17590   format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
17591   ins_encode %{
17592     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
17593   %}
17594   ins_pipe(vdup_reg_reg128);
17595 %}
17596 
17597 // Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
17598 instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
17599   match(Set dst (RShiftCntV cnt));
17600   format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
17601   ins_encode %{
17602     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
17603     __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
17604   %}
17605   ins_pipe(vdup_reg_reg128);
17606 %}
17607 
// Byte vector shifts by a register count. SSHL serves both LShiftVB and
// RShiftVB: for the right shift the count vector was already negated by
// vshiftcntR above. USHL (unsigned) handles the logical right shift.

instruct vsll8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Logical (unsigned) right shift, bytes: USHL with the negated count.
instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17663 
// Byte vector shifts by an immediate. Java shift counts are not masked to
// the 8-bit element width here, so counts >= 8 must be handled explicitly:
// logical shifts (SHL/USHR) produce zero (synthesized as EOR of src with
// itself), while the arithmetic right shift saturates the count at 7 so
// every lane fills with its sign bit.

instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      // shifting out all bits: result is zero
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      // shifting out all bits: result is zero
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;  // arithmetic shift saturates: lanes become all sign bits
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;  // arithmetic shift saturates: lanes become all sign bits
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      // shifting out all bits: result is zero
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      // shifting out all bits: result is zero
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17770 
// Short (16-bit element) vector shifts by a register count. As with bytes,
// SSHL covers both left and arithmetic right shift (count pre-negated for
// right shifts by vshiftcntR); USHL covers the logical right shift.
// A 2-short vector is matched by the 4H form (upper lanes are don't-care).

instruct vsll4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17826 
// Short vector shifts by an immediate. Counts >= 16 (the element width)
// are handled explicitly: logical shifts produce zero (EOR of src with
// itself), arithmetic right shift saturates the count at 15.

instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // shifting out all bits: result is zero
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // shifting out all bits: result is zero
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) sh = 15;  // arithmetic shift saturates: lanes become all sign bits
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) sh = 15;  // arithmetic shift saturates: lanes become all sign bits
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // shifting out all bits: result is zero
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // shifting out all bits: result is zero
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17933 
// Int (32-bit element) vector shifts by a register count. SSHL covers both
// left and arithmetic right shift (count pre-negated for right shifts by
// vshiftcntR); USHL covers the logical right shift.

instruct vsll2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17987 
17988 instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
17989   predicate(n->as_Vector()->length() == 2);
17990   match(Set dst (LShiftVI src shift));
17991   ins_cost(INSN_COST);
17992   format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
17993   ins_encode %{
17994     __ shl(as_FloatRegister($dst$$reg), __ T2S,
17995            as_FloatRegister($src$$reg),
17996            (int)$shift$$constant);
17997   %}
17998   ins_pipe(vshift64_imm);
17999 %}
18000 
18001 instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
18002   predicate(n->as_Vector()->length() == 4);
18003   match(Set dst (LShiftVI src shift));
18004   ins_cost(INSN_COST);
18005   format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
18006   ins_encode %{
18007     __ shl(as_FloatRegister($dst$$reg), __ T4S,
18008            as_FloatRegister($src$$reg),
18009            (int)$shift$$constant);
18010   %}
18011   ins_pipe(vshift128_imm);
18012 %}
18013 
// Arithmetic (signed) right shift of a 2-lane int vector by a
// compile-time constant, using SSHR with an immediate.
// NOTE(review): the SSHR immediate encoding requires 1..esize; a
// shift of 0 is presumably folded away by C2 before matching — confirm.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}
18026 
// Arithmetic (signed) right shift of a 4-lane int vector by a
// compile-time constant (128-bit form of the rule above).
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18039 
// Logical (unsigned) right shift of a 2-lane int vector by a
// compile-time constant, using USHR with an immediate.
// NOTE(review): USHR immediates are 1..esize; shift-by-0 is presumably
// folded by C2 before this rule can match — confirm.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}
18052 
// Logical (unsigned) right shift of a 4-lane int vector by a
// compile-time constant (128-bit form of the rule above).
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18065 
// Variable shift of a 2-lane long vector via SSHL, which shifts left
// for positive lane values and right for negative ones.
// NOTE(review): this single rule matches BOTH LShiftVL and RShiftVL
// onto one SSHL. That is only correct if the shift operand reaching the
// RShiftVL form has been negated upstream (by the shift-count rules
// elsewhere in this file); otherwise a signed right shift would be
// mis-encoded as a left shift. Verify against the RShiftCntV rules —
// sibling int-vector rules in this section use separate sshl/ushl
// instructions per shift direction.
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
18079 
// Logical (unsigned) right shift of a 2-lane long vector by per-lane
// variable amounts. USHL shifts right when the lane value is negative.
// NOTE(review): assumes the shift vector was negated upstream for the
// right-shift case — verify against the RShiftCntV/vshiftcnt rules.
instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
18092 
// Left shift of a 2-lane long vector by a compile-time constant,
// using SHL with an immediate on 64-bit (D) lanes.
// Java long shifts are masked to 0..63, matching the legal SHL
// immediate range for D lanes — presumably guaranteed by the IR.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18105 
// Arithmetic (signed) right shift of a 2-lane long vector by a
// compile-time constant, using SSHR with an immediate.
// NOTE(review): SSHR immediates are 1..esize; shift-by-0 is presumably
// folded by C2 before matching — confirm.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18118 
// Logical (unsigned) right shift of a 2-lane long vector by a
// compile-time constant, using USHR with an immediate.
// NOTE(review): USHR immediates are 1..esize; shift-by-0 is presumably
// folded by C2 before matching — confirm.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18131 
18132 //----------PEEPHOLE RULES-----------------------------------------------------
18133 // These must follow all instruction definitions as they use the names
18134 // defined in the instructions definitions.
18135 //
18136 // peepmatch ( root_instr_name [preceding_instruction]* );
18137 //
18138 // peepconstraint %{
18139 // (instruction_number.operand_name relational_op instruction_number.operand_name
18140 //  [, ...] );
18141 // // instruction numbers are zero-based using left to right order in peepmatch
18142 //
18143 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
18144 // // provide an instruction_number.operand_name for each operand that appears
18145 // // in the replacement instruction's match rule
18146 //
18147 // ---------VM FLAGS---------------------------------------------------------
18148 //
18149 // All peephole optimizations can be turned off using -XX:-OptoPeephole
18150 //
18151 // Each peephole rule is given an identifying number starting with zero and
18152 // increasing by one in the order seen by the parser.  An individual peephole
18153 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
18154 // on the command-line.
18155 //
18156 // ---------CURRENT LIMITATIONS----------------------------------------------
18157 //
18158 // Only match adjacent instructions in same basic block
18159 // Only equality constraints
18160 // Only constraints between operands, not (0.dest_reg == RAX_enc)
18161 // Only one replacement instruction
18162 //
18163 // ---------EXAMPLE----------------------------------------------------------
18164 //
18165 // // pertinent parts of existing instructions in architecture description
18166 // instruct movI(iRegINoSp dst, iRegI src)
18167 // %{
18168 //   match(Set dst (CopyI src));
18169 // %}
18170 //
18171 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
18172 // %{
18173 //   match(Set dst (AddI dst src));
18174 //   effect(KILL cr);
18175 // %}
18176 //
18177 // // Change (inc mov) to lea
18178 // peephole %{
//   // increment preceded by register-register move
18180 //   peepmatch ( incI_iReg movI );
18181 //   // require that the destination register of the increment
18182 //   // match the destination register of the move
18183 //   peepconstraint ( 0.dst == 1.dst );
18184 //   // construct a replacement instruction that sets
18185 //   // the destination to ( move's source register + one )
18186 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
18187 // %}
18188 //
18189 
18190 // Implementation no longer uses movX instructions since
18191 // machine-independent system no longer uses CopyX nodes.
18192 //
18193 // peephole
18194 // %{
18195 //   peepmatch (incI_iReg movI);
18196 //   peepconstraint (0.dst == 1.dst);
18197 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18198 // %}
18199 
18200 // peephole
18201 // %{
18202 //   peepmatch (decI_iReg movI);
18203 //   peepconstraint (0.dst == 1.dst);
18204 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18205 // %}
18206 
18207 // peephole
18208 // %{
18209 //   peepmatch (addI_iReg_imm movI);
18210 //   peepconstraint (0.dst == 1.dst);
18211 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18212 // %}
18213 
18214 // peephole
18215 // %{
18216 //   peepmatch (incL_iReg movL);
18217 //   peepconstraint (0.dst == 1.dst);
18218 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18219 // %}
18220 
18221 // peephole
18222 // %{
18223 //   peepmatch (decL_iReg movL);
18224 //   peepconstraint (0.dst == 1.dst);
18225 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18226 // %}
18227 
18228 // peephole
18229 // %{
18230 //   peepmatch (addL_iReg_imm movL);
18231 //   peepconstraint (0.dst == 1.dst);
18232 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18233 // %}
18234 
18235 // peephole
18236 // %{
18237 //   peepmatch (addP_iReg_imm movP);
18238 //   peepconstraint (0.dst == 1.dst);
18239 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
18240 // %}
18241 
18242 // // Change load of spilled value to only a spill
18243 // instruct storeI(memory mem, iRegI src)
18244 // %{
18245 //   match(Set mem (StoreI mem src));
18246 // %}
18247 //
18248 // instruct loadI(iRegINoSp dst, memory mem)
18249 // %{
18250 //   match(Set dst (LoadI mem));
18251 // %}
18252 //
18253 
18254 //----------SMARTSPILL RULES---------------------------------------------------
18255 // These must follow all instruction definitions as they use the names
18256 // defined in the instructions definitions.
18257 
18258 // Local Variables:
18259 // mode: c++
18260 // End: