1 //
   2 // Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, Red Hat Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.
  32 
register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// We must define the 64 bit int registers in two 32 bit halves, the
// real lower register and a virtual upper half register. upper halves
// are used by the register allocator but are not actually supplied as
// operands to memory ops.
//
// follow the C1 compiler in making registers
//
//   r0-r7,r10-r26 volatile (caller save)
//   r27-r31 system (no save, no allocate)
//   r8-r9 invisible to the allocator (so we can use them as scratch regs)
//
// as regards Java usage: we don't use any callee save registers
// because this makes it difficult to de-optimise a frame (see comment
// in x86 implementation of Deoptimization::unwind_callee_save_values)
//

// General Registers

reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        ); // rmethod
reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values up to 8 * 32
 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
 160 // for Java use float registers v0-v15 are always save on call whereas
 161 // the platform ABI treats v8-v15 as callee save). float registers
 162 // v16-v31 are SOC as per the platform spec
 163 
 164   reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
 165   reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
 166   reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
 167   reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );
 168 
 169   reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
 170   reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
 171   reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
 172   reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );
 173 
 174   reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
 175   reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
 176   reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
 177   reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );
 178 
 179   reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
 180   reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
 181   reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
 182   reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );
 183 
 184   reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
 185   reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
 186   reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
 187   reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );
 188 
 189   reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
 190   reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
 191   reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
 192   reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );
 193 
 194   reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
 195   reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
 196   reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
 197   reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );
 198 
 199   reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
 200   reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
 201   reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
 202   reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );
 203 
 204   reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
 205   reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
 206   reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
 207   reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );
 208 
 209   reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
 210   reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
 211   reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
 212   reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );
 213 
 214   reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
 215   reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
 216   reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
 217   reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
 218 
 219   reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
 220   reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
 221   reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
 222   reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
 223 
 224   reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
 225   reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
 226   reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
 227   reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
 228 
 229   reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
 230   reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
 231   reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
 232   reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
 233 
 234   reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
 235   reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
 236   reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
 237   reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
 238 
 239   reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
 240   reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
 241   reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
 242   reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
 243 
 244   reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
 245   reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
 246   reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
 247   reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
 248 
 249   reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
 250   reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
 251   reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
 252   reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
 253 
 254   reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
 255   reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
 256   reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
 257   reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
 258 
 259   reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
 260   reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
 261   reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
 262   reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
 263 
 264   reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
 265   reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
 266   reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
 267   reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
 268 
 269   reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
 270   reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
 271   reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
 272   reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
 273 
 274   reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
 275   reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
 276   reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
 277   reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
 278 
 279   reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
 280   reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
 281   reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
 282   reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
 283 
 284   reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
 285   reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
 286   reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
 287   reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
 288 
 289   reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
 290   reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
 291   reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
 292   reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
 293 
 294   reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
 295   reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
 296   reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
 297   reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
 298 
 299   reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
 300   reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
 301   reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
 302   reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
 303 
 304   reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
 305   reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
 306   reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
 307   reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
 308 
 309   reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
 310   reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
 311   reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
 312   reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
 313 
 314   reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
 315   reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
 316   reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
 317   reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
 318 
 319   reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
 320   reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
 321   reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
 322   reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
 328 // the AArch64 CSPR status flag register is not directly acessible as
 329 // instruction operand. the FPSR status flag register is a system
 330 // register which can be written/read using MSR/MRS but again does not
 331 // appear as an operand (a code identifying the FSPR occurs as an
 332 // immediate value in the instruction).
 333 
 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

// Integer registers: scratch/volatile registers first, then argument
// registers, then the remaining allocatable registers, with the
// non-allocatable special registers last.
alloc_class chunk0(
    // volatiles
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,

    // arg registers
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,

    // non-volatiles
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers

    R27, R27_H, // heapbase
    R28, R28_H, // thread
    R29, R29_H, // fp
    R30, R30_H, // lr
    R31, R31_H, // sp
);

// Float registers: v16-v31 first (no save obligation in the platform
// ABI), then the argument registers, then v8-v15.
alloc_class chunk1(

    // no save
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,

    // arg registers
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,

    // non-volatiles
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
);

// Condition flags get a chunk of their own.
alloc_class chunk2(RFLAGS);
 428 
//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Class for all 32 bit integer registers -- excludes SP which will
// never be used as an integer register
reg_class any_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26,
    R27,
    R28,
    R29,
    R30
);

// Singleton class for R0 int register
reg_class int_r0_reg(R0);

// Singleton class for R2 int register
reg_class int_r2_reg(R2);

// Singleton class for R3 int register
reg_class int_r3_reg(R3);

// Singleton class for R4 int register
reg_class int_r4_reg(R4);

// Class for all long integer registers (including RSP)
reg_class any_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 517 
// Class for all non-special integer registers, excluding the frame
// pointer (R29)
reg_class no_special_reg32_no_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
 /* R29, */                     // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);

// Same as above but additionally allowing R29 (fp) to be allocated
reg_class no_special_reg32_with_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
    R29,                        // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);

// Selects between the two classes above depending on PreserveFramePointer
reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
// Class for all non-special long integer registers, excluding the
// frame pointer (R29)
reg_class no_special_reg_no_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

// Same as above but additionally allowing R29 (fp) to be allocated
reg_class no_special_reg_with_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
    R29, R29_H,                 // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

// Selects between the two classes above depending on PreserveFramePointer
reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
// Class for 64 bit register r0
reg_class r0_reg(
    R0, R0_H
);

// Class for 64 bit register r1
reg_class r1_reg(
    R1, R1_H
);

// Class for 64 bit register r2
reg_class r2_reg(
    R2, R2_H
);

// Class for 64 bit register r3
reg_class r3_reg(
    R3, R3_H
);

// Class for 64 bit register r4
reg_class r4_reg(
    R4, R4_H
);

// Class for 64 bit register r5
reg_class r5_reg(
    R5, R5_H
);

// Class for 64 bit register r10
reg_class r10_reg(
    R10, R10_H
);

// Class for 64 bit register r11
reg_class r11_reg(
    R11, R11_H
);

// Class for method register (r12)
reg_class method_reg(
    R12, R12_H
);

// Class for heapbase register (r27)
reg_class heapbase_reg(
    R27, R27_H
);

// Class for thread register (r28)
reg_class thread_reg(
    R28, R28_H
);

// Class for frame pointer register (r29)
reg_class fp_reg(
    R29, R29_H
);

// Class for link register (r30)
reg_class lr_reg(
    R30, R30_H
);

// Class for long sp register (R31 is defined from r31_sp above)
reg_class sp_reg(
  R31, R31_H
);
 725 
// Class for all pointer registers, including the special registers
// R27-R31
reg_class ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);

// Class for all non_special pointer registers (the special registers
// R27-R31 are excluded)
reg_class no_special_ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 793 
// Class for all float registers (only the low 32-bit slice of each V
// register is listed)
reg_class float_reg(
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
    V17,
    V18,
    V19,
    V20,
    V21,
    V22,
    V23,
    V24,
    V25,
    V26,
    V27,
    V28,
    V29,
    V30,
    V31
);

// Double precision float registers have virtual `high halves' that
// are needed by the allocator.
// Class for all double registers
reg_class double_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 867 
// Class for all 64bit vector registers
// (membership is identical to double_reg: a 64-bit vector occupies
// the same Vn/Vn_H slot pair as a double)
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 903 
// Class for all 128bit vector registers
// (four slots per register -- Vn, Vn_H, Vn_J, Vn_K -- covering the
// full 128-bit view of each V register)
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
// Class for 128 bit register v0
// Singleton class: pins the allocator to register v0 for rules that
// require that specific register.
// n.b. only the Vn/Vn_H slot pair is listed (as in double_reg), not
// the four slots used by vectorx_reg -- presumably sufficient for
// the fixed-register uses of this class; confirm against the rules
// that reference it.
reg_class v0_reg(
    V0, V0_H
);
 944 
// Class for 128 bit register v1
// Singleton class: pins the allocator to register v1 for rules that
// require that specific register.
reg_class v1_reg(
    V1, V1_H
);
 949 
// Class for 128 bit register v2
// Singleton class: pins the allocator to register v2 for rules that
// require that specific register.
reg_class v2_reg(
    V2, V2_H
);
 954 
// Class for 128 bit register v3
// Singleton class: pins the allocator to register v3 for rules that
// require that specific register.
reg_class v3_reg(
    V3, V3_H
);
 959 
// Singleton class for condition codes
// (the sole member is the flags register, RFLAGS)
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls rank twice the cost of a register op.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile references are by far the most expensive: ten times the
  // cost of a register op.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "gc/shared/cardTableModRefBS.hpp"
 999 
// Call trampoline stub support, consulted during branch shortening
// (see the banner below).  AArch64 uses no call trampolines, so both
// queries report zero.
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1017 
// Emitters and size bounds for the exception and deoptimization
// handler stubs.
class HandlerImpl {

 public:

  // Emit the handler code into cbuf; each returns an int (declared
  // here, defined elsewhere in this file).
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // The exception handler is a single far branch.
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // n.b. reserves 4 instruction words in total -- presumably one
    // adr plus up to three words for the far branch (cf.
    // MacroAssembler::far_branch_size); confirm against
    // emit_deopt_handler.
    return 4 * NativeInstruction::instruction_size;
  }
};
1034 
  // graph traversal helpers
  // n.b. these are declared here and defined in the source %{ ... %}
  // block below

  // navigate from a node to its parent/child membar via the
  // intervening Ctl/Mem projections (NULL if the shape doesn't match)
  MemBarNode *parent_membar(const Node *n);
  MemBarNode *child_membar(const MemBarNode *n);
  // true iff barrier can start a volatile-put membar sequence
  bool leading_membar(const MemBarNode *barrier);

  // true iff barrier is the StoreLoad membar of a GC card mark
  bool is_card_mark_membar(const MemBarNode *barrier);

  // walk between the membars bracketing a volatile put, validating
  // the expected subgraph shape in each direction
  MemBarNode *leading_to_normal(MemBarNode *leading);
  MemBarNode *normal_to_leading(const MemBarNode *barrier);
  MemBarNode *card_mark_to_trailing(const MemBarNode *barrier);
  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
  MemBarNode *trailing_to_leading(const MemBarNode *trailing);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);
1062 %}
1063 
1064 source %{
1065 
1066   // Optimizaton of volatile gets and puts
1067   // -------------------------------------
1068   //
1069   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1070   // use to implement volatile reads and writes. For a volatile read
1071   // we simply need
1072   //
1073   //   ldar<x>
1074   //
1075   // and for a volatile write we need
1076   //
1077   //   stlr<x>
1078   // 
1079   // Alternatively, we can implement them by pairing a normal
1080   // load/store with a memory barrier. For a volatile read we need
1081   // 
1082   //   ldr<x>
1083   //   dmb ishld
1084   //
1085   // for a volatile write
1086   //
1087   //   dmb ish
1088   //   str<x>
1089   //   dmb ish
1090   //
1091   // In order to generate the desired instruction sequence we need to
1092   // be able to identify specific 'signature' ideal graph node
  // sequences which i) occur as a translation of volatile reads or
1094   // writes and ii) do not occur through any other translation or
  // graph transformation. We can then provide alternative adlc
1096   // matching rules which translate these node sequences to the
1097   // desired machine code sequences. Selection of the alternative
1098   // rules can be implemented by predicates which identify the
1099   // relevant node sequences.
1100   //
1101   // The ideal graph generator translates a volatile read to the node
1102   // sequence
1103   //
1104   //   LoadX[mo_acquire]
1105   //   MemBarAcquire
1106   //
1107   // As a special case when using the compressed oops optimization we
1108   // may also see this variant
1109   //
1110   //   LoadN[mo_acquire]
1111   //   DecodeN
1112   //   MemBarAcquire
1113   //
1114   // A volatile write is translated to the node sequence
1115   //
1116   //   MemBarRelease
1117   //   StoreX[mo_release] {CardMark}-optional
1118   //   MemBarVolatile
1119   //
1120   // n.b. the above node patterns are generated with a strict
1121   // 'signature' configuration of input and output dependencies (see
1122   // the predicates below for exact details). The card mark may be as
1123   // simple as a few extra nodes or, in a few GC configurations, may
1124   // include more complex control flow between the leading and
1125   // trailing memory barriers. However, whatever the card mark
1126   // configuration these signatures are unique to translated volatile
1127   // reads/stores -- they will not appear as a result of any other
1128   // bytecode translation or inlining nor as a consequence of
1129   // optimizing transforms.
1130   //
1131   // We also want to catch inlined unsafe volatile gets and puts and
1132   // be able to implement them using either ldar<x>/stlr<x> or some
  // combination of ldr<x>/str<x> and dmb instructions.
1134   //
1135   // Inlined unsafe volatiles puts manifest as a minor variant of the
1136   // normal volatile put node sequence containing an extra cpuorder
1137   // membar
1138   //
1139   //   MemBarRelease
1140   //   MemBarCPUOrder
1141   //   StoreX[mo_release] {CardMark}-optional
1142   //   MemBarVolatile
1143   //
1144   // n.b. as an aside, the cpuorder membar is not itself subject to
1145   // matching and translation by adlc rules.  However, the rule
1146   // predicates need to detect its presence in order to correctly
1147   // select the desired adlc rules.
1148   //
1149   // Inlined unsafe volatile gets manifest as a somewhat different
1150   // node sequence to a normal volatile get
1151   //
1152   //   MemBarCPUOrder
1153   //        ||       \\
1154   //   MemBarAcquire LoadX[mo_acquire]
1155   //        ||
1156   //   MemBarCPUOrder
1157   //
1158   // In this case the acquire membar does not directly depend on the
1159   // load. However, we can be sure that the load is generated from an
1160   // inlined unsafe volatile get if we see it dependent on this unique
1161   // sequence of membar nodes. Similarly, given an acquire membar we
1162   // can know that it was added because of an inlined unsafe volatile
1163   // get if it is fed and feeds a cpuorder membar and if its feed
1164   // membar also feeds an acquiring load.
1165   //
1166   // So, where we can identify these volatile read and write
1167   // signatures we can choose to plant either of the above two code
1168   // sequences. For a volatile read we can simply plant a normal
1169   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1170   // also choose to inhibit translation of the MemBarAcquire and
1171   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1172   //
1173   // When we recognise a volatile store signature we can choose to
1174   // plant at a dmb ish as a translation for the MemBarRelease, a
1175   // normal str<x> and then a dmb ish for the MemBarVolatile.
1176   // Alternatively, we can inhibit translation of the MemBarRelease
1177   // and MemBarVolatile and instead plant a simple stlr<x>
1178   // instruction.
1179   //
1180   // Of course, the above only applies when we see these signature
1181   // configurations. We still want to plant dmb instructions in any
1182   // other cases where we may see a MemBarAcquire, MemBarRelease or
1183   // MemBarVolatile. For example, at the end of a constructor which
1184   // writes final/volatile fields we will see a MemBarRelease
1185   // instruction and this needs a 'dmb ish' lest we risk the
1186   // constructed object being visible without making the
1187   // final/volatile field writes visible.
1188   //
1189   // n.b. the translation rules below which rely on detection of the
1190   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1191   // If we see anything other than the signature configurations we
1192   // always just translate the loads and stores to ldr<x> and str<x>
1193   // and translate acquire, release and volatile membars to the
1194   // relevant dmb instructions.
1195   //
1196 
1197   // graph traversal helpers used for volatile put/get optimization
1198 
1199   // 1) general purpose helpers
1200 
1201   // if node n is linked to a parent MemBarNode by an intervening
1202   // Control and Memory ProjNode return the MemBarNode otherwise return
1203   // NULL.
1204   //
1205   // n may only be a Load or a MemBar.
1206 
1207   MemBarNode *parent_membar(const Node *n)
1208   {
1209     Node *ctl = NULL;
1210     Node *mem = NULL;
1211     Node *membar = NULL;
1212 
1213     if (n->is_Load()) {
1214       ctl = n->lookup(LoadNode::Control);
1215       mem = n->lookup(LoadNode::Memory);
1216     } else if (n->is_MemBar()) {
1217       ctl = n->lookup(TypeFunc::Control);
1218       mem = n->lookup(TypeFunc::Memory);
1219     } else {
1220         return NULL;
1221     }
1222 
1223     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj())
1224       return NULL;
1225 
1226     membar = ctl->lookup(0);
1227 
1228     if (!membar || !membar->is_MemBar())
1229       return NULL;
1230 
1231     if (mem->lookup(0) != membar)
1232       return NULL;
1233 
1234     return membar->as_MemBar();
1235   }
1236 
1237   // if n is linked to a child MemBarNode by intervening Control and
1238   // Memory ProjNodes return the MemBarNode otherwise return NULL.
1239 
1240   MemBarNode *child_membar(const MemBarNode *n)
1241   {
1242     ProjNode *ctl = n->proj_out(TypeFunc::Control);
1243     ProjNode *mem = n->proj_out(TypeFunc::Memory);
1244 
1245     // MemBar needs to have both a Ctl and Mem projection
1246     if (! ctl || ! mem)
1247       return NULL;
1248 
1249     MemBarNode *child = NULL;
1250     Node *x;
1251 
1252     for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1253       x = ctl->fast_out(i);
1254       // if we see a membar we keep hold of it. we may also see a new
1255       // arena copy of the original but it will appear later
1256       if (x->is_MemBar()) {
1257           child = x->as_MemBar();
1258           break;
1259       }
1260     }
1261 
1262     if (child == NULL)
1263       return NULL;
1264 
1265     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1266       x = mem->fast_out(i);
1267       // if we see a membar we keep hold of it. we may also see a new
1268       // arena copy of the original but it will appear later
1269       if (x == child) {
1270         return child;
1271       }
1272     }
1273     return NULL;
1274   }
1275 
1276   // helper predicate use to filter candidates for a leading memory
1277   // barrier
1278   //
1279   // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1280   // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1281 
1282   bool leading_membar(const MemBarNode *barrier)
1283   {
1284     int opcode = barrier->Opcode();
1285     // if this is a release membar we are ok
1286     if (opcode == Op_MemBarRelease)
1287       return true;
1288     // if its a cpuorder membar . . .
1289     if (opcode != Op_MemBarCPUOrder)
1290       return false;
1291     // then the parent has to be a release membar
1292     MemBarNode *parent = parent_membar(barrier);
1293     if (!parent)
1294       return false;
1295     opcode = parent->Opcode();
1296     return opcode == Op_MemBarRelease;
1297   }
1298  
1299   // 2) card mark detection helper
1300 
1301   // helper predicate which can be used to detect a volatile membar
1302   // introduced as part of a conditional card mark sequence either by
1303   // G1 or by CMS when UseCondCardMark is true.
1304   //
1305   // membar can be definitively determined to be part of a card mark
1306   // sequence if and only if all the following hold
1307   //
1308   // i) it is a MemBarVolatile
1309   //
1310   // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1311   // true
1312   //
1313   // iii) the node's Mem projection feeds a StoreCM node.
1314   
1315   bool is_card_mark_membar(const MemBarNode *barrier)
1316   {
1317     if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark))
1318       return false;
1319 
1320     if (barrier->Opcode() != Op_MemBarVolatile)
1321       return false;
1322 
1323     ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1324 
1325     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1326       Node *y = mem->fast_out(i);
1327       if (y->Opcode() == Op_StoreCM) {
1328         return true;
1329       }
1330     }
1331   
1332     return false;
1333   }
1334 
1335 
1336   // 3) helper predicates to traverse volatile put graphs which may
1337   // contain GC barrier subgraphs
1338 
1339   // Preamble
1340   // --------
1341   //
1342   // for volatile writes we can omit generating barriers and employ a
  // releasing store when we see a node sequence with a
1344   // leading MemBarRelease and a trailing MemBarVolatile as follows
1345   //
1346   //   MemBarRelease
1347   //  {      ||      } -- optional
1348   //  {MemBarCPUOrder}
1349   //         ||     \\
1350   //         ||     StoreX[mo_release]
1351   //         | \     /
1352   //         | MergeMem
1353   //         | /
1354   //   MemBarVolatile
1355   //
1356   // where
1357   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1358   //  | \ and / indicate further routing of the Ctl and Mem feeds
1359   // 
1360   // this is the graph we see for non-object stores. however, for a
1361   // volatile Object store (StoreN/P) we may see other nodes below the
1362   // leading membar because of the need for a GC pre- or post-write
1363   // barrier.
1364   //
  // with most GC configurations we will see this simple variant which
1366   // includes a post-write barrier card mark.
1367   //
1368   //   MemBarRelease______________________________
1369   //         ||    \\               Ctl \        \\
1370   //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
1371   //         | \     /                       . . .  /
1372   //         | MergeMem
1373   //         | /
1374   //         ||      /
1375   //   MemBarVolatile
1376   //
1377   // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1378   // the object address to an int used to compute the card offset) and
1379   // Ctl+Mem to a StoreB node (which does the actual card mark).
1380   //
1381   // n.b. a StoreCM node will only appear in this configuration when
1382   // using CMS. StoreCM differs from a normal card mark write (StoreB)
1383   // because it implies a requirement to order visibility of the card
1384   // mark (StoreCM) relative to the object put (StoreP/N) using a
1385   // StoreStore memory barrier (arguably this ought to be represented
1386   // explicitly in the ideal graph but that is not how it works). This
1387   // ordering is required for both non-volatile and volatile
1388   // puts. Normally that means we need to translate a StoreCM using
1389   // the sequence
1390   //
1391   //   dmb ishst
1392   //   stlrb
1393   //
1394   // However, in the case of a volatile put if we can recognise this
1395   // configuration and plant an stlr for the object write then we can
1396   // omit the dmb and just plant an strb since visibility of the stlr
1397   // is ordered before visibility of subsequent stores. StoreCM nodes
1398   // also arise when using G1 or using CMS with conditional card
1399   // marking. In these cases (as we shall see) we don't need to insert
1400   // the dmb when translating StoreCM because there is already an
1401   // intervening StoreLoad barrier between it and the StoreP/N.
1402   //
1403   // It is also possible to perform the card mark conditionally on it
1404   // currently being unmarked in which case the volatile put graph
1405   // will look slightly different
1406   //
1407   //   MemBarRelease
1408   //   MemBarCPUOrder___________________________________________
1409   //         ||    \\               Ctl \     Ctl \     \\  Mem \
1410   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
1411   //         | \     /                              \            |
1412   //         | MergeMem                            . . .      StoreB
1413   //         | /                                                /
1414   //         ||     /
1415   //   MemBarVolatile
1416   //
1417   // It is worth noting at this stage that both the above
1418   // configurations can be uniquely identified by checking that the
1419   // memory flow includes the following subgraph:
1420   //
1421   //   MemBarRelease
1422   //   MemBarCPUOrder
1423   //          |  \      . . .
1424   //          |  StoreX[mo_release]  . . .
1425   //          |   /
1426   //         MergeMem
1427   //          |
1428   //   MemBarVolatile
1429   //
1430   // This is referred to as a *normal* subgraph. It can easily be
1431   // detected starting from any candidate MemBarRelease,
1432   // StoreX[mo_release] or MemBarVolatile.
1433   //
1434   // the code below uses two helper predicates, leading_to_normal and
1435   // normal_to_leading to identify this configuration, one validating
1436   // the layout starting from the top membar and searching down and
1437   // the other validating the layout starting from the lower membar
1438   // and searching up.
1439   //
1440   // There are two special case GC configurations when a normal graph
1441   // may not be generated: when using G1 (which always employs a
1442   // conditional card mark); and when using CMS with conditional card
1443   // marking configured. These GCs are both concurrent rather than
1444   // stop-the world GCs. So they introduce extra Ctl+Mem flow into the
1445   // graph between the leading and trailing membar nodes, in
  // particular enforcing stronger memory serialisation between the
1447   // object put and the corresponding conditional card mark. CMS
1448   // employs a post-write GC barrier while G1 employs both a pre- and
1449   // post-write GC barrier. Of course the extra nodes may be absent --
1450   // they are only inserted for object puts. This significantly
1451   // complicates the task of identifying whether a MemBarRelease,
1452   // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
1453   // when using these GC configurations (see below).
1454   //
1455   // In both cases the post-write subtree includes an auxiliary
1456   // MemBarVolatile (StoreLoad barrier) separating the object put and
1457   // the read of the corresponding card. This poses two additional
1458   // problems.
1459   //
1460   // Firstly, a card mark MemBarVolatile needs to be distinguished
1461   // from a normal trailing MemBarVolatile. Resolving this first
1462   // problem is straightforward: a card mark MemBarVolatile always
1463   // projects a Mem feed to a StoreCM node and that is a unique marker
1464   //
1465   //      MemBarVolatile (card mark)
1466   //       C |    \     . . .
1467   //         |   StoreCM   . . .
1468   //       . . .
1469   //
1470   // The second problem is how the code generator is to translate the
1471   // card mark barrier? It always needs to be translated to a "dmb
1472   // ish" instruction whether or not it occurs as part of a volatile
1473   // put. A StoreLoad barrier is needed after the object put to ensure
1474   // i) visibility to GC threads of the object put and ii) visibility
1475   // to the mutator thread of any card clearing write by a GC
1476   // thread. Clearly a normal store (str) will not guarantee this
1477   // ordering but neither will a releasing store (stlr). The latter
1478   // guarantees that the object put is visible but does not guarantee
1479   // that writes by other threads have also been observed.
1480   // 
1481   // So, returning to the task of translating the object put and the
1482   // leading/trailing membar nodes: what do the non-normal node graph
1483   // look like for these 2 special cases? and how can we determine the
1484   // status of a MemBarRelease, StoreX[mo_release] or MemBarVolatile
1485   // in both normal and non-normal cases?
1486   //
1487   // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
  // which selects conditional execution based on the value loaded
1489   // (LoadB) from the card. Ctl and Mem are fed to the If via an
1490   // intervening StoreLoad barrier (MemBarVolatile).
1491   //
1492   // So, with CMS we may see a node graph which looks like this
1493   //
1494   //   MemBarRelease
1495   //   MemBarCPUOrder_(leading)__________________
1496   //     C |    M \       \\                   C \
1497   //       |       \    StoreN/P[mo_release]  CastP2X
1498   //       |    Bot \    /
1499   //       |       MergeMem
1500   //       |         /
1501   //      MemBarVolatile (card mark)
1502   //     C |  ||    M |
1503   //       | LoadB    |
1504   //       |   |      |
1505   //       | Cmp      |\
1506   //       | /        | \
1507   //       If         |  \
1508   //       | \        |   \
1509   // IfFalse  IfTrue  |    \
1510   //       \     / \  |     \
1511   //        \   / StoreCM    |
1512   //         \ /      |      |
1513   //        Region   . . .   |
1514   //          | \           /
1515   //          |  . . .  \  / Bot
1516   //          |       MergeMem
1517   //          |          |
1518   //        MemBarVolatile (trailing)
1519   //
1520   // The first MergeMem merges the AliasIdxBot Mem slice from the
1521   // leading membar and the oopptr Mem slice from the Store into the
1522   // card mark membar. The trailing MergeMem merges the AliasIdxBot
1523   // Mem slice from the card mark membar and the AliasIdxRaw slice
1524   // from the StoreCM into the trailing membar (n.b. the latter
1525   // proceeds via a Phi associated with the If region).
1526   //
1527   // G1 is quite a lot more complicated. The nodes inserted on behalf
1528   // of G1 may comprise: a pre-write graph which adds the old value to
1529   // the SATB queue; the releasing store itself; and, finally, a
1530   // post-write graph which performs a card mark.
1531   //
1532   // The pre-write graph may be omitted, but only when the put is
1533   // writing to a newly allocated (young gen) object and then only if
1534   // there is a direct memory chain to the Initialize node for the
1535   // object allocation. This will not happen for a volatile put since
1536   // any memory chain passes through the leading membar.
1537   //
1538   // The pre-write graph includes a series of 3 If tests. The outermost
1539   // If tests whether SATB is enabled (no else case). The next If tests
1540   // whether the old value is non-NULL (no else case). The third tests
1541   // whether the SATB queue index is > 0, if so updating the queue. The
1542   // else case for this third If calls out to the runtime to allocate a
1543   // new queue buffer.
1544   //
1545   // So with G1 the pre-write and releasing store subgraph looks like
1546   // this (the nested Ifs are omitted).
1547   //
1548   //  MemBarRelease (leading)____________
1549   //     C |  ||  M \   M \    M \  M \ . . .
1550   //       | LoadB   \  LoadL  LoadN   \
1551   //       | /        \                 \
1552   //       If         |\                 \
1553   //       | \        | \                 \
1554   //  IfFalse  IfTrue |  \                 \
1555   //       |     |    |   \                 |
1556   //       |     If   |   /\                |
1557   //       |     |          \               |
1558   //       |                 \              |
1559   //       |    . . .         \             |
1560   //       | /       | /       |            |
1561   //      Region  Phi[M]       |            |
1562   //       | \       |         |            |
1563   //       |  \_____ | ___     |            |
1564   //     C | C \     |   C \ M |            |
1565   //       | CastP2X | StoreN/P[mo_release] |
1566   //       |         |         |            |
1567   //     C |       M |       M |          M |
1568   //        \        |         |           /
1569   //                  . . . 
1570   //          (post write subtree elided)
1571   //                    . . .
1572   //             C \         M /
1573   //         MemBarVolatile (trailing)
1574   //
1575   // n.b. the LoadB in this subgraph is not the card read -- it's a
1576   // read of the SATB queue active flag.
1577   //
1578   // The G1 post-write subtree is also optional, this time when the
1579   // new value being written is either null or can be identified as a
1580   // newly allocated (young gen) object with no intervening control
1581   // flow. The latter cannot happen but the former may, in which case
1582   // the card mark membar is omitted and the memory feeds from the
1583   // leading membar and the StoreN/P are merged direct into the
1584   // trailing membar as per the normal subgraph. So, the only special
1585   // case which arises is when the post-write subgraph is generated.
1586   //
1587   // The kernel of the post-write G1 subgraph is the card mark itself
1588   // which includes a card mark memory barrier (MemBarVolatile), a
1589   // card test (LoadB), and a conditional update (If feeding a
1590   // StoreCM). These nodes are surrounded by a series of nested Ifs
1591   // which try to avoid doing the card mark. The top level If skips if
1592   // the object reference does not cross regions (i.e. it tests if
1593   // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1594   // need not be recorded. The next If, which skips on a NULL value,
1595   // may be absent (it is not generated if the type of value is >=
1596   // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1597   // checking if card_val != young).  n.b. although this test requires
1598   // a pre-read of the card it can safely be done before the StoreLoad
1599   // barrier. However that does not bypass the need to reread the card
1600   // after the barrier.
1601   //
1602   //                (pre-write subtree elided)
1603   //        . . .                  . . .    . . .  . . .
1604   //        C |                    M |     M |    M |
1605   //       Region                  Phi[M] StoreN    |
1606   //          |                     / \      |      |
1607   //         / \_______            /   \     |      |
1608   //      C / C \      . . .            \    |      |
1609   //       If   CastP2X . . .            |   |      |
1610   //       / \                           |   |      |
1611   //      /   \                          |   |      |
1612   // IfFalse IfTrue                      |   |      |
1613   //   |       |                         |   |     /|
1614   //   |       If                        |   |    / |
1615   //   |      / \                        |   |   /  |
1616   //   |     /   \                        \  |  /   |
1617   //   | IfFalse IfTrue                   MergeMem  |
1618   //   |  . . .    / \                       /      |
1619   //   |          /   \                     /       |
1620   //   |     IfFalse IfTrue                /        |
1621   //   |      . . .    |                  /         |
1622   //   |               If                /          |
1623   //   |               / \              /           |
1624   //   |              /   \            /            |
1625   //   |         IfFalse IfTrue       /             |
1626   //   |           . . .   |         /              |
1627   //   |                    \       /               |
1628   //   |                     \     /                |
1629   //   |             MemBarVolatile__(card mark)    |
1630   //   |                ||   C |  M \  M \          |
1631   //   |               LoadB   If    |    |         |
1632   //   |                      / \    |    |         |
1633   //   |                     . . .   |    |         |
1634   //   |                          \  |    |        /
1635   //   |                        StoreCM   |       /
1636   //   |                          . . .   |      /
1637   //   |                        _________/      /
1638   //   |                       /  _____________/
1639   //   |   . . .       . . .  |  /            /
1640   //   |    |                 | /   _________/
1641   //   |    |               Phi[M] /        /
1642   //   |    |                 |   /        /
1643   //   |    |                 |  /        /
1644   //   |  Region  . . .     Phi[M]  _____/
1645   //   |    /                 |    /
1646   //   |                      |   /   
1647   //   | . . .   . . .        |  /
1648   //   | /                    | /
1649   // Region           |  |  Phi[M]
1650   //   |              |  |  / Bot
1651   //    \            MergeMem 
1652   //     \            /
1653   //     MemBarVolatile
1654   //
1655   // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
1656   // from the leading membar and the oopptr Mem slice from the Store
1657   // into the card mark membar i.e. the memory flow to the card mark
1658   // membar still looks like a normal graph.
1659   //
1660   // The trailing MergeMem merges an AliasIdxBot Mem slice with other
1661   // Mem slices (from the StoreCM and other card mark queue stores).
1662   // However in this case the AliasIdxBot Mem slice does not come
1663   // direct from the card mark membar. It is merged through a series
1664   // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
1665   // from the leading membar with the Mem feed from the card mark
1666   // membar. Each Phi corresponds to one of the Ifs which may skip
1667   // around the card mark membar. So when the If implementing the NULL
1668   // value check has been elided the total number of Phis is 2
1669   // otherwise it is 3.
1670   //
1671   // So, the upshot is that in all cases the volatile put graph will
  // include a *normal* memory subgraph between the leading membar and
1673   // its child membar. When that child is not a card mark membar then
1674   // it marks the end of a volatile put subgraph. If the child is a
1675   // card mark membar then the normal subgraph will form part of a
1676   // volatile put subgraph if and only if the child feeds an
1677   // AliasIdxBot Mem feed to a trailing barrier via a MergeMem. That
1678   // feed is either direct (for CMS) or via 2 or 3 Phi nodes merging
1679   // the leading barrier memory flow (for G1).
1680   // 
1681   // The predicates controlling generation of instructions for store
1682   // and barrier nodes employ a few simple helper functions (described
1683   // below) which identify the presence or absence of these subgraph
1684   // configurations and provide a means of traversing from one node in
1685   // the subgraph to another.
1686 
1687   // leading_to_normal
1688   //
  // graph traversal helper which detects the normal case Mem feed
1690   // from a release membar (or, optionally, its cpuorder child) to a
1691   // dependent volatile membar i.e. it ensures that the following Mem
1692   // flow subgraph is present.
1693   //
1694   //   MemBarRelease
1695   //   MemBarCPUOrder
1696   //          |  \      . . .
1697   //          |  StoreN/P[mo_release]  . . .
1698   //          |   /
1699   //         MergeMem
1700   //          |
1701   //   MemBarVolatile
1702   //
1703   // if the correct configuration is present returns the volatile
1704   // membar otherwise NULL.
1705   //
1706   // the input membar is expected to be either a cpuorder membar or a
1707   // release membar. in the latter case it should not have a cpu membar
1708   // child.
1709   //
1710   // the returned membar may be a card mark membar rather than a
1711   // trailing membar.
1712 
1713   MemBarNode *leading_to_normal(MemBarNode *leading)
1714   {
1715     assert((leading->Opcode() == Op_MemBarRelease ||
1716             leading->Opcode() == Op_MemBarCPUOrder),
1717            "expecting a volatile or cpuroder membar!");
1718 
1719     // check the mem flow
1720     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
1721 
1722     if (!mem)
1723       return NULL;
1724 
1725     Node *x = NULL;
1726     StoreNode * st = NULL;
1727     MergeMemNode *mm = NULL;
1728 
1729     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1730       x = mem->fast_out(i);
1731       if (x->is_MergeMem()) {
1732         if (mm != NULL)
1733           return NULL;
1734         // two merge mems is one too many
1735         mm = x->as_MergeMem();
1736       } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
1737         // two releasing stores is one too many
1738         if (st != NULL)
1739           return NULL;
1740         st = x->as_Store();
1741       }
1742     }
1743 
1744     if (!mm || !st)
1745       return NULL;
1746 
1747     bool found = false;
1748     // ensure the store feeds the merge
1749     for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
1750       if (st->fast_out(i) == mm) {
1751         found = true;
1752         break;
1753       }
1754     }
1755 
1756     if (!found)
1757       return NULL;
1758 
1759     MemBarNode *mbvol = NULL;
1760     // ensure the merge feeds a volatile membar
1761     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
1762       x = mm->fast_out(i);
1763       if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
1764         mbvol = x->as_MemBar();
1765         break;
1766       }
1767     }
1768 
1769     return mbvol;
1770   }
1771 
1772   // normal_to_leading
1773   //
1774   // graph traversal helper which detects the normal case Mem feed
1775   // from either a card mark or a trailing membar to a preceding
1776   // release membar (optionally its cpuorder child) i.e. it ensures
1777   // that the following Mem flow subgraph is present.
1778   //
1779   //   MemBarRelease
1780   //   MemBarCPUOrder {leading}
1781   //          |  \      . . .
1782   //          |  StoreN/P[mo_release]  . . .
1783   //          |   /
1784   //         MergeMem
1785   //          |
1786   //   MemBarVolatile
1787   //
1788   // this predicate checks for the same flow as the previous predicate
1789   // but starting from the bottom rather than the top.
1790   //
  // if the configuration is present returns the cpuorder membar for
1792   // preference or when absent the release membar otherwise NULL.
1793   //
1794   // n.b. the input membar is expected to be a MemBarVolatile but
1795   // need not be a card mark membar.
1796 
  MemBarNode *normal_to_leading(const MemBarNode *barrier)
  {
    // input must be a volatile membar
    assert(barrier->Opcode() == Op_MemBarVolatile, "expecting a volatile membar");
    Node *x;

    // the Mem feed to the membar should be a merge
    x = barrier->in(TypeFunc::Memory);
    if (!x->is_MergeMem())
      return NULL;

    MergeMemNode *mm = x->as_MergeMem();

    // the AliasIdxBot slice should be another MemBar projection
    x = mm->in(Compile::AliasIdxBot);
    // ensure this is a non control projection
    if (!x->is_Proj() || x->is_CFG())
      return NULL;
    // if it is fed by a membar that's the one we want
    x = x->in(0);

    if (!x->is_MemBar())
      return NULL;

    MemBarNode *leading = x->as_MemBar();
    // reject invalid candidates (must be a release membar or a
    // cpuorder membar fed by a release membar)
    if (!leading_membar(leading))
      return NULL;

    // ok, we have a leading ReleaseMembar, now for the sanity clauses

    // the leading membar must feed Mem to a releasing store
    // n.b. a StoreCM is never the releasing store we are after
    ProjNode *mem = leading->proj_out(TypeFunc::Memory);
    StoreNode *st = NULL;
    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
      x = mem->fast_out(i);
      if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
        st = x->as_Store();
        break;
      }
    }
    if (st == NULL)
      return NULL;

    // the releasing store has to feed the same merge
    for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
      if (st->fast_out(i) == mm)
        return leading;
    }

    // store and merge are not connected -- not the normal subgraph
    return NULL;
  }
1849 
1850   // card_mark_to_trailing
1851   //
1852   // graph traversal helper which detects extra, non-normal Mem feed
1853   // from a card mark volatile membar to a trailing membar i.e. it
1854   // ensures that one of the following three GC post-write Mem flow
1855   // subgraphs is present.
1856   //
1857   // 1)
1858   //     . . .
1859   //       |
1860   //   MemBarVolatile (card mark)
1861   //      |          |     
1862   //      |        StoreCM
1863   //      |          |
1864   //      |        . . .
1865   //  Bot |  / 
1866   //   MergeMem 
1867   //      |
1868   //   MemBarVolatile (trailing)
1869   //
1870   //
1871   // 2)
1872   //   MemBarRelease/CPUOrder (leading)
1873   //    |
1874   //    | 
1875   //    |\       . . .
1876   //    | \        | 
1877   //    |  \  MemBarVolatile (card mark) 
1878   //    |   \   |     |
1879   //     \   \  |   StoreCM    . . .
1880   //      \   \ |
1881   //       \  Phi
1882   //        \ /
1883   //        Phi  . . .
1884   //     Bot |   /
1885   //       MergeMem
1886   //         |
1887   //   MemBarVolatile (trailing)
1888   //
1889   // 3)
1890   //   MemBarRelease/CPUOrder (leading)
1891   //    |
1892   //    |\
1893   //    | \
1894   //    |  \      . . .
1895   //    |   \       |
1896   //    |\   \  MemBarVolatile (card mark)
1897   //    | \   \   |     |
1898   //    |  \   \  |   StoreCM    . . .
1899   //    |   \   \ |
1900   //     \   \  Phi
1901   //      \   \ /  
1902   //       \  Phi
1903   //        \ /
1904   //        Phi  . . .
1905   //     Bot |   /
1906   //       MergeMem
1907   //         |
1908   //   MemBarVolatile (trailing)
1909   //
1910   // configuration 1 is only valid if UseConcMarkSweepGC &&
1911   // UseCondCardMark
1912   //
1913   // configurations 2 and 3 are only valid if UseG1GC.
1914   //
1915   // if a valid configuration is present returns the trailing membar
1916   // otherwise NULL.
1917   //
1918   // n.b. the supplied membar is expected to be a card mark
1919   // MemBarVolatile i.e. the caller must ensure the input node has the
1920   // correct operand and feeds Mem to a StoreCM node
1921 
1922   MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
1923   {
1924     // input must be a card mark volatile membar
1925     assert(is_card_mark_membar(barrier), "expecting a card mark membar");
1926 
1927     Node *feed = barrier->proj_out(TypeFunc::Memory);
1928     Node *x;
1929     MergeMemNode *mm = NULL;
1930 
1931     const int MAX_PHIS = 3;     // max phis we will search through
1932     int phicount = 0;           // current search count
1933 
1934     bool retry_feed = true;
1935     while (retry_feed) {
1936       // see if we have a direct MergeMem feed
1937       for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
1938         x = feed->fast_out(i);
1939         // the correct Phi will be merging a Bot memory slice
1940         if (x->is_MergeMem()) {
1941           mm = x->as_MergeMem();
1942           break;
1943         }
1944       }
1945       if (mm) {
1946         retry_feed = false;
1947       } else if (UseG1GC & phicount++ < MAX_PHIS) {
1948         // the barrier may feed indirectly via one or two Phi nodes
1949         PhiNode *phi = NULL;
1950         for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
1951           x = feed->fast_out(i);
1952           // the correct Phi will be merging a Bot memory slice
1953           if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
1954             phi = x->as_Phi();
1955             break;
1956           }
1957         }
1958         if (!phi)
1959           return NULL;
1960         // look for another merge below this phi
1961         feed = phi;
1962       } else {
1963         // couldn't find a merge
1964         return NULL;
1965       }
1966     }
1967 
1968     // sanity check this feed turns up as the expected slice
1969     assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");
1970 
1971     MemBarNode *trailing = NULL;
1972     // be sure we have a volatile membar below the merge
1973     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
1974       x = mm->fast_out(i);
1975       if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
1976         trailing = x->as_MemBar();
1977         break;
1978       }
1979     }
1980 
1981     return trailing;
1982   }
1983 
1984   // trailing_to_card_mark
1985   //
1986   // graph traversal helper which detects extra, non-normal Mem feed
1987   // from a trailing membar to a preceding card mark volatile membar
1988   // i.e. it identifies whether one of the three possible extra GC
1989   // post-write Mem flow subgraphs is present
1990   //
1991   // this predicate checks for the same flow as the previous predicate
1992   // but starting from the bottom rather than the top.
1993   //
  // if the configuration is present returns the card mark membar
1995   // otherwise NULL
1996 
  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
  {
    assert(!is_card_mark_membar(trailing), "not expecting a card mark membar");

    Node *x = trailing->in(TypeFunc::Memory);
    // the Mem feed to the membar should be a merge
    if (!x->is_MergeMem())
      return NULL;

    MergeMemNode *mm = x->as_MergeMem();

    x = mm->in(Compile::AliasIdxBot);
    // with G1 we may possibly see a Phi or two before we see a Memory
    // Proj from the card mark membar

    const int MAX_PHIS = 3;     // max phis we will search through
    int phicount = 0;           // current search count

    // when the Bot slice is already a Proj there is nothing to walk
    bool retry_feed = !x->is_Proj();

    while (retry_feed) {
      if (UseG1GC && x->is_Phi() && phicount++ < MAX_PHIS) {
        PhiNode *phi = x->as_Phi();
        ProjNode *proj = NULL;
        PhiNode *nextphi = NULL;
        bool found_leading = false;
        // scan the phi's inputs for a volatile membar Mem proj, a
        // further phi, and/or a projection from a leading membar
        for (uint i = 1; i < phi->req(); i++) {
          x = phi->in(i);
          if (x->is_Phi()) {
            nextphi = x->as_Phi();
          } else if (x->is_Proj()) {
            int opcode = x->in(0)->Opcode();
            if (opcode == Op_MemBarVolatile) {
              proj = x->as_Proj();
            } else if (opcode == Op_MemBarRelease ||
                       opcode == Op_MemBarCPUOrder) {
              // probably a leading membar
              found_leading = true;
            }
          }
        }
        // if we found a correct looking proj then retry from there;
        // otherwise we must have seen both a leading membar feed and
        // another phi, or this is the wrong config
        if (proj != NULL) {
          x = proj;
          retry_feed = false;
        } else if (found_leading && nextphi != NULL) {
          // retry from this phi to check phi2
          x = nextphi;
        } else {
          // not what we were looking for
          return NULL;
        }
      } else {
        // not a phi (or too many phis, or not G1) -- wrong config
        return NULL;
      }
    }
    // the proj has to come from the card mark membar
    x = x->in(0);
    if (!x->is_MemBar())
      return NULL;

    MemBarNode *card_mark_membar = x->as_MemBar();

    if (!is_card_mark_membar(card_mark_membar))
      return NULL;

    return card_mark_membar;
  }
2067 
2068   // trailing_to_leading
2069   //
2070   // graph traversal helper which checks the Mem flow up the graph
2071   // from a (non-card mark) volatile membar attempting to locate and
2072   // return an associated leading membar. it first looks for a
2073   // subgraph in the normal configuration (relying on helper
2074   // normal_to_leading). failing that it then looks for one of the
2075   // possible post-write card mark subgraphs linking the trailing node
2076   // to a the card mark membar (relying on helper
2077   // trailing_to_card_mark), and then checks that the card mark membar
2078   // is fed by a leading membar (once again relying on auxiliary
2079   // predicate normal_to_leading).
2080   //
  // if the configuration is valid returns the cpuorder membar for
2082   // preference or when absent the release membar otherwise NULL.
2083   //
2084   // n.b. the input membar is expected to be a volatile membar but
2085   // must *not* be a card mark membar.
2086 
2087   MemBarNode *trailing_to_leading(const MemBarNode *trailing)
2088   {
2089     assert(!is_card_mark_membar(trailing), "not expecting a card mark membar");
2090 
2091     MemBarNode *leading = normal_to_leading(trailing);
2092 
2093     if (leading)
2094       return leading;
2095 
2096     MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
2097 
2098     if (!card_mark_membar)
2099       return NULL;
2100 
2101     return normal_to_leading(card_mark_membar);
2102   }
2103 
2104   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
2105 
bool unnecessary_acquire(const Node *barrier)
{
  // assert barrier->is_MemBar();
  if (UseBarriersForVolatile)
    // we need to plant a dmb
    return false;

  // a volatile read derived from bytecode (or also from an inlined
  // SHA field read via LibraryCallKit::load_field_from_object)
  // manifests as a LoadX[mo_acquire] followed by an acquire membar
  // with a bogus read dependency on its preceding load. so in those
  // cases we will find the load node at the PARMS offset of the
  // acquire membar.  n.b. there may be an intervening DecodeN node.
  //
  // a volatile load derived from an inlined unsafe field access
  // manifests as a cpuorder membar with Ctl and Mem projections
  // feeding both an acquire membar and a LoadX[mo_acquire]. The
  // acquire then feeds another cpuorder membar via Ctl and Mem
  // projections. The load has no output dependency on these trailing
  // membars because subsequent nodes inserted into the graph take
  // their control feed from the final membar cpuorder meaning they
  // are all ordered after the load.

  Node *x = barrier->lookup(TypeFunc::Parms);
  if (x) {
    // we are starting from an acquire and it has a fake dependency
    //
    // need to check for
    //
    //   LoadX[mo_acquire]
    //   {  |1   }
    //   {DecodeN}
    //      |Parms
    //   MemBarAcquire*
    //
    // where * tags node we were passed
    // and |k means input k

    // skip over an intervening DecodeN to reach the load itself
    if (x->is_DecodeNarrowPtr())
      x = x->in(1);

    return (x->is_Load() && x->as_Load()->is_acquire());
  }

  // now check for an unsafe volatile get

  // need to check for
  //
  //   MemBarCPUOrder
  //        ||       \\
  //   MemBarAcquire* LoadX[mo_acquire]
  //        ||
  //   MemBarCPUOrder
  //
  // where * tags node we were passed
  // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes

  // check for a parent MemBarCPUOrder
  ProjNode *ctl;
  ProjNode *mem;
  MemBarNode *parent = parent_membar(barrier);
  if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
    return false;
  ctl = parent->proj_out(TypeFunc::Control);
  mem = parent->proj_out(TypeFunc::Memory);
  if (!ctl || !mem)
    return false;
  // ensure the proj nodes both feed a LoadX[mo_acquire]
  LoadNode *ld = NULL;
  for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
    x = ctl->fast_out(i);
    // if we see a load we keep hold of it and stop searching
    if (x->is_Load()) {
      ld = x->as_Load();
      break;
    }
  }
  // it must be an acquiring load
  if (! ld || ! ld->is_acquire())
    return false;
  for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
    x = mem->fast_out(i);
    // if we see the same load we drop it and stop searching
    if (x == ld) {
      ld = NULL;
      break;
    }
  }
  // we must have dropped the load i.e. it must be fed by both the
  // Ctl and the Mem projections of the parent cpuorder membar
  if (ld)
    return false;
  // check for a child cpuorder membar
  MemBarNode *child  = child_membar(barrier->as_MemBar());
  if (!child || child->Opcode() != Op_MemBarCPUOrder)
    return false;

  // full CPUOrder->Acquire->CPUOrder chain found -- no dmb needed
  return true;
}
2203 
2204 bool needs_acquiring_load(const Node *n)
2205 {
2206   // assert n->is_Load();
2207   if (UseBarriersForVolatile)
2208     // we use a normal load and a dmb
2209     return false;
2210 
2211   LoadNode *ld = n->as_Load();
2212 
2213   if (!ld->is_acquire())
2214     return false;
2215 
2216   // check if this load is feeding an acquire membar
2217   //
2218   //   LoadX[mo_acquire]
2219   //   {  |1   }
2220   //   {DecodeN}
2221   //      |Parms
2222   //   MemBarAcquire*
2223   //
2224   // where * tags node we were passed
2225   // and |k means input k
2226 
2227   Node *start = ld;
2228   Node *mbacq = NULL;
2229 
2230   // if we hit a DecodeNarrowPtr we reset the start node and restart
2231   // the search through the outputs
2232  restart:
2233 
2234   for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
2235     Node *x = start->fast_out(i);
2236     if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
2237       mbacq = x;
2238     } else if (!mbacq &&
2239                (x->is_DecodeNarrowPtr() ||
2240                 (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
2241       start = x;
2242       goto restart;
2243     }
2244   }
2245 
2246   if (mbacq) {
2247     return true;
2248   }
2249 
2250   // now check for an unsafe volatile get
2251 
2252   // check if Ctl and Proj feed comes from a MemBarCPUOrder
2253   //
2254   //     MemBarCPUOrder
2255   //        ||       \\
2256   //   MemBarAcquire* LoadX[mo_acquire]
2257   //        ||
2258   //   MemBarCPUOrder
2259 
2260   MemBarNode *membar;
2261 
2262   membar = parent_membar(ld);
2263 
2264   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder)
2265     return false;
2266 
2267   // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
2268 
2269   membar = child_membar(membar);
2270 
2271   if (!membar || !membar->Opcode() == Op_MemBarAcquire)
2272     return false;
2273 
2274   membar = child_membar(membar);
2275   
2276   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder)
2277     return false;
2278 
2279   return true;
2280 }
2281 
2282 bool unnecessary_release(const Node *n)
2283 {
2284   assert((n->is_MemBar() &&
2285           n->Opcode() == Op_MemBarRelease),
2286          "expecting a release membar");
2287 
2288   if (UseBarriersForVolatile)
2289     // we need to plant a dmb
2290     return false;
2291 
2292   // if there is a dependent CPUOrder barrier then use that as the
2293   // leading
2294 
2295   MemBarNode *barrier = n->as_MemBar();
2296   // check for an intervening cpuorder membar
2297   MemBarNode *b = child_membar(barrier);
2298   if (b && b->Opcode() == Op_MemBarCPUOrder) {
2299     // ok, so start the check from the dependent cpuorder barrier
2300     barrier = b;
2301   }
2302 
2303   // must start with a normal feed
2304   MemBarNode *child_barrier = leading_to_normal(barrier);
2305 
2306   if (!child_barrier)
2307     return false;
2308 
2309   if (!is_card_mark_membar(child_barrier))
2310     // this is the trailing membar and we are done
2311     return true;
2312 
2313   // must be sure this card mark feeds a trailing membar
2314   MemBarNode *trailing = card_mark_to_trailing(child_barrier);
2315   return (trailing != NULL);
2316 }
2317 
2318 bool unnecessary_volatile(const Node *n)
2319 {
2320   // assert n->is_MemBar();
2321   if (UseBarriersForVolatile)
2322     // we need to plant a dmb
2323     return false;
2324 
2325   MemBarNode *mbvol = n->as_MemBar();
2326 
2327   // first we check if this is part of a card mark. if so then we have
2328   // to generate a StoreLoad barrier
2329   
2330   if (is_card_mark_membar(mbvol))
2331       return false;
2332 
2333   // ok, if it's not a card mark then we still need to check if it is
2334   // a trailing membar of a volatile put hgraph.
2335 
2336   return (trailing_to_leading(mbvol) != NULL);
2337 }
2338 
2339 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2340 
2341 bool needs_releasing_store(const Node *n)
2342 {
2343   // assert n->is_Store();
2344   if (UseBarriersForVolatile)
2345     // we use a normal store and dmb combination
2346     return false;
2347 
2348   StoreNode *st = n->as_Store();
2349 
2350   // the store must be marked as releasing
2351   if (!st->is_release())
2352     return false;
2353 
2354   // the store must be fed by a membar
2355 
2356   Node *x = st->lookup(StoreNode::Memory);
2357 
2358   if (! x || !x->is_Proj())
2359     return false;
2360 
2361   ProjNode *proj = x->as_Proj();
2362 
2363   x = proj->lookup(0);
2364 
2365   if (!x || !x->is_MemBar())
2366     return false;
2367 
2368   MemBarNode *barrier = x->as_MemBar();
2369 
2370   // if the barrier is a release membar or a cpuorder mmebar fed by a
2371   // release membar then we need to check whether that forms part of a
2372   // volatile put graph.
2373 
2374   // reject invalid candidates
2375   if (!leading_membar(barrier))
2376     return false;
2377 
2378   // does this lead a normal subgraph?
2379   MemBarNode *mbvol = leading_to_normal(barrier);
2380 
2381   if (!mbvol)
2382     return false;
2383 
2384   // all done unless this is a card mark
2385   if (!is_card_mark_membar(mbvol))
2386     return true;
2387   
2388   // we found a card mark -- just make sure we have a trailing barrier
2389 
2390   return (card_mark_to_trailing(mbvol) != NULL);
2391 }
2392 
2393 // predicate controlling translation of StoreCM
2394 //
2395 // returns true if a StoreStore must precede the card write otherwise
2396 // false
2397 
2398 bool unnecessary_storestore(const Node *storecm)
2399 {
2400   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
2401 
2402   // we only ever need to generate a dmb ishst between an object put
2403   // and the associated card mark when we are using CMS without
2404   // conditional card marking
2405 
2406   if (!UseConcMarkSweepGC || UseCondCardMark)
2407     return true;
2408 
2409   // if we are implementing volatile puts using barriers then the
2410   // object put as an str so we must insert the dmb ishst
2411 
2412   if (UseBarriersForVolatile)
2413     return false;
2414 
2415   // we can omit the dmb ishst if this StoreCM is part of a volatile
2416   // put because in thta case the put will be implemented by stlr
2417   //
2418   // we need to check for a normal subgraph feeding this StoreCM.
2419   // that means the StoreCM must be fed Memory from a leading membar,
2420   // either a MemBarRelease or its dependent MemBarCPUOrder, and the
2421   // leading membar must be part of a normal subgraph
2422 
2423   Node *x = storecm->in(StoreNode::Memory);
2424 
2425   if (!x->is_Proj())
2426     return false;
2427 
2428   x = x->in(0);
2429 
2430   if (!x->is_MemBar())
2431     return false;
2432 
2433   MemBarNode *leading = x->as_MemBar();
2434 
2435   // reject invalid candidates
2436   if (!leading_membar(leading))
2437     return false;
2438 
2439   // we can omit the StoreStore if it is the head of a normal subgraph
2440   return (leading_to_normal(leading) != NULL);
2441 }
2442 
2443 
2444 #define __ _masm.
2445 
2446 // advance declarations for helper functions to convert register
2447 // indices to register objects
2448 
2449 // the ad file has to provide implementations of certain methods
2450 // expected by the generic code
2451 //
2452 // REQUIRED FUNCTIONALITY
2453 
2454 //=============================================================================
2455 
2456 // !!!!! Special hack to get all types of calls to specify the byte offset
2457 //       from the start of the call to the point where the return address
2458 //       will point.
2459 
2460 int MachCallStaticJavaNode::ret_addr_offset()
2461 {
2462   // call should be a simple bl
2463   int off = 4;
2464   return off;
2465 }
2466 
2467 int MachCallDynamicJavaNode::ret_addr_offset()
2468 {
2469   return 16; // movz, movk, movk, bl
2470 }
2471 
2472 int MachCallRuntimeNode::ret_addr_offset() {
2473   // for generated stubs the call will be
2474   //   far_call(addr)
2475   // for real runtime callouts it will be six instructions
2476   // see aarch64_enc_java_to_runtime
2477   //   adr(rscratch2, retaddr)
2478   //   lea(rscratch1, RuntimeAddress(addr)
2479   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
2480   //   blrt rscratch1
2481   CodeBlob *cb = CodeCache::find_blob(_entry_point);
2482   if (cb) {
2483     return MacroAssembler::far_branch_size();
2484   } else {
2485     return 6 * NativeInstruction::instruction_size;
2486   }
2487 }
2488 
2489 // Indicate if the safepoint node needs the polling page as an input
2490 
2491 // the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
2493 // instruction itself. so we cannot plant a mov of the safepoint poll
2494 // address followed by a load. setting this to true means the mov is
2495 // scheduled as a prior instruction. that's better for scheduling
2496 // anyway.
2497 
2498 bool SafePointNode::needs_polling_address_input()
2499 {
2500   return true;
2501 }
2502 
2503 //=============================================================================
2504 
#ifndef PRODUCT
// debug-only listing for a breakpoint node
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
#endif
2510 
// emit a breakpoint as a single brk instruction
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  __ brk(0);
}
2515 
// size of the emitted code; defer to the generic MachNode sizing
uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
2519 
2520 //=============================================================================
2521 
#ifndef PRODUCT
  // debug-only listing for a nop padding node
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif
2527 
2528   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2529     MacroAssembler _masm(&cbuf);
2530     for (int i = 0; i < _count; i++) {
2531       __ nop();
2532     }
2533   }
2534 
  // the pad occupies one 4-byte instruction slot per requested nop
  uint MachNopNode::size(PhaseRegAlloc*) const {
    return _count * NativeInstruction::instruction_size;
  }
2538 
2539 //=============================================================================
// the constant base produces no value so it has an empty output mask
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

// constants are addressed absolutely on aarch64 so the constant
// table base carries no offset
int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}
2545 
// no post-register-allocation expansion is needed for the constant
// base node on this platform
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  // never reached because requires_postalloc_expand() returns false
  ShouldNotReachHere();
}
2550 
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

// the node emits no instructions so it occupies no code space
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
// debug-only listing for the (empty) constant base node
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
#endif
2564 
#ifndef PRODUCT
// debug-only listing of the prolog; must mirror the instruction
// selection performed by MachPrologNode::emit/build_frame
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  // frame size in bytes (frame slots are LogBytesPerInt wide)
  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  // small frames fit the immediate field of sub/stp; larger frames
  // need the adjustment moved into rscratch1 first
  if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
#endif
2586 
// Emit the compiled-method prolog: a patchable nop, optional stack-bang,
// frame construction, and (on the simulator) an entry notification.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  // NotifySimulator is only meaningful when running under the AArch64
  // instruction-set simulator.
  if (NotifySimulator) {
    __ notify(Assembler::method_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  // The frame is complete once all of the above has been emitted.
  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
2622 
// Prolog size varies (bang, frame shape); compute it generically.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

// The prolog contains no relocatable values.
int MachPrologNode::reloc() const
{
  return 0;
}
2633 
2634 //=============================================================================
2635 
#ifndef PRODUCT
// Debug listing of the epilog: frame teardown (three shapes depending
// on frame size) plus the return-polling-page touch.
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    // Small frame: offsets fit the scaled 9-bit immediate form.
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
#endif
2661 
// Emit the epilog: tear down the frame, notify the simulator if
// enabled, and read the safepoint polling page on method return.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_reentry);
  }

  if (do_polling() && C->is_method_compilation()) {
    // Safepoint poll with a poll_return relocation.
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
2677 
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}

int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}

// Use the generic pipeline description for the epilog.
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

// This method seems to be obsolete. It is declared in machnode.hpp
// and defined in all *.ad files, but it is never called. Should we
// get rid of it?
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  return 4;
}
2699 
2700 //=============================================================================
2701 
// Figure out which register class each belongs in: rc_int, rc_float or
// rc_stack.
enum RC { rc_bad, rc_int, rc_float, rc_stack };

// Map an allocator register number to its spill class.  The numeric
// bounds mirror the reg_def ordering at the top of this file: integer
// register halves first, then float/vector register slots, then flags,
// then stack slots.
static enum RC rc_class(OptoReg::Name reg) {

  if (reg == OptoReg::Bad) {
    return rc_bad;
  }

  // we have 30 int registers * 2 halves
  // (rscratch1 and rscratch2 are omitted)

  if (reg < 60) {
    return rc_int;
  }

  // we have 32 float register * 2 halves
  // NOTE(review): the bound adds 128 slots, which is 4 per V register,
  // not 2 — confirm against the float reg_defs (vector registers may
  // occupy 4 allocator slots each).
  if (reg < 60 + 128) {
    return rc_float;
  }

  // Between float regs & stack is the flags regs.
  assert(OptoReg::is_stack(reg), "blow up if spilling flags");

  return rc_stack;
}
2729 
2730 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
2731   Compile* C = ra_->C;
2732 
2733   // Get registers to move.
2734   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
2735   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
2736   OptoReg::Name dst_hi = ra_->get_reg_second(this);
2737   OptoReg::Name dst_lo = ra_->get_reg_first(this);
2738 
2739   enum RC src_hi_rc = rc_class(src_hi);
2740   enum RC src_lo_rc = rc_class(src_lo);
2741   enum RC dst_hi_rc = rc_class(dst_hi);
2742   enum RC dst_lo_rc = rc_class(dst_lo);
2743 
2744   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
2745 
2746   if (src_hi != OptoReg::Bad) {
2747     assert((src_lo&1)==0 && src_lo+1==src_hi &&
2748            (dst_lo&1)==0 && dst_lo+1==dst_hi,
2749            "expected aligned-adjacent pairs");
2750   }
2751 
2752   if (src_lo == dst_lo && src_hi == dst_hi) {
2753     return 0;            // Self copy, no move.
2754   }
2755 
2756   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
2757               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
2758   int src_offset = ra_->reg2offset(src_lo);
2759   int dst_offset = ra_->reg2offset(dst_lo);
2760 
2761   if (bottom_type()->isa_vect() != NULL) {
2762     uint ireg = ideal_reg();
2763     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
2764     if (cbuf) {
2765       MacroAssembler _masm(cbuf);
2766       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
2767       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
2768         // stack->stack
2769         assert((src_offset & 7) && (dst_offset & 7), "unaligned stack offset");
2770         if (ireg == Op_VecD) {
2771           __ unspill(rscratch1, true, src_offset);
2772           __ spill(rscratch1, true, dst_offset);
2773         } else {
2774           __ spill_copy128(src_offset, dst_offset);
2775         }
2776       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
2777         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2778                ireg == Op_VecD ? __ T8B : __ T16B,
2779                as_FloatRegister(Matcher::_regEncode[src_lo]));
2780       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
2781         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
2782                        ireg == Op_VecD ? __ D : __ Q,
2783                        ra_->reg2offset(dst_lo));
2784       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
2785         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2786                        ireg == Op_VecD ? __ D : __ Q,
2787                        ra_->reg2offset(src_lo));
2788       } else {
2789         ShouldNotReachHere();
2790       }
2791     }
2792   } else if (cbuf) {
2793     MacroAssembler _masm(cbuf);
2794     switch (src_lo_rc) {
2795     case rc_int:
2796       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
2797         if (is64) {
2798             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
2799                    as_Register(Matcher::_regEncode[src_lo]));
2800         } else {
2801             MacroAssembler _masm(cbuf);
2802             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
2803                     as_Register(Matcher::_regEncode[src_lo]));
2804         }
2805       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
2806         if (is64) {
2807             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2808                      as_Register(Matcher::_regEncode[src_lo]));
2809         } else {
2810             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2811                      as_Register(Matcher::_regEncode[src_lo]));
2812         }
2813       } else {                    // gpr --> stack spill
2814         assert(dst_lo_rc == rc_stack, "spill to bad register class");
2815         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
2816       }
2817       break;
2818     case rc_float:
2819       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
2820         if (is64) {
2821             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
2822                      as_FloatRegister(Matcher::_regEncode[src_lo]));
2823         } else {
2824             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
2825                      as_FloatRegister(Matcher::_regEncode[src_lo]));
2826         }
2827       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
2828           if (cbuf) {
2829             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2830                      as_FloatRegister(Matcher::_regEncode[src_lo]));
2831         } else {
2832             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2833                      as_FloatRegister(Matcher::_regEncode[src_lo]));
2834         }
2835       } else {                    // fpr --> stack spill
2836         assert(dst_lo_rc == rc_stack, "spill to bad register class");
2837         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
2838                  is64 ? __ D : __ S, dst_offset);
2839       }
2840       break;
2841     case rc_stack:
2842       if (dst_lo_rc == rc_int) {  // stack --> gpr load
2843         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
2844       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
2845         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
2846                    is64 ? __ D : __ S, src_offset);
2847       } else {                    // stack --> stack copy
2848         assert(dst_lo_rc == rc_stack, "spill to bad register class");
2849         __ unspill(rscratch1, is64, src_offset);
2850         __ spill(rscratch1, is64, dst_offset);
2851       }
2852       break;
2853     default:
2854       assert(false, "bad rc_class for spill");
2855       ShouldNotReachHere();
2856     }
2857   }
2858 
2859   if (st) {
2860     st->print("spill ");
2861     if (src_lo_rc == rc_stack) {
2862       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
2863     } else {
2864       st->print("%s -> ", Matcher::regName[src_lo]);
2865     }
2866     if (dst_lo_rc == rc_stack) {
2867       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
2868     } else {
2869       st->print("%s", Matcher::regName[dst_lo]);
2870     }
2871     if (bottom_type()->isa_vect() != NULL) {
2872       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
2873     } else {
2874       st->print("\t# spill size = %d", is64 ? 64:32);
2875     }
2876   }
2877 
2878   return 0;
2879 
2880 }
2881 
#ifndef PRODUCT
// Formatting and emission both delegate to implementation(); format
// passes a NULL code buffer, emit a NULL stream.
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  if (!ra_)
    st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
  else
    implementation(NULL, ra_, false, st);
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}

// Variable size; compute generically.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
2898 
2899 //=============================================================================
2900 
2901 #ifndef PRODUCT
2902 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2903   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2904   int reg = ra_->get_reg_first(this);
2905   st->print("add %s, rsp, #%d]\t# box lock",
2906             Matcher::regName[reg], offset);
2907 }
2908 #endif
2909 
// Emit the box-lock address computation: dst = sp + offset.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);

  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg    = ra_->get_encode(this);

  // Frame offsets are expected to fit the add immediate form; a frame
  // large enough to break this would need a scratch-register sequence.
  if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
    __ add(as_Register(reg), sp, offset);
  } else {
    ShouldNotReachHere();
  }
}

uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  return 4;
}
2927 
2928 //=============================================================================
2929 
2930 #ifndef PRODUCT
2931 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2932 {
2933   st->print_cr("# MachUEPNode");
2934   if (UseCompressedClassPointers) {
2935     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2936     if (Universe::narrow_klass_shift() != 0) {
2937       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
2938     }
2939   } else {
2940    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2941   }
2942   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
2943   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
2944 }
2945 #endif
2946 
// Emit the unverified entry point: inline-cache klass check, with a
// far jump to the ic-miss stub on mismatch.
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  // Compare the receiver's klass (j_rarg0) against the IC klass
  // (rscratch2); rscratch1 is a temp.
  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}

uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_);
}
2965 
2966 // REQUIRED EMIT CODE
2967 
2968 //=============================================================================
2969 
// Emit exception handler code: a stub that far-jumps to the exception
// blob's entry point.  Returns the stub's offset, or 0 if the code
// cache is full.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // mov rscratch1 #exception_blob_entry_point
  // br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
2989 
// Emit deopt handler code: capture the return address in lr, then
// far-jump to the deopt blob's unpack entry.  Returns the stub's
// offset, or 0 if the code cache is full.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // lr := address of this handler, used by the unpacker.
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
3010 
3011 // REQUIRED MATCHER CODE
3012 
3013 //=============================================================================
3014 
3015 const bool Matcher::match_rule_supported(int opcode) {
3016 
3017   // TODO
3018   // identify extra cases that we might want to provide match rules for
3019   // e.g. Op_StrEquals and other intrinsics
3020   if (!has_match_rule(opcode)) {
3021     return false;
3022   }
3023 
3024   return true;  // Per default match rules are supported.
3025 }
3026 
// Not used on AArch64 (x87-style FPU stack offsets do not apply).
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}

// Short-branch replacement is not implemented in this port.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset)
{
  Unimplemented();
  return false;
}
3038 
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}

// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
3049 
3050 // Vector width in bytes.
3051 const int Matcher::vector_width_in_bytes(BasicType bt) {
3052   int size = MIN2(16,(int)MaxVectorSize);
3053   // Minimum 2 values in vector
3054   if (size < 2*type2aelembytes(bt)) size = 0;
3055   // But never < 4
3056   if (size < 4) size = 0;
3057   return size;
3058 }
3059 
// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
// Minimum vector length in elements, never fewer than 2.
const int Matcher::min_vector_size(const BasicType bt) {
//  For the moment limit the vector size to 8 bytes
    int size = 8 / type2aelembytes(bt);
    if (size < 2) size = 2;
    return size;
}
3070 
3071 // Vector ideal reg.
3072 const int Matcher::vector_ideal_reg(int len) {
3073   switch(len) {
3074     case  8: return Op_VecD;
3075     case 16: return Op_VecX;
3076   }
3077   ShouldNotReachHere();
3078   return 0;
3079 }
3080 
// Vector shift counts are always held in a full 128-bit register.
const int Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecX;
}

// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// Misaligned vector store/load support, controlled by -XX:AlignVector.
// (The old comment said "x86" — boilerplate inherited from the x86 port.)
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}
3094 
// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray.
const int Matcher::init_array_short_size = 18 * BytesPerLong;

// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}
3111 
// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
// AArch64 shift instructions use only the low bits of the count.
const bool Matcher::need_masked_shift_count = false;
3123 
// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Only profitable when decoding is a plain shift-free add (shift 0).
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}
3143 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Not used on AArch64.  (Stale comment said "No-op on amd64" —
// boilerplate inherited from the x86 port; here it must never run.)
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return true; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
3175 
// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers not be
// available to the callee.
bool Matcher::can_be_java_arg(int reg)
{
  // AAPCS64 Java argument registers: r0-r7 and v0-v7 (both halves).
  return
    reg ==  R0_num || reg == R0_H_num ||
    reg ==  R1_num || reg == R1_H_num ||
    reg ==  R2_num || reg == R2_H_num ||
    reg ==  R3_num || reg == R3_H_num ||
    reg ==  R4_num || reg == R4_H_num ||
    reg ==  R5_num || reg == R5_H_num ||
    reg ==  R6_num || reg == R6_H_num ||
    reg ==  R7_num || reg == R7_H_num ||
    reg ==  V0_num || reg == V0_H_num ||
    reg ==  V1_num || reg == V1_H_num ||
    reg ==  V2_num || reg == V2_H_num ||
    reg ==  V3_num || reg == V3_H_num ||
    reg ==  V4_num || reg == V4_H_num ||
    reg ==  V5_num || reg == V5_H_num ||
    reg ==  V6_num || reg == V6_H_num ||
    reg ==  V7_num || reg == V7_H_num;
}

// Argument registers are the spillable ones.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}
3206 
// No hand-written assembly for long division by constant.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}

// AArch64 has no combined div/mod instruction, so the divmodI/divmodL
// projection masks below must never be requested.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// SP is saved in the frame pointer across method-handle invokes.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
3237 
// helper for encoding java_to_runtime calls on sim
//
// this is needed to compute the extra arguments required when
// planting a call to the simulator blrt instruction. the TypeFunc
// can be queried to identify the counts for integral, and floating
// arguments and the return type

// Count the integral (gpcnt) and floating-point (fpcnt) parameters of
// *tf and classify its return type into one of the
// MacroAssembler::ret_type_* codes (rtype).
static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
{
  int gps = 0;
  int fps = 0;
  const TypeTuple *domain = tf->domain();
  int max = domain->cnt();
  for (int i = TypeFunc::Parms; i < max; i++) {
    const Type *t = domain->field_at(i);
    switch(t->basic_type()) {
    case T_FLOAT:
    case T_DOUBLE:
      fps++;
      // NOTE(review): no break here, so float/double parameters are
      // counted in gps as well — confirm this fall-through matches the
      // simulator blrt argument convention before "fixing" it.
    default:
      gps++;
    }
  }
  gpcnt = gps;
  fpcnt = fps;
  BasicType rt = tf->return_type();
  switch (rt) {
  case T_VOID:
    rtype = MacroAssembler::ret_type_void;
    break;
  default:
    // Any non-void, non-FP return (ints, longs, pointers) is integral.
    rtype = MacroAssembler::ret_type_integral;
    break;
  case T_FLOAT:
    rtype = MacroAssembler::ret_type_float;
    break;
  case T_DOUBLE:
    rtype = MacroAssembler::ret_type_double;
    break;
  }
}
3279 
// Emit a volatile (acquire/release) load or store via INSN.  Volatile
// accesses only support a plain base register: the guarantees reject
// any index, displacement or scaling.  (No comments inside the macro
// body — '//' would swallow the continuation backslashes.)
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }
3288 
// Pointer-to-member types for the MacroAssembler load/store emitters
// used by the loadStore() helpers below.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
3293 
  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  static void loadStore(MacroAssembler masm, mem_insn insn,
                         Register reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDOFFSETI2L:
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDOFFSETI2LN:
    case INDINDEXSCALEDI2LN:
    case INDINDEXOFFSETI2L:
    case INDINDEXOFFSETI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    if (index == -1) {
      // No index register: base + displacement.
      (masm.*insn)(reg, Address(base, disp));
    } else {
      if (disp == 0) {
        (masm.*insn)(reg, Address(base, as_Register(index), scale));
      } else {
        // Base + index + disp has no single addressing mode; fold the
        // displacement into rscratch1 first.
        masm.lea(rscratch1, Address(base, disp));
        (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
      }
    }
  }
3330 
  // Float-register variant of loadStore(); same opcode kludge to pick
  // sign-extended vs shifted index scaling.
  static void loadStore(MacroAssembler masm, mem_float_insn insn,
                         FloatRegister reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    switch (opcode) {
    case INDINDEXSCALEDOFFSETI2L:
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDOFFSETI2LN:
    case INDINDEXSCALEDI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

     if (index == -1) {
      (masm.*insn)(reg, Address(base, disp));
    } else {
      if (disp == 0) {
        (masm.*insn)(reg, Address(base, as_Register(index), scale));
      } else {
        // Fold the displacement into rscratch1; no base+index+disp mode.
        masm.lea(rscratch1, Address(base, disp));
        (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
      }
    }
  }
3359 
  // Vector-register variant of loadStore(): only base+disp or
  // base+scaled-index addressing is supported.
  static void loadStore(MacroAssembler masm, mem_vector_insn insn,
                         FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
                         int opcode, Register base, int index, int size, int disp)
  {
    if (index == -1) {
      (masm.*insn)(reg, T, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode");
      (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
    }
  }
3371 
3372 %}
3373 
3374 
3375 
3376 //----------ENCODING BLOCK-----------------------------------------------------
3377 // This block specifies the encoding classes used by the compiler to
3378 // output byte streams.  Encoding classes are parameterized macros
3379 // used by Machine Instruction Nodes in order to generate the bit
3380 // encoding of the instruction.  Operands specify their base encoding
3381 // interface with the interface keyword.  There are currently
3382 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
3383 // COND_INTER.  REG_INTER causes an operand to generate a function
3384 // which returns its register number when queried.  CONST_INTER causes
3385 // an operand to generate a function which returns the value of the
3386 // constant when queried.  MEMORY_INTER causes an operand to generate
3387 // four functions which return the Base Register, the Index Register,
3388 // the Scale Value, and the Offset Value of the operand when queried.
3389 // COND_INTER causes an operand to generate six functions which return
3390 // the encoding code (ie - encoding bits for the instruction)
3391 // associated with each basic boolean condition for a conditional
3392 // instruction.
3393 //
3394 // Instructions specify two basic values for encoding.  Again, a
3395 // function is available to check if the constant displacement is an
3396 // oop. They use the ins_encode keyword to specify their encoding
3397 // classes (which must be a sequence of enc_class names, and their
3398 // parameters, specified in the encoding block), and they use the
3399 // opcode keyword to specify, in order, their primary, secondary, and
3400 // tertiary opcode.  Only the opcode sections which a particular
3401 // instruction needs for encoding need to be specified.
3402 encode %{
3403   // Build emit functions for each basic byte or larger field in the
3404   // intel encoding scheme (opcode, rm, sib, immediate), and call them
3405   // from C++ code in the enc_class source block.  Emit functions will
3406   // live in the main source block for now.  In future, we can
3407   // generalize this by adding a syntax that specifies the sizes of
3408   // fields in an order, so that the adlc can build the emit functions
3409   // automagically
3410 
3411   // catch all for unimplemented encodings
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    // Emit a trap so any rule still using this encoding fails loudly at
    // runtime rather than silently generating nothing.
    __ unimplemented("C2 catch all");
  %}
3416 
3417   // BEGIN Non-volatile memory access
3418 
  // Scalar and vector load encodings.
  //
  // Each of these delegates to the loadStore() helper, which decodes the
  // memory operand's components (base register, optional index register,
  // scale and displacement, plus the memory node's opcode) into an AArch64
  // addressing mode and then emits the supplied MacroAssembler load.

  // load byte, sign-extend to 32 bits
  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load byte, sign-extend to 64 bits
  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load byte, zero-extend (int destination)
  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load byte, zero-extend (long destination)
  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load halfword, sign-extend to 32 bits
  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load halfword, sign-extend to 64 bits
  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load halfword, zero-extend (int destination)
  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load halfword, zero-extend (long destination)
  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 32-bit word (int destination)
  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 32-bit word, zero-extend (long destination)
  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 32-bit word, sign-extend to 64 bits
  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 64-bit doubleword
  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 32-bit float
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 64-bit double
  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load vector, 32 bits (S size)
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load vector, 64 bits (D size)
  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load vector, 128 bits (Q size)
  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
3520 
  // Scalar and vector store encodings.  As with the loads above, the
  // loadStore() helper decodes the memory operand and emits the supplied
  // store instruction.  The *0 variants store zero by writing zr.

  // store byte
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store zero byte
  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // StoreStore barrier followed by a zero byte store
  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store halfword
  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store zero halfword
  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store 32-bit word
  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store zero 32-bit word
  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem$$opcode == 0 ? $mem->opcode() : $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store 64-bit doubleword; sp cannot be stored directly, so it is
  // first copied into rscratch2
  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store zero 64-bit doubleword
  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store 32-bit float
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store 64-bit double
  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store vector, 32 bits (S size)
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store vector, 64 bits (D size)
  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store vector, 128 bits (Q size)
  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
3613 
3614   // END Non-volatile memory access
3615 
3616   // volatile loads and stores
3617 
  // Store-release encodings.  MOV_VOLATILE forms the address (using
  // rscratch1 as a temporary when needed) and emits the named
  // store-release instruction.

  // store-release byte
  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
  %}

  // store-release halfword
  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
  %}

  // store-release 32-bit word
  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}
3632 
3633 
  // Load-acquire encodings.  ldar only exists in byte/halfword/word/
  // doubleword zero-extending forms, so the signed sub-word variants
  // perform an acquiring ldarb/ldarh and then sign-extend in place.

  // load-acquire byte, sign-extend to 32 bits
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}

  // load-acquire byte, sign-extend to 64 bits
  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}

  // load-acquire byte, zero-extend (int destination)
  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // load-acquire byte, zero-extend (long destination)
  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // load-acquire halfword, sign-extend to 32 bits
  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  // load-acquire halfword, sign-extend to 64 bits
  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  // load-acquire halfword, zero-extend (int destination)
  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // load-acquire halfword, zero-extend (long destination)
  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // load-acquire 32-bit word (int destination)
  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // load-acquire 32-bit word, zero-extend (long destination)
  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // load-acquire 64-bit doubleword
  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}

  // load-acquire float: acquiring word load into rscratch1, then move
  // the bits into the FP register
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  // load-acquire double: acquiring doubleword load into rscratch1, then
  // move the bits into the FP register
  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
3708 
  // store-release 64-bit doubleword; sp cannot be stored directly, so it
  // is first copied into rscratch2
  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
        MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}

  // store-release float: move the bits to rscratch2, then store-release
  // as a 32-bit word (inner scope keeps this _masm separate from the one
  // MOV_VOLATILE declares)
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}

  // store-release double: move the bits to rscratch2, then store-release
  // as a 64-bit doubleword
  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
3742 
3743   // synchronized read/update encodings
3744 
  // load-acquire exclusive: ldaxr requires a plain base register, so any
  // non-trivial memory operand is first leaned into rscratch1
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        // base + displacement
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        // base + (index << scale)
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // base + displacement + (index << scale), formed in two steps
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}
3773 
  // store-release exclusive: like ldaxr above, the address is formed into
  // rscratch2 when needed; rscratch1 receives the exclusive-store status
  // and the final cmpw leaves EQ in the flags iff the store succeeded
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        // base + displacement
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        // base + (index << scale)
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // base + displacement + (index << scale), formed in two steps
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    // status register is 0 on success -- convert to condition flags
    __ cmpw(rscratch1, zr);
  %}
3803 
  // 64-bit compare-and-swap via an ldxr/stlxr retry loop.  On exit the
  // flags are EQ on success and NE on failure (the cmp below is the last
  // flag-setting instruction on either path); aarch64_enc_cset_eq
  // converts them to a 0/1 result.
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    Register old_reg = as_Register($oldval$$reg);
    Register new_reg = as_Register($newval$$reg);
    Register base = as_Register($mem$$base);
    Register addr_reg;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    // form the exclusive-access address into addr_reg (rscratch2 unless
    // the operand is already a bare base register)
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        addr_reg = rscratch2;
      } else {
        // TODO
        // should we ever get anything other than this case?
        addr_reg = base;
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        addr_reg = rscratch2;
      } else {
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        addr_reg = rscratch2;
      }
    }
    Label retry_load, done;
    __ bind(retry_load);
    __ ldxr(rscratch1, addr_reg);
    __ cmp(rscratch1, old_reg);
    __ br(Assembler::NE, done);             // current value differs: fail, flags NE
    __ stlxr(rscratch1, new_reg, addr_reg); // rscratch1 == 0 on success
    __ cbnzw(rscratch1, retry_load);        // lost the reservation: retry
    __ bind(done);
  %}
3842 
  // 32-bit compare-and-swap; identical structure to aarch64_enc_cmpxchg
  // but using the word-sized ldxrw/cmpw/stlxrw.  Flags are EQ on success,
  // NE on failure.
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    Register old_reg = as_Register($oldval$$reg);
    Register new_reg = as_Register($newval$$reg);
    Register base = as_Register($mem$$base);
    Register addr_reg;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    // form the exclusive-access address into addr_reg
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        addr_reg = rscratch2;
      } else {
        // TODO
        // should we ever get anything other than this case?
        addr_reg = base;
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        addr_reg = rscratch2;
      } else {
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        addr_reg = rscratch2;
      }
    }
    Label retry_load, done;
    __ bind(retry_load);
    __ ldxrw(rscratch1, addr_reg);
    __ cmpw(rscratch1, old_reg);
    __ br(Assembler::NE, done);              // current value differs: fail, flags NE
    __ stlxrw(rscratch1, new_reg, addr_reg); // rscratch1 == 0 on success
    __ cbnzw(rscratch1, retry_load);         // lost the reservation: retry
    __ bind(done);
  %}
3881 
3882   // auxiliary used for CompareAndSwapX to set result register
3883   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
3884     MacroAssembler _masm(&cbuf);
3885     Register res_reg = as_Register($res$$reg);
3886     __ cset(res_reg, Assembler::EQ);
3887   %}
3888 
3889   // prefetch encodings
3890 
  // prefetch for store (PSTL1KEEP hint)
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
      // NOTE(review): the nop presumably pads this form to a fixed
      // instruction count matching the indexed forms -- confirm
      __ nop();
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        // fold the displacement into rscratch1 first, then prefetch
        // base+disp indexed by index<<scale
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
3910 
  // Zero a region of 'cnt' words starting at 'base' using an 8-way
  // unrolled store loop entered Duff's-device style.
  enc_class aarch64_enc_clear_array_reg_reg(iRegL_R11 cnt, iRegP_R10 base) %{
    MacroAssembler _masm(&cbuf);
    Register cnt_reg = as_Register($cnt$$reg);
    Register base_reg = as_Register($base$$reg);
    // base is word aligned
    // cnt is count of words

    Label loop;
    Label entry;

//  Algorithm:
//
//    scratch1 = cnt & 7;
//    cnt -= scratch1;
//    p += scratch1;
//    switch (scratch1) {
//      do {
//        cnt -= 8;
//          p[-8] = 0;
//        case 7:
//          p[-7] = 0;
//        case 6:
//          p[-6] = 0;
//          // ...
//        case 1:
//          p[-1] = 0;
//        case 0:
//          p += 8;
//      } while (cnt);
//    }

    const int unroll = 8; // Number of str(zr) instructions we'll unroll

    __ andr(rscratch1, cnt_reg, unroll - 1);  // tmp1 = cnt % unroll
    __ sub(cnt_reg, cnt_reg, rscratch1);      // cnt -= (cnt % unroll)
    // base_reg always points to the end of the region we're about to zero
    __ add(base_reg, base_reg, rscratch1, Assembler::LSL, exact_log2(wordSize));
    // branch backwards from 'entry' by one 4-byte str instruction per
    // leftover word, so the first pass executes exactly (cnt % unroll)
    // stores before falling through to 'entry'
    __ adr(rscratch2, entry);
    __ sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
    __ br(rscratch2);
    __ bind(loop);
    __ sub(cnt_reg, cnt_reg, unroll);
    for (int i = -unroll; i < 0; i++)
      __ str(zr, Address(base_reg, i * wordSize));
    __ bind(entry);
    __ add(base_reg, base_reg, unroll * wordSize);
    __ cbnz(cnt_reg, loop);
  %}
3959 
  // mov encodings
3961 
  // move 32-bit immediate into register; zero is special-cased as a move
  // from zr, otherwise MacroAssembler::movw materializes the constant
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}
3972 
  // move 64-bit immediate into register; zero is special-cased as a move
  // from zr, otherwise MacroAssembler::mov materializes the constant
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}
3983 
  // move pointer constant into register, with the relocation appropriate
  // to the constant's kind (oop, metadata, or plain address)
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    // NULL and the value 1 have dedicated encodings (mov_p0 / mov_p1)
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          // small constant: materialize directly
          __ mov(dst_reg, con);
        } else {
          // otherwise use a page-relative adrp + low-bits add
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}
4008 
  // move NULL pointer constant: simply write zr
  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // move pointer constant 1
  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}
4020 
  // load the polling page address with a poll_type relocation; the page
  // is assumed page-aligned so adrp must leave a zero offset
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}

  // load the card table byte map base; likewise assumed page-aligned
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, ExternalAddress(page), off);
    assert(off == 0, "assumed offset == 0");
  %}
4038 
  // move narrow (compressed) oop constant
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    // the zero case has its own encoding (mov_n0)
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}

  // move narrow oop zero: simply write zr
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // move narrow (compressed) klass constant
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
4070 
4071   // arithmetic encodings
4072 
  // 32-bit add/subtract immediate.  The instruct's $primary selects the
  // operation (0 = add, 1 = subtract); a negative effective constant is
  // folded into the opposite instruction with the negated value.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}

  // 64-bit add/subtract immediate; same $primary convention as above.
  // NOTE(review): the constant is truncated to int32_t -- presumably
  // immLAddSub guarantees it fits in 32 bits; confirm against the
  // operand definition.
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}
4100 
  // Division and remainder encodings.  corrected_idivl/idivq handle the
  // Java semantics (e.g. MIN_VALUE / -1); the boolean argument appears to
  // select remainder (true, mod variants) vs quotient (false, div
  // variants).

  // 32-bit divide
  enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 64-bit divide
  enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 32-bit remainder
  enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}

  // 64-bit remainder
  enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}
4132 
4133   // compare instruction encodings
4134 
4135   enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
4136     MacroAssembler _masm(&cbuf);
4137     Register reg1 = as_Register($src1$$reg);
4138     Register reg2 = as_Register($src2$$reg);
4139     __ cmpw(reg1, reg2);
4140   %}
4141 
4142   enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
4143     MacroAssembler _masm(&cbuf);
4144     Register reg = as_Register($src1$$reg);
4145     int32_t val = $src2$$constant;
4146     if (val >= 0) {
4147       __ subsw(zr, reg, val);
4148     } else {
4149       __ addsw(zr, reg, -val);
4150     }
4151   %}
4152 
4153   enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
4154     MacroAssembler _masm(&cbuf);
4155     Register reg1 = as_Register($src1$$reg);
4156     u_int32_t val = (u_int32_t)$src2$$constant;
4157     __ movw(rscratch1, val);
4158     __ cmpw(reg1, rscratch1);
4159   %}
4160 
4161   enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
4162     MacroAssembler _masm(&cbuf);
4163     Register reg1 = as_Register($src1$$reg);
4164     Register reg2 = as_Register($src2$$reg);
4165     __ cmp(reg1, reg2);
4166   %}
4167 
4168   enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
4169     MacroAssembler _masm(&cbuf);
4170     Register reg = as_Register($src1$$reg);
4171     int64_t val = $src2$$constant;
4172     if (val >= 0) {
4173       __ subs(zr, reg, val);
4174     } else if (val != -val) {
4175       __ adds(zr, reg, -val);
4176     } else {
4177     // aargh, Long.MIN_VALUE is a special case
4178       __ orr(rscratch1, zr, (u_int64_t)val);
4179       __ subs(zr, reg, rscratch1);
4180     }
4181   %}
4182 
4183   enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
4184     MacroAssembler _masm(&cbuf);
4185     Register reg1 = as_Register($src1$$reg);
4186     u_int64_t val = (u_int64_t)$src2$$constant;
4187     __ mov(rscratch1, val);
4188     __ cmp(reg1, rscratch1);
4189   %}
4190 
4191   enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
4192     MacroAssembler _masm(&cbuf);
4193     Register reg1 = as_Register($src1$$reg);
4194     Register reg2 = as_Register($src2$$reg);
4195     __ cmp(reg1, reg2);
4196   %}
4197 
  // Compare two compressed-pointer (narrow oop) registers; narrow oops
  // are 32 bits, hence the W-form compare.
  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}
4204 
  // Test a pointer register against NULL (compare with zr).
  enc_class aarch64_enc_testp(iRegP src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmp(reg, zr);
  %}
4210 
  // Test a narrow-oop register against NULL (32-bit compare with zr).
  enc_class aarch64_enc_testn(iRegN src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmpw(reg, zr);
  %}
4216 
  // Unconditional branch to a label.
  enc_class aarch64_enc_b(label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ b(*L);
  %}
4222 
  // Conditional branch; the condition code comes from the signed cmpOp
  // operand's encoding.
  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
4228 
  // Conditional branch, unsigned variant; identical emission to
  // aarch64_enc_br_con but takes a cmpOpU operand whose encoding holds
  // the unsigned condition codes.
  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
4234 
  // Slow-path subtype check: scan super's secondary-supers list for sub.
  // Falls through on a hit; branches to the local miss label otherwise.
  // The caller tests the resulting condition codes / result register.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     // $primary is set by the matching instruct's opcode; when set, zero
     // the result register on the hit path -- TODO confirm against the
     // instruct definitions that use this enc_class.
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
4252 
  // Emit a Java static (or optimized-virtual) call via a trampoline, and
  // for real Java targets also emit the to-interpreter stub.  Any of the
  // emissions can fail when the code cache is full; in that case record
  // the bailout and return without emitting further code.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else if (_optimized_virtual) {
      call = __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
    } else {
      call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full"); 
      return;
    }

    if (_method) {
      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full"); 
        return;
      }
    }
  %}
4280 
  // Emit a Java dynamic (inline-cache) call; bail out if the code cache
  // is full.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    address call = __ ic_call((address)$meth$$method);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full"); 
      return;
    }
  %}
4289 
  // Call epilog.  The VerifyStackAtCalls check is not implemented on
  // this port yet (call_Unimplemented traps if the flag is enabled).
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
4297 
  // Call from compiled Java code to a runtime routine.
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blrt
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // Target lives in the code cache: reachable via a trampoline call.
      // Can fail if the code cache is full; record the bailout.
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full"); 
        return;
      }
    } else {
      // Target is outside the code cache: use blrt with the native
      // calling-convention info derived from the method's type function.
      int gpcnt;
      int fpcnt;
      int rtype;
      getCallInfo(tf(), gpcnt, fpcnt, rtype);
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaThread::pd_last_frame().
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
      __ bind(retaddr);
      // Pop the breadcrumb pushed above.
      __ add(sp, sp, 2 * wordSize);
    }
  %}
4328 
  // Jump to the rethrow stub (far_jump: the stub may be out of direct
  // branch range).
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}
4333 
  // Return to the address in the link register.
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}
4338 
  // Tail call: indirect jump to the target held in a register.
  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    __ br(target_reg);
  %}
4344 
  // Tail jump used for exception forwarding: pass the return address to
  // the callee in r3 before jumping.
  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
    // callee expects it in r3
    __ mov(r3, lr);
    __ br(target_reg);
  %}
4354 
  // Inline fast-path monitor enter.  On exit the condition flags carry
  // the outcome (EQ = locked, NE = must call the runtime); the matching
  // instruct's slow path tests them.  EmitSync bits selectively disable
  // parts of the fast path for debugging.
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Load markOop from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      // oop is never null here, so this leaves NE (failure) and forces
      // the slow path.
      __ cmp(oop, zr);
      return;
    }

    if (UseBiasedLocking) {
      __ biased_locking_enter(disp_hdr, oop, box, tmp, true, cont);
    }

    // Handle existing monitor
    if (EmitSync & 0x02) {
      // we can use AArch64's bit test and branch here but
      // markoopDesc does not define a bit index just the bit value
      // so assert in case the bit pos changes
#     define __monitor_value_log2 1
      assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
      __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
#     undef __monitor_value_log2
    }

    // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
    __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);

    // Load Compare Value application register.

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markOop with mark and if equal exchange scratch1
    // with object markOop.
    // Note that this is simply a CAS: it does not generate any
    // barriers.  These are separately generated by
    // membar_acquire_lock().
    {
      // ldxr/stlxr retry loop; stlxr writes its status into tmp
      // (0 == store succeeded).
      Label retry_load;
      __ bind(retry_load);
      __ ldxr(tmp, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, box, oop);
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    // Formerly:
    // __ cmpxchgptr(/*oldv=*/disp_hdr,
    //               /*newv=*/box,
    //               /*addr=*/oop,
    //               /*tmp=*/tmp,
    //               cont,
    //               /*fail*/NULL);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object, will have now locked it will continue at label cont

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markOop of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      // The object's monitor m is unlocked iff m->owner == NULL,
      // otherwise m->owner may contain a thread or a stack address.
      //
      // Try to CAS m->owner from NULL to current thread.
      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
      __ mov(disp_hdr, zr);

      {
        // CAS retry loop on m->owner; the cmp leaves EQ/NE for the code
        // at cont to consume.
        Label retry_load, fail;
        __ bind(retry_load);
        __ ldxr(rscratch1, tmp);
        __ cmp(disp_hdr, rscratch1);
        __ br(Assembler::NE, fail);
        // use stlxr to ensure update is immediately visible
        __ stlxr(rscratch1, rthread, tmp);
        __ cbnzw(rscratch1, retry_load);
        __ bind(fail);
      }

      // Label next;
      // __ cmpxchgptr(/*oldv=*/disp_hdr,
      //               /*newv=*/rthread,
      //               /*addr=*/tmp,
      //               /*tmp=*/rscratch1,
      //               /*succeed*/next,
      //               /*fail*/NULL);
      // __ bind(next);

      // store a non-null value into the box.
      __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));

      // PPC port checks the following invariants
      // #ifdef ASSERT
      // bne(flag, cont);
      // We have acquired the monitor, check some invariants.
      // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
      // Invariant 1: _recursions should be 0.
      // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
      // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
      //                        "monitor->_recursions should be 0", -1);
      // Invariant 2: OwnerIsThread shouldn't be 0.
      // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
      //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
      //                           "monitor->OwnerIsThread shouldn't be 0", -1);
      // #endif
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure

  %}
4499 
  // TODO
  // reimplement this with custom cmpxchgptr code
  // which avoids some of the unnecessary branching
  //
  // Inline fast-path monitor exit, mirroring aarch64_enc_fast_lock.
  // On exit the condition flags carry the outcome (EQ = unlocked,
  // NE = must call the runtime).
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);


    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

      {
        // ldxr/stlxr retry loop restoring the displaced header into the
        // object's mark word; tmp receives the stlxr status (0 == success).
        Label retry_load;
        __ bind(retry_load);
        __ ldxr(tmp, oop);
        __ cmp(box, tmp);
        __ br(Assembler::NE, cas_failed);
        // use stlxr to ensure update is immediately visible
        __ stlxr(tmp, disp_hdr, oop);
        __ cbzw(tmp, cont);
        __ b(retry_load);
      }

    // __ cmpxchgptr(/*compare_value=*/box,
    //               /*exchange_value=*/disp_hdr,
    //               /*where=*/oop,
    //               /*result=*/tmp,
    //               cont,
    //               /*cas_failed*/NULL);
    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    __ bind(cas_failed);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);

      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      // The cmp sets the flags consumed at cont; cbnz then branches
      // without disturbing them.
      __ cmp(rscratch1, zr);
      __ cbnz(rscratch1, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(rscratch1, tmp); // rscratch1 is zero
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
4592 
4593 %}
4594 
4595 //----------FRAME--------------------------------------------------------------
4596 // Definition of frame structure and management information.
4597 //
4598 //  S T A C K   L A Y O U T    Allocators stack-slot number
4599 //                             |   (to get allocators register number
4600 //  G  Owned by    |        |  v    add OptoReg::stack0())
4601 //  r   CALLER     |        |
4602 //  o     |        +--------+      pad to even-align allocators stack-slot
4603 //  w     V        |  pad0  |        numbers; owned by CALLER
4604 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4605 //  h     ^        |   in   |  5
4606 //        |        |  args  |  4   Holes in incoming args owned by SELF
4607 //  |     |        |        |  3
4608 //  |     |        +--------+
4609 //  V     |        | old out|      Empty on Intel, window on Sparc
4610 //        |    old |preserve|      Must be even aligned.
4611 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
4612 //        |        |   in   |  3   area for Intel ret address
4613 //     Owned by    |preserve|      Empty on Sparc.
4614 //       SELF      +--------+
4615 //        |        |  pad2  |  2   pad to align old SP
4616 //        |        +--------+  1
4617 //        |        | locks  |  0
4618 //        |        +--------+----> OptoReg::stack0(), even aligned
4619 //        |        |  pad1  | 11   pad to align new SP
4620 //        |        +--------+
4621 //        |        |        | 10
4622 //        |        | spills |  9   spills
4623 //        V        |        |  8   (pad0 slot for callee)
4624 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4625 //        ^        |  out   |  7
4626 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4627 //     Owned by    +--------+
4628 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4629 //        |    new |preserve|      Must be even-aligned.
4630 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
4631 //        |        |        |
4632 //
4633 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4634 //         known from SELF's arguments and the Java calling convention.
4635 //         Region 6-7 is determined per call site.
4636 // Note 2: If the calling convention leaves holes in the incoming argument
4637 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
4639 //         incoming area, as the Java calling convention is completely under
4640 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
4642 //         varargs C calling conventions.
4643 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4644 //         even aligned with pad0 as needed.
4645 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
4646 //           (the latter is true on Intel but is it false on AArch64?)
4647 //         region 6-11 is even aligned; it may be padded out more so that
4648 //         the region from SP to FP meets the minimum stack alignment.
4649 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4650 //         alignment.  Region 11, pad1, may be dynamically extended so that
4651 //         SP meets the minimum alignment.
4652 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Low half of the return-value register pair, indexed by ideal
    // register type.  Integer-ish values return in R0, floats/doubles
    // in V0.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    // High half; OptoReg::Bad marks value types that occupy a single
    // 32-bit slot and therefore have no high half.
    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                       // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
4756 
4757 //----------ATTRIBUTES---------------------------------------------------------
4758 //----------Operand Attributes-------------------------------------------------
op_attrib op_cost(1);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
// (INSN_COST is defined elsewhere in this file.)
ins_attrib ins_cost(INSN_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
4774 
4775 //----------OPERANDS-----------------------------------------------------------
4776 // Operand definitions must precede instruction definitions for correct parsing
4777 // in the ADLC because operands constitute user defined types which are used in
4778 // instruction definitions.
4779 
4780 //----------Simple Operands----------------------------------------------------
4781 
// Integer operands 32 bit
// 32 bit immediate
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant <= 4
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4835 
// 32 bit constant 31
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 255 (low-byte mask)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 65535 (low-halfword mask)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4935 
// Constant 63.
// NOTE(review): despite the immL_* name this matches ConI via get_int()
// (a 32-bit constant node) -- presumably because shift counts for longs
// are modeled as ints; confirm against the instructs that use it.
operand immL_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant 255.
// NOTE(review): also matches ConI/get_int() despite the immL_* name --
// see immL_63 above; confirm intentional.
operand immL_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 65535 (low-halfword mask)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 0xFFFFFFFF (low-word mask)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask of contiguous low-order one bits (value+1 is a power of
// two), with the top two bits clear
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of contiguous low-order one bits (value+1 is a power of
// two), with the top two bits clear
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4997 
// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset, long constant variant of immIU12
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long constant variant of immIOffset
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5072 
// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// (validity is delegated to the assembler's bitmask-immediate encoder)
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5094 
5095 // Integer operands 64 bit
5096 // 64 bit immediate
5097 operand immL()
5098 %{
5099   match(ConL);
5100 
5101   op_cost(0);
5102   format %{ %}
5103   interface(CONST_INTER);
5104 %}
5105 
5106 // 64 bit zero
5107 operand immL0()
5108 %{
5109   predicate(n->get_long() == 0);
5110   match(ConL);
5111 
5112   op_cost(0);
5113   format %{ %}
5114   interface(CONST_INTER);
5115 %}
5116 
5117 // 64 bit unit increment
5118 operand immL_1()
5119 %{
5120   predicate(n->get_long() == 1);
5121   match(ConL);
5122 
5123   op_cost(0);
5124   format %{ %}
5125   interface(CONST_INTER);
5126 %}
5127 
5128 // 64 bit unit decrement
5129 operand immL_M1()
5130 %{
5131   predicate(n->get_long() == -1);
5132   match(ConL);
5133 
5134   op_cost(0);
5135   format %{ %}
5136   interface(CONST_INTER);
5137 %}
5138 
// 32 bit offset of pc in thread anchor

operand immL_pc_off()
%{
  // Matches exactly one constant: the byte offset of the anchor's
  // last_Java_pc slot within JavaThread (frame anchor offset plus
  // pc offset inside the anchor).  Used by thread_anchor_pc below.
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5151 
5152 // 64 bit integer valid for add sub immediate
5153 operand immLAddSub()
5154 %{
5155   predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
5156   match(ConL);
5157   op_cost(0);
5158   format %{ %}
5159   interface(CONST_INTER);
5160 %}
5161 
5162 // 64 bit integer valid for logical immediate
5163 operand immLLog()
5164 %{
5165   predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
5166   match(ConL);
5167   op_cost(0);
5168   format %{ %}
5169   interface(CONST_INTER);
5170 %}
5171 
5172 // Long Immediate: low 32-bit mask
5173 operand immL_32bits()
5174 %{
5175   predicate(n->get_long() == 0xFFFFFFFFL);
5176   match(ConL);
5177   op_cost(0);
5178   format %{ %}
5179   interface(CONST_INTER);
5180 %}
5181 
5182 // Pointer operands
5183 // Pointer Immediate
5184 operand immP()
5185 %{
5186   match(ConP);
5187 
5188   op_cost(0);
5189   format %{ %}
5190   interface(CONST_INTER);
5191 %}
5192 
5193 // NULL Pointer Immediate
5194 operand immP0()
5195 %{
5196   predicate(n->get_ptr() == 0);
5197   match(ConP);
5198 
5199   op_cost(0);
5200   format %{ %}
5201   interface(CONST_INTER);
5202 %}
5203 
5204 // Pointer Immediate One
5205 // this is used in object initialization (initial object header)
5206 operand immP_1()
5207 %{
5208   predicate(n->get_ptr() == 1);
5209   match(ConP);
5210 
5211   op_cost(0);
5212   format %{ %}
5213   interface(CONST_INTER);
5214 %}
5215 
5216 // Polling Page Pointer Immediate
5217 operand immPollPage()
5218 %{
5219   predicate((address)n->get_ptr() == os::get_polling_page());
5220   match(ConP);
5221 
5222   op_cost(0);
5223   format %{ %}
5224   interface(CONST_INTER);
5225 %}
5226 
// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map.
  // NOTE(review): the cast assumes the active barrier set is a
  // CardTableModRefBS (or subclass) -- presumably guaranteed by the
  // collectors supported on this port; verify if new barrier sets are added.
  predicate((jbyte*)n->get_ptr() ==
        ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5239 
5240 // Pointer Immediate Minus One
5241 // this is used when we want to write the current PC to the thread anchor
5242 operand immP_M1()
5243 %{
5244   predicate(n->get_ptr() == -1);
5245   match(ConP);
5246 
5247   op_cost(0);
5248   format %{ %}
5249   interface(CONST_INTER);
5250 %}
5251 
5252 // Pointer Immediate Minus Two
5253 // this is used when we want to write the current PC to the thread anchor
5254 operand immP_M2()
5255 %{
5256   predicate(n->get_ptr() == -2);
5257   match(ConP);
5258 
5259   op_cost(0);
5260   format %{ %}
5261   interface(CONST_INTER);
5262 %}
5263 
5264 // Float and Double operands
5265 // Double Immediate
5266 operand immD()
5267 %{
5268   match(ConD);
5269   op_cost(0);
5270   format %{ %}
5271   interface(CONST_INTER);
5272 %}
5273 
5274 // Double Immediate: +0.0d
5275 operand immD0()
5276 %{
5277   predicate(jlong_cast(n->getd()) == 0);
5278   match(ConD);
5279 
5280   op_cost(0);
5281   format %{ %}
5282   interface(CONST_INTER);
5283 %}
5284 
// Double Immediate: value representable as an FP immediate encoding
// (see Assembler::operand_valid_for_float_immediate).
5286 operand immDPacked()
5287 %{
5288   predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
5289   match(ConD);
5290   op_cost(0);
5291   format %{ %}
5292   interface(CONST_INTER);
5293 %}
5294 
5295 // Float Immediate
5296 operand immF()
5297 %{
5298   match(ConF);
5299   op_cost(0);
5300   format %{ %}
5301   interface(CONST_INTER);
5302 %}
5303 
5304 // Float Immediate: +0.0f.
5305 operand immF0()
5306 %{
5307   predicate(jint_cast(n->getf()) == 0);
5308   match(ConF);
5309 
5310   op_cost(0);
5311   format %{ %}
5312   interface(CONST_INTER);
5313 %}
5314 
5315 //
5316 operand immFPacked()
5317 %{
5318   predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
5319   match(ConF);
5320   op_cost(0);
5321   format %{ %}
5322   interface(CONST_INTER);
5323 %}
5324 
5325 // Narrow pointer operands
5326 // Narrow Pointer Immediate
5327 operand immN()
5328 %{
5329   match(ConN);
5330 
5331   op_cost(0);
5332   format %{ %}
5333   interface(CONST_INTER);
5334 %}
5335 
5336 // Narrow NULL Pointer Immediate
5337 operand immN0()
5338 %{
5339   predicate(n->get_narrowcon() == 0);
5340   match(ConN);
5341 
5342   op_cost(0);
5343   format %{ %}
5344   interface(CONST_INTER);
5345 %}
5346 
5347 operand immNKlass()
5348 %{
5349   match(ConNKlass);
5350 
5351   op_cost(0);
5352   format %{ %}
5353   interface(CONST_INTER);
5354 %}
5355 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
5358 operand iRegI()
5359 %{
5360   constraint(ALLOC_IN_RC(any_reg32));
5361   match(RegI);
5362   match(iRegINoSp);
5363   op_cost(0);
5364   format %{ %}
5365   interface(REG_INTER);
5366 %}
5367 
5368 // Integer 32 bit Register not Special
5369 operand iRegINoSp()
5370 %{
5371   constraint(ALLOC_IN_RC(no_special_reg32));
5372   match(RegI);
5373   op_cost(0);
5374   format %{ %}
5375   interface(REG_INTER);
5376 %}
5377 
5378 // Integer 64 bit Register Operands
5379 // Integer 64 bit Register (includes SP)
5380 operand iRegL()
5381 %{
5382   constraint(ALLOC_IN_RC(any_reg));
5383   match(RegL);
5384   match(iRegLNoSp);
5385   op_cost(0);
5386   format %{ %}
5387   interface(REG_INTER);
5388 %}
5389 
// Integer 64 bit Register not Special
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  // op_cost(0) added for consistency with the other register operands
  // (iRegINoSp, iRegNNoSp, iRegPNoSp all declare it explicitly).
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5398 
5399 // Pointer Register Operands
5400 // Pointer Register
5401 operand iRegP()
5402 %{
5403   constraint(ALLOC_IN_RC(ptr_reg));
5404   match(RegP);
5405   match(iRegPNoSp);
5406   match(iRegP_R0);
5407   //match(iRegP_R2);
5408   //match(iRegP_R4);
5409   //match(iRegP_R5);
5410   match(thread_RegP);
5411   op_cost(0);
5412   format %{ %}
5413   interface(REG_INTER);
5414 %}
5415 
5416 // Pointer 64 bit Register not Special
5417 operand iRegPNoSp()
5418 %{
5419   constraint(ALLOC_IN_RC(no_special_ptr_reg));
5420   match(RegP);
5421   // match(iRegP);
5422   // match(iRegP_R0);
5423   // match(iRegP_R2);
5424   // match(iRegP_R4);
5425   // match(iRegP_R5);
5426   // match(thread_RegP);
5427   op_cost(0);
5428   format %{ %}
5429   interface(REG_INTER);
5430 %}
5431 
5432 // Pointer 64 bit Register R0 only
5433 operand iRegP_R0()
5434 %{
5435   constraint(ALLOC_IN_RC(r0_reg));
5436   match(RegP);
5437   // match(iRegP);
5438   match(iRegPNoSp);
5439   op_cost(0);
5440   format %{ %}
5441   interface(REG_INTER);
5442 %}
5443 
5444 // Pointer 64 bit Register R1 only
5445 operand iRegP_R1()
5446 %{
5447   constraint(ALLOC_IN_RC(r1_reg));
5448   match(RegP);
5449   // match(iRegP);
5450   match(iRegPNoSp);
5451   op_cost(0);
5452   format %{ %}
5453   interface(REG_INTER);
5454 %}
5455 
5456 // Pointer 64 bit Register R2 only
5457 operand iRegP_R2()
5458 %{
5459   constraint(ALLOC_IN_RC(r2_reg));
5460   match(RegP);
5461   // match(iRegP);
5462   match(iRegPNoSp);
5463   op_cost(0);
5464   format %{ %}
5465   interface(REG_INTER);
5466 %}
5467 
5468 // Pointer 64 bit Register R3 only
5469 operand iRegP_R3()
5470 %{
5471   constraint(ALLOC_IN_RC(r3_reg));
5472   match(RegP);
5473   // match(iRegP);
5474   match(iRegPNoSp);
5475   op_cost(0);
5476   format %{ %}
5477   interface(REG_INTER);
5478 %}
5479 
5480 // Pointer 64 bit Register R4 only
5481 operand iRegP_R4()
5482 %{
5483   constraint(ALLOC_IN_RC(r4_reg));
5484   match(RegP);
5485   // match(iRegP);
5486   match(iRegPNoSp);
5487   op_cost(0);
5488   format %{ %}
5489   interface(REG_INTER);
5490 %}
5491 
5492 // Pointer 64 bit Register R5 only
5493 operand iRegP_R5()
5494 %{
5495   constraint(ALLOC_IN_RC(r5_reg));
5496   match(RegP);
5497   // match(iRegP);
5498   match(iRegPNoSp);
5499   op_cost(0);
5500   format %{ %}
5501   interface(REG_INTER);
5502 %}
5503 
5504 // Pointer 64 bit Register R10 only
5505 operand iRegP_R10()
5506 %{
5507   constraint(ALLOC_IN_RC(r10_reg));
5508   match(RegP);
5509   // match(iRegP);
5510   match(iRegPNoSp);
5511   op_cost(0);
5512   format %{ %}
5513   interface(REG_INTER);
5514 %}
5515 
5516 // Long 64 bit Register R11 only
5517 operand iRegL_R11()
5518 %{
5519   constraint(ALLOC_IN_RC(r11_reg));
5520   match(RegL);
5521   match(iRegLNoSp);
5522   op_cost(0);
5523   format %{ %}
5524   interface(REG_INTER);
5525 %}
5526 
5527 // Pointer 64 bit Register FP only
5528 operand iRegP_FP()
5529 %{
5530   constraint(ALLOC_IN_RC(fp_reg));
5531   match(RegP);
5532   // match(iRegP);
5533   op_cost(0);
5534   format %{ %}
5535   interface(REG_INTER);
5536 %}
5537 
5538 // Register R0 only
5539 operand iRegI_R0()
5540 %{
5541   constraint(ALLOC_IN_RC(int_r0_reg));
5542   match(RegI);
5543   match(iRegINoSp);
5544   op_cost(0);
5545   format %{ %}
5546   interface(REG_INTER);
5547 %}
5548 
5549 // Register R2 only
5550 operand iRegI_R2()
5551 %{
5552   constraint(ALLOC_IN_RC(int_r2_reg));
5553   match(RegI);
5554   match(iRegINoSp);
5555   op_cost(0);
5556   format %{ %}
5557   interface(REG_INTER);
5558 %}
5559 
5560 // Register R3 only
5561 operand iRegI_R3()
5562 %{
5563   constraint(ALLOC_IN_RC(int_r3_reg));
5564   match(RegI);
5565   match(iRegINoSp);
5566   op_cost(0);
5567   format %{ %}
5568   interface(REG_INTER);
5569 %}
5570 
5571 
// Register R4 only
5573 operand iRegI_R4()
5574 %{
5575   constraint(ALLOC_IN_RC(int_r4_reg));
5576   match(RegI);
5577   match(iRegINoSp);
5578   op_cost(0);
5579   format %{ %}
5580   interface(REG_INTER);
5581 %}
5582 
5583 
5584 // Pointer Register Operands
5585 // Narrow Pointer Register
5586 operand iRegN()
5587 %{
5588   constraint(ALLOC_IN_RC(any_reg32));
5589   match(RegN);
5590   match(iRegNNoSp);
5591   op_cost(0);
5592   format %{ %}
5593   interface(REG_INTER);
5594 %}
5595 
// Narrow Pointer Register not Special
5597 operand iRegNNoSp()
5598 %{
5599   constraint(ALLOC_IN_RC(no_special_reg32));
5600   match(RegN);
5601   op_cost(0);
5602   format %{ %}
5603   interface(REG_INTER);
5604 %}
5605 
5606 // heap base register -- used for encoding immN0
5607 
5608 operand iRegIHeapbase()
5609 %{
5610   constraint(ALLOC_IN_RC(heapbase_reg));
5611   match(RegI);
5612   op_cost(0);
5613   format %{ %}
5614   interface(REG_INTER);
5615 %}
5616 
5617 // Float Register
5618 // Float register operands
5619 operand vRegF()
5620 %{
5621   constraint(ALLOC_IN_RC(float_reg));
5622   match(RegF);
5623 
5624   op_cost(0);
5625   format %{ %}
5626   interface(REG_INTER);
5627 %}
5628 
5629 // Double Register
5630 // Double register operands
5631 operand vRegD()
5632 %{
5633   constraint(ALLOC_IN_RC(double_reg));
5634   match(RegD);
5635 
5636   op_cost(0);
5637   format %{ %}
5638   interface(REG_INTER);
5639 %}
5640 
5641 operand vecD()
5642 %{
5643   constraint(ALLOC_IN_RC(vectord_reg));
5644   match(VecD);
5645 
5646   op_cost(0);
5647   format %{ %}
5648   interface(REG_INTER);
5649 %}
5650 
5651 operand vecX()
5652 %{
5653   constraint(ALLOC_IN_RC(vectorx_reg));
5654   match(VecX);
5655 
5656   op_cost(0);
5657   format %{ %}
5658   interface(REG_INTER);
5659 %}
5660 
5661 operand vRegD_V0()
5662 %{
5663   constraint(ALLOC_IN_RC(v0_reg));
5664   match(RegD);
5665   op_cost(0);
5666   format %{ %}
5667   interface(REG_INTER);
5668 %}
5669 
5670 operand vRegD_V1()
5671 %{
5672   constraint(ALLOC_IN_RC(v1_reg));
5673   match(RegD);
5674   op_cost(0);
5675   format %{ %}
5676   interface(REG_INTER);
5677 %}
5678 
5679 operand vRegD_V2()
5680 %{
5681   constraint(ALLOC_IN_RC(v2_reg));
5682   match(RegD);
5683   op_cost(0);
5684   format %{ %}
5685   interface(REG_INTER);
5686 %}
5687 
5688 operand vRegD_V3()
5689 %{
5690   constraint(ALLOC_IN_RC(v3_reg));
5691   match(RegD);
5692   op_cost(0);
5693   format %{ %}
5694   interface(REG_INTER);
5695 %}
5696 
5697 // Flags register, used as output of signed compare instructions
5698 
5699 // note that on AArch64 we also use this register as the output for
5700 // for floating point compare instructions (CmpF CmpD). this ensures
5701 // that ordered inequality tests use GT, GE, LT or LE none of which
5702 // pass through cases where the result is unordered i.e. one or both
5703 // inputs to the compare is a NaN. this means that the ideal code can
5704 // replace e.g. a GT with an LE and not end up capturing the NaN case
5705 // (where the comparison should always fail). EQ and NE tests are
5706 // always generated in ideal code so that unordered folds into the NE
5707 // case, matching the behaviour of AArch64 NE.
5708 //
5709 // This differs from x86 where the outputs of FP compares use a
5710 // special FP flags registers and where compares based on this
5711 // register are distinguished into ordered inequalities (cmpOpUCF) and
5712 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
5713 // to explicitly handle the unordered case in branches. x86 also has
5714 // to include extra CMoveX rules to accept a cmpOpUCF input.
5715 
5716 operand rFlagsReg()
5717 %{
5718   constraint(ALLOC_IN_RC(int_flags));
5719   match(RegFlags);
5720 
5721   op_cost(0);
5722   format %{ "RFLAGS" %}
5723   interface(REG_INTER);
5724 %}
5725 
5726 // Flags register, used as output of unsigned compare instructions
5727 operand rFlagsRegU()
5728 %{
5729   constraint(ALLOC_IN_RC(int_flags));
5730   match(RegFlags);
5731 
5732   op_cost(0);
5733   format %{ "RFLAGSU" %}
5734   interface(REG_INTER);
5735 %}
5736 
5737 // Special Registers
5738 
5739 // Method Register
5740 operand inline_cache_RegP(iRegP reg)
5741 %{
5742   constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
5743   match(reg);
5744   match(iRegPNoSp);
5745   op_cost(0);
5746   format %{ %}
5747   interface(REG_INTER);
5748 %}
5749 
5750 operand interpreter_method_oop_RegP(iRegP reg)
5751 %{
5752   constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
5753   match(reg);
5754   match(iRegPNoSp);
5755   op_cost(0);
5756   format %{ %}
5757   interface(REG_INTER);
5758 %}
5759 
// Thread Register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // TLS register (the "// link_reg"
                                       // tag here was a copy-paste from
                                       // lr_RegP below)
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5769 
5770 operand lr_RegP(iRegP reg)
5771 %{
5772   constraint(ALLOC_IN_RC(lr_reg)); // link_reg
5773   match(reg);
5774   op_cost(0);
5775   format %{ %}
5776   interface(REG_INTER);
5777 %}
5778 
5779 //----------Memory Operands----------------------------------------------------
5780 
5781 operand indirect(iRegP reg)
5782 %{
5783   constraint(ALLOC_IN_RC(ptr_reg));
5784   match(reg);
5785   op_cost(0);
5786   format %{ "[$reg]" %}
5787   interface(MEMORY_INTER) %{
5788     base($reg);
5789     index(0xffffffff);
5790     scale(0x0);
5791     disp(0x0);
5792   %}
5793 %}
5794 
5795 operand indIndexScaledOffsetI(iRegP reg, iRegL lreg, immIScale scale, immIU12 off)
5796 %{
5797   constraint(ALLOC_IN_RC(ptr_reg));
5798   match(AddP (AddP reg (LShiftL lreg scale)) off);
5799   op_cost(INSN_COST);
5800   format %{ "$reg, $lreg lsl($scale), $off" %}
5801   interface(MEMORY_INTER) %{
5802     base($reg);
5803     index($lreg);
5804     scale($scale);
5805     disp($off);
5806   %}
5807 %}
5808 
5809 operand indIndexScaledOffsetL(iRegP reg, iRegL lreg, immIScale scale, immLU12 off)
5810 %{
5811   constraint(ALLOC_IN_RC(ptr_reg));
5812   match(AddP (AddP reg (LShiftL lreg scale)) off);
5813   op_cost(INSN_COST);
5814   format %{ "$reg, $lreg lsl($scale), $off" %}
5815   interface(MEMORY_INTER) %{
5816     base($reg);
5817     index($lreg);
5818     scale($scale);
5819     disp($off);
5820   %}
5821 %}
5822 
5823 operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off)
5824 %{
5825   constraint(ALLOC_IN_RC(ptr_reg));
5826   match(AddP (AddP reg (ConvI2L ireg)) off);
5827   op_cost(INSN_COST);
5828   format %{ "$reg, $ireg, $off I2L" %}
5829   interface(MEMORY_INTER) %{
5830     base($reg);
5831     index($ireg);
5832     scale(0x0);
5833     disp($off);
5834   %}
5835 %}
5836 
5837 operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off)
5838 %{
5839   constraint(ALLOC_IN_RC(ptr_reg));
5840   match(AddP (AddP reg (LShiftL (ConvI2L ireg) scale)) off);
5841   op_cost(INSN_COST);
5842   format %{ "$reg, $ireg sxtw($scale), $off I2L" %}
5843   interface(MEMORY_INTER) %{
5844     base($reg);
5845     index($ireg);
5846     scale($scale);
5847     disp($off);
5848   %}
5849 %}
5850 
5851 operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
5852 %{
5853   constraint(ALLOC_IN_RC(ptr_reg));
5854   match(AddP reg (LShiftL (ConvI2L ireg) scale));
5855   op_cost(0);
5856   format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
5857   interface(MEMORY_INTER) %{
5858     base($reg);
5859     index($ireg);
5860     scale($scale);
5861     disp(0x0);
5862   %}
5863 %}
5864 
5865 operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
5866 %{
5867   constraint(ALLOC_IN_RC(ptr_reg));
5868   match(AddP reg (LShiftL lreg scale));
5869   op_cost(0);
5870   format %{ "$reg, $lreg lsl($scale)" %}
5871   interface(MEMORY_INTER) %{
5872     base($reg);
5873     index($lreg);
5874     scale($scale);
5875     disp(0x0);
5876   %}
5877 %}
5878 
5879 operand indIndex(iRegP reg, iRegL lreg)
5880 %{
5881   constraint(ALLOC_IN_RC(ptr_reg));
5882   match(AddP reg lreg);
5883   op_cost(0);
5884   format %{ "$reg, $lreg" %}
5885   interface(MEMORY_INTER) %{
5886     base($reg);
5887     index($lreg);
5888     scale(0x0);
5889     disp(0x0);
5890   %}
5891 %}
5892 
5893 operand indOffI(iRegP reg, immIOffset off)
5894 %{
5895   constraint(ALLOC_IN_RC(ptr_reg));
5896   match(AddP reg off);
5897   op_cost(0);
5898   format %{ "[$reg, $off]" %}
5899   interface(MEMORY_INTER) %{
5900     base($reg);
5901     index(0xffffffff);
5902     scale(0x0);
5903     disp($off);
5904   %}
5905 %}
5906 
5907 operand indOffL(iRegP reg, immLoffset off)
5908 %{
5909   constraint(ALLOC_IN_RC(ptr_reg));
5910   match(AddP reg off);
5911   op_cost(0);
5912   format %{ "[$reg, $off]" %}
5913   interface(MEMORY_INTER) %{
5914     base($reg);
5915     index(0xffffffff);
5916     scale(0x0);
5917     disp($off);
5918   %}
5919 %}
5920 
5921 
5922 operand indirectN(iRegN reg)
5923 %{
5924   predicate(Universe::narrow_oop_shift() == 0);
5925   constraint(ALLOC_IN_RC(ptr_reg));
5926   match(DecodeN reg);
5927   op_cost(0);
5928   format %{ "[$reg]\t# narrow" %}
5929   interface(MEMORY_INTER) %{
5930     base($reg);
5931     index(0xffffffff);
5932     scale(0x0);
5933     disp(0x0);
5934   %}
5935 %}
5936 
5937 operand indIndexScaledOffsetIN(iRegN reg, iRegL lreg, immIScale scale, immIU12 off)
5938 %{
5939   predicate(Universe::narrow_oop_shift() == 0);
5940   constraint(ALLOC_IN_RC(ptr_reg));
5941   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5942   op_cost(0);
5943   format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
5944   interface(MEMORY_INTER) %{
5945     base($reg);
5946     index($lreg);
5947     scale($scale);
5948     disp($off);
5949   %}
5950 %}
5951 
5952 operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLU12 off)
5953 %{
5954   predicate(Universe::narrow_oop_shift() == 0);
5955   constraint(ALLOC_IN_RC(ptr_reg));
5956   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5957   op_cost(INSN_COST);
5958   format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
5959   interface(MEMORY_INTER) %{
5960     base($reg);
5961     index($lreg);
5962     scale($scale);
5963     disp($off);
5964   %}
5965 %}
5966 
5967 operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off)
5968 %{
5969   predicate(Universe::narrow_oop_shift() == 0);
5970   constraint(ALLOC_IN_RC(ptr_reg));
5971   match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off);
5972   op_cost(INSN_COST);
5973   format %{ "$reg, $ireg, $off I2L\t# narrow" %}
5974   interface(MEMORY_INTER) %{
5975     base($reg);
5976     index($ireg);
5977     scale(0x0);
5978     disp($off);
5979   %}
5980 %}
5981 
5982 operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off)
5983 %{
5984   predicate(Universe::narrow_oop_shift() == 0);
5985   constraint(ALLOC_IN_RC(ptr_reg));
5986   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)) off);
5987   op_cost(INSN_COST);
5988   format %{ "$reg, $ireg sxtw($scale), $off I2L\t# narrow" %}
5989   interface(MEMORY_INTER) %{
5990     base($reg);
5991     index($ireg);
5992     scale($scale);
5993     disp($off);
5994   %}
5995 %}
5996 
5997 operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
5998 %{
5999   predicate(Universe::narrow_oop_shift() == 0);
6000   constraint(ALLOC_IN_RC(ptr_reg));
6001   match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
6002   op_cost(0);
6003   format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
6004   interface(MEMORY_INTER) %{
6005     base($reg);
6006     index($ireg);
6007     scale($scale);
6008     disp(0x0);
6009   %}
6010 %}
6011 
6012 operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
6013 %{
6014   predicate(Universe::narrow_oop_shift() == 0);
6015   constraint(ALLOC_IN_RC(ptr_reg));
6016   match(AddP (DecodeN reg) (LShiftL lreg scale));
6017   op_cost(0);
6018   format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
6019   interface(MEMORY_INTER) %{
6020     base($reg);
6021     index($lreg);
6022     scale($scale);
6023     disp(0x0);
6024   %}
6025 %}
6026 
6027 operand indIndexN(iRegN reg, iRegL lreg)
6028 %{
6029   predicate(Universe::narrow_oop_shift() == 0);
6030   constraint(ALLOC_IN_RC(ptr_reg));
6031   match(AddP (DecodeN reg) lreg);
6032   op_cost(0);
6033   format %{ "$reg, $lreg\t# narrow" %}
6034   interface(MEMORY_INTER) %{
6035     base($reg);
6036     index($lreg);
6037     scale(0x0);
6038     disp(0x0);
6039   %}
6040 %}
6041 
6042 operand indOffIN(iRegN reg, immIOffset off)
6043 %{
6044   predicate(Universe::narrow_oop_shift() == 0);
6045   constraint(ALLOC_IN_RC(ptr_reg));
6046   match(AddP (DecodeN reg) off);
6047   op_cost(0);
6048   format %{ "[$reg, $off]\t# narrow" %}
6049   interface(MEMORY_INTER) %{
6050     base($reg);
6051     index(0xffffffff);
6052     scale(0x0);
6053     disp($off);
6054   %}
6055 %}
6056 
6057 operand indOffLN(iRegN reg, immLoffset off)
6058 %{
6059   predicate(Universe::narrow_oop_shift() == 0);
6060   constraint(ALLOC_IN_RC(ptr_reg));
6061   match(AddP (DecodeN reg) off);
6062   op_cost(0);
6063   format %{ "[$reg, $off]\t# narrow" %}
6064   interface(MEMORY_INTER) %{
6065     base($reg);
6066     index(0xffffffff);
6067     scale(0x0);
6068     disp($off);
6069   %}
6070 %}
6071 
6072 
6073 
6074 // AArch64 opto stubs need to write to the pc slot in the thread anchor
6075 operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
6076 %{
6077   constraint(ALLOC_IN_RC(ptr_reg));
6078   match(AddP reg off);
6079   op_cost(0);
6080   format %{ "[$reg, $off]" %}
6081   interface(MEMORY_INTER) %{
6082     base($reg);
6083     index(0xffffffff);
6084     scale(0x0);
6085     disp($off);
6086   %}
6087 %}
6088 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    // NOTE(review): the "RSP" tag is inherited from the x86 .ad files;
    // on AArch64 the 0x1e encoding should denote the stack pointer --
    // confirm against the reg_def encodings at the top of this file.
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
6107 
6108 operand stackSlotI(sRegI reg)
6109 %{
6110   constraint(ALLOC_IN_RC(stack_slots));
6111   // No match rule because this operand is only generated in matching
6112   // match(RegI);
6113   format %{ "[$reg]" %}
6114   interface(MEMORY_INTER) %{
6115     base(0x1e);  // RSP
6116     index(0x0);  // No Index
6117     scale(0x0);  // No Scale
6118     disp($reg);  // Stack Offset
6119   %}
6120 %}
6121 
6122 operand stackSlotF(sRegF reg)
6123 %{
6124   constraint(ALLOC_IN_RC(stack_slots));
6125   // No match rule because this operand is only generated in matching
6126   // match(RegF);
6127   format %{ "[$reg]" %}
6128   interface(MEMORY_INTER) %{
6129     base(0x1e);  // RSP
6130     index(0x0);  // No Index
6131     scale(0x0);  // No Scale
6132     disp($reg);  // Stack Offset
6133   %}
6134 %}
6135 
6136 operand stackSlotD(sRegD reg)
6137 %{
6138   constraint(ALLOC_IN_RC(stack_slots));
6139   // No match rule because this operand is only generated in matching
6140   // match(RegD);
6141   format %{ "[$reg]" %}
6142   interface(MEMORY_INTER) %{
6143     base(0x1e);  // RSP
6144     index(0x0);  // No Index
6145     scale(0x0);  // No Scale
6146     disp($reg);  // Stack Offset
6147   %}
6148 %}
6149 
6150 operand stackSlotL(sRegL reg)
6151 %{
6152   constraint(ALLOC_IN_RC(stack_slots));
6153   // No match rule because this operand is only generated in matching
6154   // match(RegL);
6155   format %{ "[$reg]" %}
6156   interface(MEMORY_INTER) %{
6157     base(0x1e);  // RSP
6158     index(0x0);  // No Index
6159     scale(0x0);  // No Scale
6160     disp($reg);  // Stack Offset
6161   %}
6162 %}
6163 
6164 // Operands for expressing Control Flow
6165 // NOTE: Label is a predefined operand which should not be redefined in
6166 //       the AD file. It is generically handled within the ADLC.
6167 
6168 //----------Conditional Branch Operands----------------------------------------
6169 // Comparison Op  - This is the operation of the comparison, and is limited to
6170 //                  the following set of codes:
6171 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6172 //
6173 // Other attributes of the comparison, such as unsignedness, are specified
6174 // by the comparison instruction that sets a condition code flags register.
6175 // That result is represented by a flags operand whose subtype is appropriate
6176 // to the unsignedness (etc.) of the comparison.
6177 //
6178 // Later, the instruction which matches both the Comparison Op (a Bool) and
6179 // the flags (produced by the Cmp) specifies the coding of the comparison op
6180 // by matching a specific subtype of Bool operand below, such as cmpOpU.
6181 
// used for signed integral comparisons and fp comparisons

operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  // The hex values are the AArch64 condition-code (cond field)
  // encodings: eq=0x0, ne=0x1, ge=0xa, lt=0xb, gt=0xc, le=0xd,
  // vs=0x6, vc=0x7 (see the Arm ARM condition code table).
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6200 
6201 // used for unsigned integral comparisons
6202 
6203 operand cmpOpU()
6204 %{
6205   match(Bool);
6206 
6207   format %{ "" %}
6208   interface(COND_INTER) %{
6209     equal(0x0, "eq");
6210     not_equal(0x1, "ne");
6211     less(0x3, "lo");
6212     greater_equal(0x2, "hs");
6213     less_equal(0x9, "ls");
6214     greater(0x8, "hi");
6215     overflow(0x6, "vs");
6216     no_overflow(0x7, "vc");
6217   %}
6218 %}
6219 
// Special operand allowing long args to int ops to be truncated for free

operand iRegL2I(iRegL reg) %{

  op_cost(0);

  // Matches a ConvL2I of a long register so that 32-bit instructions
  // can consume the low half of the long directly, eliding the l2i
  // (movw) that would otherwise be planted.
  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  // Trailing ';' added for consistency with every other operand's
  // interface declaration in this file.
  interface(REG_INTER);
%}
6232 
6233 opclass vmem(indirect, indIndex, indOffI, indOffL);
6234 
6235 //----------OPERAND CLASSES----------------------------------------------------
6236 // Operand Classes are groups of operands that are used as to simplify
6237 // instruction definitions by not requiring the AD writer to specify
6238 // separate instructions for every form of operand when the
6239 // instruction accepts multiple operand types with the same basic
6240 // encoding and format. The classic case of this is memory operands.
6241 
6242 // memory is used to define read/write location for load/store
6243 // instruction defs. we can turn a memory op into an Address
6244 
6245 opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
6246                indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
6247 
6248 
6249 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
6250 // operations. it allows the src to be either an iRegI or a (ConvL2I
6251 // iRegL). in the latter case the l2i normally planted for a ConvL2I
6252 // can be elided because the 32-bit instruction will just employ the
6253 // lower 32 bits anyway.
6254 //
6255 // n.b. this does not elide all L2I conversions. if the truncated
6256 // value is consumed by more than one operation then the ConvL2I
6257 // cannot be bundled into the consuming nodes so an l2i gets planted
6258 // (actually a movw $dst $src) and the downstream instructions consume
6259 // the result of the l2i as an iRegI input. That's a shame since the
6260 // movw is actually redundant but its not too costly.
6261 
6262 opclass iRegIorL2I(iRegI, iRegL2I);
6263 
6264 //----------PIPELINE-----------------------------------------------------------
6265 // Rules which define the behavior of the target architectures pipeline.
6266 // Integer ALU reg operation
6267 pipeline %{
6268 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;           // Fixed size instructions
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
6281 
6282 // We don't use an actual pipeline model so don't care about resources
6283 // or description. we do use pipeline classes to introduce fixed
6284 // latencies
6285 
6286 //----------RESOURCES----------------------------------------------------------
6287 // Resources are the functional units available to the machine
6288 
6289 resources( INS0, INS1, INS01 = INS0 | INS1,
6290            ALU0, ALU1, ALU = ALU0 | ALU1,
6291            MAC,
6292            DIV,
6293            BRANCH,
6294            LDST,
6295            NEON_FP);
6296 
6297 //----------PIPELINE DESCRIPTION-----------------------------------------------
6298 // Pipeline Description specifies the stages in the machine's pipeline
6299 
6300 pipe_desc(ISS, EX1, EX2, WR);
6301 
6302 //----------PIPELINE CLASSES---------------------------------------------------
6303 // Pipeline Classes describe the stages in which input and output are
6304 // referenced by the hardware pipeline.
6305 
6306 //------- Integer ALU operations --------------------------
6307 
6308 // Integer ALU reg-reg operation
6309 // Operands needed in EX1, result generated in EX2
6310 // Eg.  ADD     x0, x1, x2
6311 pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6312 %{
6313   single_instruction;
6314   dst    : EX2(write);
6315   src1   : EX1(read);
6316   src2   : EX1(read);
6317   INS01  : ISS; // Dual issue as instruction 0 or 1
6318   ALU    : EX2;
6319 %}
6320 
6321 // Integer ALU reg-reg operation with constant shift
6322 // Shifted register must be available in LATE_ISS instead of EX1
6323 // Eg.  ADD     x0, x1, x2, LSL #2
6324 pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
6325 %{
6326   single_instruction;
6327   dst    : EX2(write);
6328   src1   : EX1(read);
6329   src2   : ISS(read);
6330   INS01  : ISS;
6331   ALU    : EX2;
6332 %}
6333 
6334 // Integer ALU reg operation with constant shift
6335 // Eg.  LSL     x0, x1, #shift
6336 pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
6337 %{
6338   single_instruction;
6339   dst    : EX2(write);
6340   src1   : ISS(read);
6341   INS01  : ISS;
6342   ALU    : EX2;
6343 %}
6344 
6345 // Integer ALU reg-reg operation with variable shift
6346 // Both operands must be available in LATE_ISS instead of EX1
6347 // Result is available in EX1 instead of EX2
6348 // Eg.  LSLV    x0, x1, x2
6349 pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
6350 %{
6351   single_instruction;
6352   dst    : EX1(write);
6353   src1   : ISS(read);
6354   src2   : ISS(read);
6355   INS01  : ISS;
6356   ALU    : EX1;
6357 %}
6358 
6359 // Integer ALU reg-reg operation with extract
6360 // As for _vshift above, but result generated in EX2
6361 // Eg.  EXTR    x0, x1, x2, #N
6362 pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
6363 %{
6364   single_instruction;
6365   dst    : EX2(write);
6366   src1   : ISS(read);
6367   src2   : ISS(read);
6368   INS1   : ISS; // Can only dual issue as Instruction 1
6369   ALU    : EX1;
6370 %}
6371 
6372 // Integer ALU reg operation
6373 // Eg.  NEG     x0, x1
6374 pipe_class ialu_reg(iRegI dst, iRegI src)
6375 %{
6376   single_instruction;
6377   dst    : EX2(write);
6378   src    : EX1(read);
6379   INS01  : ISS;
6380   ALU    : EX2;
6381 %}
6382 
6383 // Integer ALU reg mmediate operation
6384 // Eg.  ADD     x0, x1, #N
6385 pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
6386 %{
6387   single_instruction;
6388   dst    : EX2(write);
6389   src1   : EX1(read);
6390   INS01  : ISS;
6391   ALU    : EX2;
6392 %}
6393 
6394 // Integer ALU immediate operation (no source operands)
6395 // Eg.  MOV     x0, #N
6396 pipe_class ialu_imm(iRegI dst)
6397 %{
6398   single_instruction;
6399   dst    : EX1(write);
6400   INS01  : ISS;
6401   ALU    : EX1;
6402 %}
6403 
6404 //------- Compare operation -------------------------------
6405 
6406 // Compare reg-reg
6407 // Eg.  CMP     x0, x1
6408 pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
6409 %{
6410   single_instruction;
6411 //  fixed_latency(16);
6412   cr     : EX2(write);
6413   op1    : EX1(read);
6414   op2    : EX1(read);
6415   INS01  : ISS;
6416   ALU    : EX2;
6417 %}
6418 
6419 // Compare reg-reg
6420 // Eg.  CMP     x0, #N
6421 pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
6422 %{
6423   single_instruction;
6424 //  fixed_latency(16);
6425   cr     : EX2(write);
6426   op1    : EX1(read);
6427   INS01  : ISS;
6428   ALU    : EX2;
6429 %}
6430 
6431 //------- Conditional instructions ------------------------
6432 
6433 // Conditional no operands
6434 // Eg.  CSINC   x0, zr, zr, <cond>
6435 pipe_class icond_none(iRegI dst, rFlagsReg cr)
6436 %{
6437   single_instruction;
6438   cr     : EX1(read);
6439   dst    : EX2(write);
6440   INS01  : ISS;
6441   ALU    : EX2;
6442 %}
6443 
6444 // Conditional 2 operand
6445 // EG.  CSEL    X0, X1, X2, <cond>
6446 pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
6447 %{
6448   single_instruction;
6449   cr     : EX1(read);
6450   src1   : EX1(read);
6451   src2   : EX1(read);
6452   dst    : EX2(write);
6453   INS01  : ISS;
6454   ALU    : EX2;
6455 %}
6456 
6457 // Conditional 2 operand
6458 // EG.  CSEL    X0, X1, X2, <cond>
6459 pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
6460 %{
6461   single_instruction;
6462   cr     : EX1(read);
6463   src    : EX1(read);
6464   dst    : EX2(write);
6465   INS01  : ISS;
6466   ALU    : EX2;
6467 %}
6468 
6469 //------- Multiply pipeline operations --------------------
6470 
6471 // Multiply reg-reg
6472 // Eg.  MUL     w0, w1, w2
6473 pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6474 %{
6475   single_instruction;
6476   dst    : WR(write);
6477   src1   : ISS(read);
6478   src2   : ISS(read);
6479   INS01  : ISS;
6480   MAC    : WR;
6481 %}
6482 
6483 // Multiply accumulate
6484 // Eg.  MADD    w0, w1, w2, w3
6485 pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
6486 %{
6487   single_instruction;
6488   dst    : WR(write);
6489   src1   : ISS(read);
6490   src2   : ISS(read);
6491   src3   : ISS(read);
6492   INS01  : ISS;
6493   MAC    : WR;
6494 %}
6495 
6496 // Eg.  MUL     w0, w1, w2
6497 pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6498 %{
6499   single_instruction;
6500   fixed_latency(3); // Maximum latency for 64 bit mul
6501   dst    : WR(write);
6502   src1   : ISS(read);
6503   src2   : ISS(read);
6504   INS01  : ISS;
6505   MAC    : WR;
6506 %}
6507 
6508 // Multiply accumulate
6509 // Eg.  MADD    w0, w1, w2, w3
6510 pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
6511 %{
6512   single_instruction;
6513   fixed_latency(3); // Maximum latency for 64 bit mul
6514   dst    : WR(write);
6515   src1   : ISS(read);
6516   src2   : ISS(read);
6517   src3   : ISS(read);
6518   INS01  : ISS;
6519   MAC    : WR;
6520 %}
6521 
6522 //------- Divide pipeline operations --------------------
6523 
6524 // Eg.  SDIV    w0, w1, w2
6525 pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6526 %{
6527   single_instruction;
6528   fixed_latency(8); // Maximum latency for 32 bit divide
6529   dst    : WR(write);
6530   src1   : ISS(read);
6531   src2   : ISS(read);
6532   INS0   : ISS; // Can only dual issue as instruction 0
6533   DIV    : WR;
6534 %}
6535 
6536 // Eg.  SDIV    x0, x1, x2
6537 pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6538 %{
6539   single_instruction;
6540   fixed_latency(16); // Maximum latency for 64 bit divide
6541   dst    : WR(write);
6542   src1   : ISS(read);
6543   src2   : ISS(read);
6544   INS0   : ISS; // Can only dual issue as instruction 0
6545   DIV    : WR;
6546 %}
6547 
6548 //------- Load pipeline operations ------------------------
6549 
6550 // Load - prefetch
6551 // Eg.  PFRM    <mem>
6552 pipe_class iload_prefetch(memory mem)
6553 %{
6554   single_instruction;
6555   mem    : ISS(read);
6556   INS01  : ISS;
6557   LDST   : WR;
6558 %}
6559 
6560 // Load - reg, mem
6561 // Eg.  LDR     x0, <mem>
6562 pipe_class iload_reg_mem(iRegI dst, memory mem)
6563 %{
6564   single_instruction;
6565   dst    : WR(write);
6566   mem    : ISS(read);
6567   INS01  : ISS;
6568   LDST   : WR;
6569 %}
6570 
6571 // Load - reg, reg
6572 // Eg.  LDR     x0, [sp, x1]
6573 pipe_class iload_reg_reg(iRegI dst, iRegI src)
6574 %{
6575   single_instruction;
6576   dst    : WR(write);
6577   src    : ISS(read);
6578   INS01  : ISS;
6579   LDST   : WR;
6580 %}
6581 
6582 //------- Store pipeline operations -----------------------
6583 
6584 // Store - zr, mem
6585 // Eg.  STR     zr, <mem>
6586 pipe_class istore_mem(memory mem)
6587 %{
6588   single_instruction;
6589   mem    : ISS(read);
6590   INS01  : ISS;
6591   LDST   : WR;
6592 %}
6593 
6594 // Store - reg, mem
6595 // Eg.  STR     x0, <mem>
6596 pipe_class istore_reg_mem(iRegI src, memory mem)
6597 %{
6598   single_instruction;
6599   mem    : ISS(read);
6600   src    : EX2(read);
6601   INS01  : ISS;
6602   LDST   : WR;
6603 %}
6604 
6605 // Store - reg, reg
6606 // Eg. STR      x0, [sp, x1]
6607 pipe_class istore_reg_reg(iRegI dst, iRegI src)
6608 %{
6609   single_instruction;
6610   dst    : ISS(read);
6611   src    : EX2(read);
6612   INS01  : ISS;
6613   LDST   : WR;
6614 %}
6615 
6616 //------- Store pipeline operations -----------------------
6617 
6618 // Branch
6619 pipe_class pipe_branch()
6620 %{
6621   single_instruction;
6622   INS01  : ISS;
6623   BRANCH : EX1;
6624 %}
6625 
6626 // Conditional branch
6627 pipe_class pipe_branch_cond(rFlagsReg cr)
6628 %{
6629   single_instruction;
6630   cr     : EX1(read);
6631   INS01  : ISS;
6632   BRANCH : EX1;
6633 %}
6634 
6635 // Compare & Branch
6636 // EG.  CBZ/CBNZ
6637 pipe_class pipe_cmp_branch(iRegI op1)
6638 %{
6639   single_instruction;
6640   op1    : EX1(read);
6641   INS01  : ISS;
6642   BRANCH : EX1;
6643 %}
6644 
6645 //------- Synchronisation operations ----------------------
6646 
6647 // Any operation requiring serialization.
6648 // EG.  DMB/Atomic Ops/Load Acquire/Str Release
6649 pipe_class pipe_serial()
6650 %{
6651   single_instruction;
6652   force_serialization;
6653   fixed_latency(16);
6654   INS01  : ISS(2); // Cannot dual issue with any other instruction
6655   LDST   : WR;
6656 %}
6657 
6658 // Generic big/slow expanded idiom - also serialized
6659 pipe_class pipe_slow()
6660 %{
6661   instruction_count(10);
6662   multiple_bundles;
6663   force_serialization;
6664   fixed_latency(16);
6665   INS01  : ISS(2); // Cannot dual issue with any other instruction
6666   LDST   : WR;
6667 %}
6668 
6669 // Empty pipeline class
6670 pipe_class pipe_class_empty()
6671 %{
6672   single_instruction;
6673   fixed_latency(0);
6674 %}
6675 
6676 // Default pipeline class.
6677 pipe_class pipe_class_default()
6678 %{
6679   single_instruction;
6680   fixed_latency(2);
6681 %}
6682 
6683 // Pipeline class for compares.
6684 pipe_class pipe_class_compare()
6685 %{
6686   single_instruction;
6687   fixed_latency(16);
6688 %}
6689 
6690 // Pipeline class for memory operations.
6691 pipe_class pipe_class_memory()
6692 %{
6693   single_instruction;
6694   fixed_latency(16);
6695 %}
6696 
6697 // Pipeline class for call.
6698 pipe_class pipe_class_call()
6699 %{
6700   single_instruction;
6701   fixed_latency(100);
6702 %}
6703 
6704 // Define the class for the Nop node.
6705 define %{
6706    MachNop = pipe_class_empty;
6707 %}
6708 
6709 %}
6710 //----------INSTRUCTIONS-------------------------------------------------------
6711 //
6712 // match      -- States which machine-independent subtree may be replaced
6713 //               by this instruction.
6714 // ins_cost   -- The estimated cost of this instruction is used by instruction
6715 //               selection to identify a minimum cost tree of machine
6716 //               instructions that matches a tree of machine-independent
6717 //               instructions.
6718 // format     -- A string providing the disassembly for this instruction.
6719 //               The value of an instruction's operand may be inserted
6720 //               by referring to it with a '$' prefix.
6721 // opcode     -- Three instruction opcodes may be provided.  These are referred
6722 //               to within an encode class as $primary, $secondary, and $tertiary
6723 //               respectively.  The primary opcode is commonly used to
6724 //               indicate the type of machine instruction, while secondary
6725 //               and tertiary are often used for prefix options or addressing
6726 //               modes.
6727 // ins_encode -- A list of encode classes with parameters. The encode class
6728 //               name must have been defined in an 'enc_class' specification
6729 //               in the encode section of the architecture description.
6730 
6731 // ============================================================================
6732 // Memory (Load/Store) Instructions
6733 
6734 // Load Instructions
6735 
6736 // Load Byte (8 bit signed)
6737 instruct loadB(iRegINoSp dst, memory mem)
6738 %{
6739   match(Set dst (LoadB mem));
6740   predicate(!needs_acquiring_load(n));
6741 
6742   ins_cost(4 * INSN_COST);
6743   format %{ "ldrsbw  $dst, $mem\t# byte" %}
6744 
6745   ins_encode(aarch64_enc_ldrsbw(dst, mem));
6746 
6747   ins_pipe(iload_reg_mem);
6748 %}
6749 
6750 // Load Byte (8 bit signed) into long
6751 instruct loadB2L(iRegLNoSp dst, memory mem)
6752 %{
6753   match(Set dst (ConvI2L (LoadB mem)));
6754   predicate(!needs_acquiring_load(n->in(1)));
6755 
6756   ins_cost(4 * INSN_COST);
6757   format %{ "ldrsb  $dst, $mem\t# byte" %}
6758 
6759   ins_encode(aarch64_enc_ldrsb(dst, mem));
6760 
6761   ins_pipe(iload_reg_mem);
6762 %}
6763 
6764 // Load Byte (8 bit unsigned)
6765 instruct loadUB(iRegINoSp dst, memory mem)
6766 %{
6767   match(Set dst (LoadUB mem));
6768   predicate(!needs_acquiring_load(n));
6769 
6770   ins_cost(4 * INSN_COST);
6771   format %{ "ldrbw  $dst, $mem\t# byte" %}
6772 
6773   ins_encode(aarch64_enc_ldrb(dst, mem));
6774 
6775   ins_pipe(iload_reg_mem);
6776 %}
6777 
6778 // Load Byte (8 bit unsigned) into long
6779 instruct loadUB2L(iRegLNoSp dst, memory mem)
6780 %{
6781   match(Set dst (ConvI2L (LoadUB mem)));
6782   predicate(!needs_acquiring_load(n->in(1)));
6783 
6784   ins_cost(4 * INSN_COST);
6785   format %{ "ldrb  $dst, $mem\t# byte" %}
6786 
6787   ins_encode(aarch64_enc_ldrb(dst, mem));
6788 
6789   ins_pipe(iload_reg_mem);
6790 %}
6791 
6792 // Load Short (16 bit signed)
6793 instruct loadS(iRegINoSp dst, memory mem)
6794 %{
6795   match(Set dst (LoadS mem));
6796   predicate(!needs_acquiring_load(n));
6797 
6798   ins_cost(4 * INSN_COST);
6799   format %{ "ldrshw  $dst, $mem\t# short" %}
6800 
6801   ins_encode(aarch64_enc_ldrshw(dst, mem));
6802 
6803   ins_pipe(iload_reg_mem);
6804 %}
6805 
6806 // Load Short (16 bit signed) into long
6807 instruct loadS2L(iRegLNoSp dst, memory mem)
6808 %{
6809   match(Set dst (ConvI2L (LoadS mem)));
6810   predicate(!needs_acquiring_load(n->in(1)));
6811 
6812   ins_cost(4 * INSN_COST);
6813   format %{ "ldrsh  $dst, $mem\t# short" %}
6814 
6815   ins_encode(aarch64_enc_ldrsh(dst, mem));
6816 
6817   ins_pipe(iload_reg_mem);
6818 %}
6819 
6820 // Load Char (16 bit unsigned)
6821 instruct loadUS(iRegINoSp dst, memory mem)
6822 %{
6823   match(Set dst (LoadUS mem));
6824   predicate(!needs_acquiring_load(n));
6825 
6826   ins_cost(4 * INSN_COST);
6827   format %{ "ldrh  $dst, $mem\t# short" %}
6828 
6829   ins_encode(aarch64_enc_ldrh(dst, mem));
6830 
6831   ins_pipe(iload_reg_mem);
6832 %}
6833 
6834 // Load Short/Char (16 bit unsigned) into long
6835 instruct loadUS2L(iRegLNoSp dst, memory mem)
6836 %{
6837   match(Set dst (ConvI2L (LoadUS mem)));
6838   predicate(!needs_acquiring_load(n->in(1)));
6839 
6840   ins_cost(4 * INSN_COST);
6841   format %{ "ldrh  $dst, $mem\t# short" %}
6842 
6843   ins_encode(aarch64_enc_ldrh(dst, mem));
6844 
6845   ins_pipe(iload_reg_mem);
6846 %}
6847 
6848 // Load Integer (32 bit signed)
6849 instruct loadI(iRegINoSp dst, memory mem)
6850 %{
6851   match(Set dst (LoadI mem));
6852   predicate(!needs_acquiring_load(n));
6853 
6854   ins_cost(4 * INSN_COST);
6855   format %{ "ldrw  $dst, $mem\t# int" %}
6856 
6857   ins_encode(aarch64_enc_ldrw(dst, mem));
6858 
6859   ins_pipe(iload_reg_mem);
6860 %}
6861 
6862 // Load Integer (32 bit signed) into long
6863 instruct loadI2L(iRegLNoSp dst, memory mem)
6864 %{
6865   match(Set dst (ConvI2L (LoadI mem)));
6866   predicate(!needs_acquiring_load(n->in(1)));
6867 
6868   ins_cost(4 * INSN_COST);
6869   format %{ "ldrsw  $dst, $mem\t# int" %}
6870 
6871   ins_encode(aarch64_enc_ldrsw(dst, mem));
6872 
6873   ins_pipe(iload_reg_mem);
6874 %}
6875 
6876 // Load Integer (32 bit unsigned) into long
6877 instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
6878 %{
6879   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
6880   predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));
6881 
6882   ins_cost(4 * INSN_COST);
6883   format %{ "ldrw  $dst, $mem\t# int" %}
6884 
6885   ins_encode(aarch64_enc_ldrw(dst, mem));
6886 
6887   ins_pipe(iload_reg_mem);
6888 %}
6889 
6890 // Load Long (64 bit signed)
6891 instruct loadL(iRegLNoSp dst, memory mem)
6892 %{
6893   match(Set dst (LoadL mem));
6894   predicate(!needs_acquiring_load(n));
6895 
6896   ins_cost(4 * INSN_COST);
     // Disassembly annotation corrected: this is a 64-bit long load, not int.
6897   format %{ "ldr  $dst, $mem\t# long" %}
6898 
6899   ins_encode(aarch64_enc_ldr(dst, mem));
6900 
6901   ins_pipe(iload_reg_mem);
6902 %}
6903 
6904 // Load Range
6905 instruct loadRange(iRegINoSp dst, memory mem)
6906 %{
6907   match(Set dst (LoadRange mem));
6908 
6909   ins_cost(4 * INSN_COST);
6910   format %{ "ldrw  $dst, $mem\t# range" %}
6911 
6912   ins_encode(aarch64_enc_ldrw(dst, mem));
6913 
6914   ins_pipe(iload_reg_mem);
6915 %}
6916 
6917 // Load Pointer
6918 instruct loadP(iRegPNoSp dst, memory mem)
6919 %{
6920   match(Set dst (LoadP mem));
6921   predicate(!needs_acquiring_load(n));
6922 
6923   ins_cost(4 * INSN_COST);
6924   format %{ "ldr  $dst, $mem\t# ptr" %}
6925 
6926   ins_encode(aarch64_enc_ldr(dst, mem));
6927 
6928   ins_pipe(iload_reg_mem);
6929 %}
6930 
6931 // Load Compressed Pointer
6932 instruct loadN(iRegNNoSp dst, memory mem)
6933 %{
6934   match(Set dst (LoadN mem));
6935   predicate(!needs_acquiring_load(n));
6936 
6937   ins_cost(4 * INSN_COST);
6938   format %{ "ldrw  $dst, $mem\t# compressed ptr" %}
6939 
6940   ins_encode(aarch64_enc_ldrw(dst, mem));
6941 
6942   ins_pipe(iload_reg_mem);
6943 %}
6944 
6945 // Load Klass Pointer
6946 instruct loadKlass(iRegPNoSp dst, memory mem)
6947 %{
6948   match(Set dst (LoadKlass mem));
6949   predicate(!needs_acquiring_load(n));
6950 
6951   ins_cost(4 * INSN_COST);
6952   format %{ "ldr  $dst, $mem\t# class" %}
6953 
6954   ins_encode(aarch64_enc_ldr(dst, mem));
6955 
6956   ins_pipe(iload_reg_mem);
6957 %}
6958 
6959 // Load Narrow Klass Pointer
6960 instruct loadNKlass(iRegNNoSp dst, memory mem)
6961 %{
6962   match(Set dst (LoadNKlass mem));
6963   predicate(!needs_acquiring_load(n));
6964 
6965   ins_cost(4 * INSN_COST);
6966   format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}
6967 
6968   ins_encode(aarch64_enc_ldrw(dst, mem));
6969 
6970   ins_pipe(iload_reg_mem);
6971 %}
6972 
6973 // Load Float
6974 instruct loadF(vRegF dst, memory mem)
6975 %{
6976   match(Set dst (LoadF mem));
6977   predicate(!needs_acquiring_load(n));
6978 
6979   ins_cost(4 * INSN_COST);
6980   format %{ "ldrs  $dst, $mem\t# float" %}
6981 
6982   ins_encode( aarch64_enc_ldrs(dst, mem) );
6983 
6984   ins_pipe(pipe_class_memory);
6985 %}
6986 
6987 // Load Double
6988 instruct loadD(vRegD dst, memory mem)
6989 %{
6990   match(Set dst (LoadD mem));
6991   predicate(!needs_acquiring_load(n));
6992 
6993   ins_cost(4 * INSN_COST);
6994   format %{ "ldrd  $dst, $mem\t# double" %}
6995 
6996   ins_encode( aarch64_enc_ldrd(dst, mem) );
6997 
6998   ins_pipe(pipe_class_memory);
6999 %}
7000 
7001 
7002 // Load Int Constant
7003 instruct loadConI(iRegINoSp dst, immI src)
7004 %{
7005   match(Set dst src);
7006 
7007   ins_cost(INSN_COST);
7008   format %{ "mov $dst, $src\t# int" %}
7009 
7010   ins_encode( aarch64_enc_movw_imm(dst, src) );
7011 
7012   ins_pipe(ialu_imm);
7013 %}
7014 
7015 // Load Long Constant
7016 instruct loadConL(iRegLNoSp dst, immL src)
7017 %{
7018   match(Set dst src);
7019 
7020   ins_cost(INSN_COST);
7021   format %{ "mov $dst, $src\t# long" %}
7022 
7023   ins_encode( aarch64_enc_mov_imm(dst, src) );
7024 
7025   ins_pipe(ialu_imm);
7026 %}
7027 
7028 // Load Pointer Constant
7029 
7030 instruct loadConP(iRegPNoSp dst, immP con)
7031 %{
7032   match(Set dst con);
7033 
7034   ins_cost(INSN_COST * 4);
7035   format %{
7036     "mov  $dst, $con\t# ptr\n\t"
7037   %}
7038 
7039   ins_encode(aarch64_enc_mov_p(dst, con));
7040 
7041   ins_pipe(ialu_imm);
7042 %}
7043 
7044 // Load Null Pointer Constant
7045 
7046 instruct loadConP0(iRegPNoSp dst, immP0 con)
7047 %{
7048   match(Set dst con);
7049 
7050   ins_cost(INSN_COST);
7051   format %{ "mov  $dst, $con\t# NULL ptr" %}
7052 
7053   ins_encode(aarch64_enc_mov_p0(dst, con));
7054 
7055   ins_pipe(ialu_imm);
7056 %}
7057 
7058 // Load Pointer Constant One
7059 
7060 instruct loadConP1(iRegPNoSp dst, immP_1 con)
7061 %{
7062   match(Set dst con);
7063 
7064   ins_cost(INSN_COST);
     // Materializes the constant pointer value one (immP_1);
     // format annotation corrected — it was copy-pasted from loadConP0's "NULL ptr".
7065   format %{ "mov  $dst, $con\t# ptr 1" %}
7066 
7067   ins_encode(aarch64_enc_mov_p1(dst, con));
7068 
7069   ins_pipe(ialu_imm);
7070 %}
7071 
7072 // Load Poll Page Constant
7073 
7074 instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
7075 %{
7076   match(Set dst con);
7077 
7078   ins_cost(INSN_COST);
7079   format %{ "adr  $dst, $con\t# Poll Page Ptr" %}
7080 
7081   ins_encode(aarch64_enc_mov_poll_page(dst, con));
7082 
7083   ins_pipe(ialu_imm);
7084 %}
7085 
7086 // Load Byte Map Base Constant
7087 
7088 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
7089 %{
7090   match(Set dst con);
7091 
7092   ins_cost(INSN_COST);
7093   format %{ "adr  $dst, $con\t# Byte Map Base" %}
7094 
7095   ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
7096 
7097   ins_pipe(ialu_imm);
7098 %}
7099 
7100 // Load Narrow Pointer Constant
7101 
7102 instruct loadConN(iRegNNoSp dst, immN con)
7103 %{
7104   match(Set dst con);
7105 
7106   ins_cost(INSN_COST * 4);
7107   format %{ "mov  $dst, $con\t# compressed ptr" %}
7108 
7109   ins_encode(aarch64_enc_mov_n(dst, con));
7110 
7111   ins_pipe(ialu_imm);
7112 %}
7113 
7114 // Load Narrow Null Pointer Constant
7115 
7116 instruct loadConN0(iRegNNoSp dst, immN0 con)
7117 %{
7118   match(Set dst con);
7119 
7120   ins_cost(INSN_COST);
7121   format %{ "mov  $dst, $con\t# compressed NULL ptr" %}
7122 
7123   ins_encode(aarch64_enc_mov_n0(dst, con));
7124 
7125   ins_pipe(ialu_imm);
7126 %}
7127 
7128 // Load Narrow Klass Constant
7129 
7130 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
7131 %{
7132   match(Set dst con);
7133 
7134   ins_cost(INSN_COST);
7135   format %{ "mov  $dst, $con\t# compressed klass ptr" %}
7136 
7137   ins_encode(aarch64_enc_mov_nk(dst, con));
7138 
7139   ins_pipe(ialu_imm);
7140 %}
7141 
7142 // Load Packed Float Constant
7143 
7144 instruct loadConF_packed(vRegF dst, immFPacked con) %{
7145   match(Set dst con);
7146   ins_cost(INSN_COST * 4);
7147   format %{ "fmovs  $dst, $con"%}
7148   ins_encode %{
7149     __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
7150   %}
7151 
7152   ins_pipe(pipe_class_default);
7153 %}
7154 
7155 // Load Float Constant
7156 
7157 instruct loadConF(vRegF dst, immF con) %{
7158   match(Set dst con);
7159 
7160   ins_cost(INSN_COST * 4);
7161 
7162   format %{
7163     "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
7164   %}
7165 
7166   ins_encode %{
7167     __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
7168   %}
7169 
7170   ins_pipe(pipe_class_default);
7171 %}
7172 
7173 // Load Packed Double Constant
7174 
7175 instruct loadConD_packed(vRegD dst, immDPacked con) %{
7176   match(Set dst con);
7177   ins_cost(INSN_COST);
7178   format %{ "fmovd  $dst, $con"%}
7179   ins_encode %{
7180     __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
7181   %}
7182 
7183   ins_pipe(pipe_class_default);
7184 %}
7185 
7186 // Load Double Constant
7187 
7188 instruct loadConD(vRegD dst, immD con) %{
7189   match(Set dst con);
7190 
7191   ins_cost(INSN_COST * 5);
7192   format %{
     // Annotation corrected: this loads a double constant, not a float
     // (compare loadConF above, which uses "float=$con").
7193     "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
7194   %}
7195 
7196   ins_encode %{
7197     __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
7198   %}
7199 
7200   ins_pipe(pipe_class_default);
7201 %}
7202 
7203 // Store Instructions
7204 
7205 // Store CMS card-mark Immediate
7206 instruct storeimmCM0(immI0 zero, memory mem)
7207 %{
7208   match(Set mem (StoreCM mem zero));
7209   predicate(unnecessary_storestore(n));
7210 
7211   ins_cost(INSN_COST);
7212   format %{ "strb zr, $mem\t# byte" %}
7213 
7214   ins_encode(aarch64_enc_strb0(mem));
7215 
7216   ins_pipe(istore_mem);
7217 %}
7218 
7219 // Store CMS card-mark Immediate with intervening StoreStore
7220 // needed when using CMS with no conditional card marking
7221 instruct storeimmCM0_ordered(immI0 zero, memory mem)
7222 %{
7223   match(Set mem (StoreCM mem zero));
7224 
7225   ins_cost(INSN_COST * 2);
7226   format %{ "dmb ishst"
7227       "\n\tstrb zr, $mem\t# byte" %}
7228 
7229   ins_encode(aarch64_enc_strb0_ordered(mem));
7230 
7231   ins_pipe(istore_mem);
7232 %}
7233 
7234 // Store Byte
7235 instruct storeB(iRegIorL2I src, memory mem)
7236 %{
7237   match(Set mem (StoreB mem src));
7238   predicate(!needs_releasing_store(n));
7239 
7240   ins_cost(INSN_COST);
7241   format %{ "strb  $src, $mem\t# byte" %}
7242 
7243   ins_encode(aarch64_enc_strb(src, mem));
7244 
7245   ins_pipe(istore_reg_mem);
7246 %}
7247 
7248 
7249 instruct storeimmB0(immI0 zero, memory mem)
7250 %{
7251   match(Set mem (StoreB mem zero));
7252   predicate(!needs_releasing_store(n));
7253 
7254   ins_cost(INSN_COST);
     // Format corrected: aarch64_enc_strb0 stores zr (see storeimmCM0 above);
     // the old text "strb rscractch2" was a misspelled, wrong register name.
7255   format %{ "strb zr, $mem\t# byte" %}
7256 
7257   ins_encode(aarch64_enc_strb0(mem));
7258 
7259   ins_pipe(istore_mem);
7260 %}
7261 
7262 // Store Char/Short
7263 instruct storeC(iRegIorL2I src, memory mem)
7264 %{
7265   match(Set mem (StoreC mem src));
7266   predicate(!needs_releasing_store(n));
7267 
7268   ins_cost(INSN_COST);
7269   format %{ "strh  $src, $mem\t# short" %}
7270 
7271   ins_encode(aarch64_enc_strh(src, mem));
7272 
7273   ins_pipe(istore_reg_mem);
7274 %}
7275 
7276 instruct storeimmC0(immI0 zero, memory mem)
7277 %{
7278   match(Set mem (StoreC mem zero));
7279   predicate(!needs_releasing_store(n));
7280 
7281   ins_cost(INSN_COST);
7282   format %{ "strh  zr, $mem\t# short" %}
7283 
7284   ins_encode(aarch64_enc_strh0(mem));
7285 
7286   ins_pipe(istore_mem);
7287 %}
7288 
7289 // Store Integer
7290 
7291 instruct storeI(iRegIorL2I src, memory mem)
7292 %{
7293   match(Set mem(StoreI mem src));
7294   predicate(!needs_releasing_store(n));
7295 
7296   ins_cost(INSN_COST);
7297   format %{ "strw  $src, $mem\t# int" %}
7298 
7299   ins_encode(aarch64_enc_strw(src, mem));
7300 
7301   ins_pipe(istore_reg_mem);
7302 %}
7303 
7304 instruct storeimmI0(immI0 zero, memory mem)
7305 %{
7306   match(Set mem(StoreI mem zero));
7307   predicate(!needs_releasing_store(n));
7308 
7309   ins_cost(INSN_COST);
7310   format %{ "strw  zr, $mem\t# int" %}
7311 
7312   ins_encode(aarch64_enc_strw0(mem));
7313 
7314   ins_pipe(istore_mem);
7315 %}
7316 
7317 // Store Long (64 bit signed)
7318 instruct storeL(iRegL src, memory mem)
7319 %{
7320   match(Set mem (StoreL mem src));
7321   predicate(!needs_releasing_store(n));
7322 
7323   ins_cost(INSN_COST);
     // Disassembly annotation corrected: 64-bit long store, not int.
7324   format %{ "str  $src, $mem\t# long" %}
7325 
7326   ins_encode(aarch64_enc_str(src, mem));
7327 
7328   ins_pipe(istore_reg_mem);
7329 %}
7330 
7331 // Store Long (64 bit signed)
7332 instruct storeimmL0(immL0 zero, memory mem)
7333 %{
7334   match(Set mem (StoreL mem zero));
7335   predicate(!needs_releasing_store(n));
7336 
7337   ins_cost(INSN_COST);
     // Disassembly annotation corrected: 64-bit long store of zero, not int.
7338   format %{ "str  zr, $mem\t# long" %}
7339 
7340   ins_encode(aarch64_enc_str0(mem));
7341 
7342   ins_pipe(istore_mem);
7343 %}
7344 
7345 // Store Pointer
7346 instruct storeP(iRegP src, memory mem)
7347 %{
7348   match(Set mem (StoreP mem src));
7349   predicate(!needs_releasing_store(n));
7350 
7351   ins_cost(INSN_COST);
7352   format %{ "str  $src, $mem\t# ptr" %}
7353 
7354   ins_encode(aarch64_enc_str(src, mem));
7355 
7356   ins_pipe(istore_reg_mem);
7357 %}
7358 
7359 // Store Pointer
7360 instruct storeimmP0(immP0 zero, memory mem)
7361 %{
7362   match(Set mem (StoreP mem zero));
7363   predicate(!needs_releasing_store(n));
7364 
7365   ins_cost(INSN_COST);
7366   format %{ "str zr, $mem\t# ptr" %}
7367 
7368   ins_encode(aarch64_enc_str0(mem));
7369 
7370   ins_pipe(istore_mem);
7371 %}
7372 
7373 // Store Compressed Pointer
7374 instruct storeN(iRegN src, memory mem)
7375 %{
7376   match(Set mem (StoreN mem src));
7377   predicate(!needs_releasing_store(n));
7378 
7379   ins_cost(INSN_COST);
7380   format %{ "strw  $src, $mem\t# compressed ptr" %}
7381 
7382   ins_encode(aarch64_enc_strw(src, mem));
7383 
7384   ins_pipe(istore_reg_mem);
7385 %}
7386 
7387 instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
7388 %{
7389   match(Set mem (StoreN mem zero));
7390   predicate(Universe::narrow_oop_base() == NULL &&
7391             Universe::narrow_klass_base() == NULL &&
7392             (!needs_releasing_store(n)));
7393 
7394   ins_cost(INSN_COST);
7395   format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}
7396 
7397   ins_encode(aarch64_enc_strw(heapbase, mem));
7398 
7399   ins_pipe(istore_reg_mem);
7400 %}
7401 
7402 // Store Float
7403 instruct storeF(vRegF src, memory mem)
7404 %{
7405   match(Set mem (StoreF mem src));
7406   predicate(!needs_releasing_store(n));
7407 
7408   ins_cost(INSN_COST);
7409   format %{ "strs  $src, $mem\t# float" %}
7410 
7411   ins_encode( aarch64_enc_strs(src, mem) );
7412 
7413   ins_pipe(pipe_class_memory);
7414 %}
7415 
7416 // TODO
7417 // implement storeImmF0 and storeFImmPacked
7418 
7419 // Store Double
7420 instruct storeD(vRegD src, memory mem)
7421 %{
7422   match(Set mem (StoreD mem src));
7423   predicate(!needs_releasing_store(n));
7424 
7425   ins_cost(INSN_COST);
7426   format %{ "strd  $src, $mem\t# double" %}
7427 
7428   ins_encode( aarch64_enc_strd(src, mem) );
7429 
7430   ins_pipe(pipe_class_memory);
7431 %}
7432 
7433 // Store Compressed Klass Pointer
7434 instruct storeNKlass(iRegN src, memory mem)
7435 %{
7436   predicate(!needs_releasing_store(n));
7437   match(Set mem (StoreNKlass mem src));
7438 
7439   ins_cost(INSN_COST);
7440   format %{ "strw  $src, $mem\t# compressed klass ptr" %}
7441 
7442   ins_encode(aarch64_enc_strw(src, mem));
7443 
7444   ins_pipe(istore_reg_mem);
7445 %}
7446 
7447 // TODO
7448 // implement storeImmD0 and storeDImmPacked
7449 
7450 // prefetch instructions
7451 // Must be safe to execute with invalid address (cannot fault).
7452 
7453 instruct prefetchalloc( memory mem ) %{
7454   match(PrefetchAllocation mem);
7455 
7456   ins_cost(INSN_COST);
7457   format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}
7458 
7459   ins_encode( aarch64_enc_prefetchw(mem) );
7460 
7461   ins_pipe(iload_prefetch);
7462 %}
7463 
7464 //  ---------------- volatile loads and stores ----------------
7465 
7466 // Load Byte (8 bit signed)
7467 instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7468 %{
7469   match(Set dst (LoadB mem));
7470 
7471   ins_cost(VOLATILE_REF_COST);
7472   format %{ "ldarsb  $dst, $mem\t# byte" %}
7473 
7474   ins_encode(aarch64_enc_ldarsb(dst, mem));
7475 
7476   ins_pipe(pipe_serial);
7477 %}
7478 
7479 // Load Byte (8 bit signed) into long
7480 instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7481 %{
7482   match(Set dst (ConvI2L (LoadB mem)));
7483 
7484   ins_cost(VOLATILE_REF_COST);
7485   format %{ "ldarsb  $dst, $mem\t# byte" %}
7486 
7487   ins_encode(aarch64_enc_ldarsb(dst, mem));
7488 
7489   ins_pipe(pipe_serial);
7490 %}
7491 
7492 // Load Byte (8 bit unsigned)
7493 instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
7494 %{
7495   match(Set dst (LoadUB mem));
7496 
7497   ins_cost(VOLATILE_REF_COST);
7498   format %{ "ldarb  $dst, $mem\t# byte" %}
7499 
7500   ins_encode(aarch64_enc_ldarb(dst, mem));
7501 
7502   ins_pipe(pipe_serial);
7503 %}
7504 
7505 // Load Byte (8 bit unsigned) into long
7506 instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
7507 %{
7508   match(Set dst (ConvI2L (LoadUB mem)));
7509 
7510   ins_cost(VOLATILE_REF_COST);
7511   format %{ "ldarb  $dst, $mem\t# byte" %}
7512 
7513   ins_encode(aarch64_enc_ldarb(dst, mem));
7514 
7515   ins_pipe(pipe_serial);
7516 %}
7517 
// Load Short (16 bit signed)
// Load-acquire with sign extension to 32 bits (ldarshw).
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}
7530 
// Load Short/Char (16 bit unsigned)
// Load-acquire with zero extension (ldarhw).
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}
7542 
// Load Short/Char (16 bit unsigned) into long
// ldarh zero-extends to 64 bits, so the ConvI2L is folded away.
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
7555 
// Load Short/Char (16 bit signed) into long
// The encoding emits the sign-extending ldarsh; the format mnemonic now
// matches it (it previously read "ldarh", which is the zero-extending form).
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
7568 
// Load Integer (32 bit signed)
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit unsigned) into long
// Matches (LoadI & 0xFFFFFFFF): ldarw already zero-extends to 64 bits,
// so both the ConvI2L and the AndL mask are folded away.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
7594 
// Load Long (64 bit signed)
// Corrected the format annotation: this is a 64-bit ldar, previously
// mislabelled "# int" in the disassembly comment.
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
7607 
// Load Pointer
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Compressed Pointer
// A narrow oop is 32 bits, hence the w-form load-acquire.
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Float
// FP registers have no ldar form; the encoding loads via an integer
// register with acquire semantics and moves into the FP register.
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Load Double
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}
7659 
// Store Byte
// Uses store-release (stlrb), so no separate release barrier is needed.
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}
7685 
// Store Integer
// (match rule re-spaced for consistency with the sibling store rules:
// "Set mem (StoreI ...)" rather than "Set mem(StoreI ...)".)

instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
7699 
// Store Long (64 bit signed)
// Corrected the format annotation: this is a 64-bit stlr, previously
// mislabelled "# int" in the disassembly comment.
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
7712 
// Store Pointer
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Pointer
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Float
// FP registers have no stlr form; the encoding moves the value to an
// integer register and store-releases from there.
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7767 
7768 //  ---------------- end of volatile loads and stores ----------------
7769 
7770 // ============================================================================
7771 // BSWAP Instructions
7772 
// Reverse the byte order of a 32-bit int (Integer.reverseBytes).
instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesI src));

  ins_cost(INSN_COST);
  format %{ "revw  $dst, $src" %}

  ins_encode %{
    __ revw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Reverse the byte order of a 64-bit long (Long.reverseBytes).
instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
  match(Set dst (ReverseBytesL src));

  ins_cost(INSN_COST);
  format %{ "rev  $dst, $src" %}

  ins_encode %{
    __ rev(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Reverse the byte order of an unsigned 16-bit value (Character.reverseBytes).
instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesUS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Reverse the byte order of a signed 16-bit value (Short.reverseBytes).
// rev16w byte-swaps each halfword; the sbfmw then sign-extends bits 0..15
// so the result is a properly signed short in a 32-bit register.
instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src\n\t"
            "sbfmw $dst, $dst, #0, #15" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
    __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
  %}

  ins_pipe(ialu_reg);
%}
7826 
7827 // ============================================================================
7828 // Zero Count Instructions
7829 
// Integer.numberOfLeadingZeros: direct clzw.
instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountLeadingZerosI src));

  ins_cost(INSN_COST);
  format %{ "clzw  $dst, $src" %}
  ins_encode %{
    __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Long.numberOfLeadingZeros: direct clz.
instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountLeadingZerosL src));

  ins_cost(INSN_COST);
  format %{ "clz   $dst, $src" %}
  ins_encode %{
    __ clz(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Integer.numberOfTrailingZeros: AArch64 has no ctz, so reverse the
// bits (rbitw) and count leading zeros instead.
instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountTrailingZerosI src));

  ins_cost(INSN_COST * 2);
  format %{ "rbitw  $dst, $src\n\t"
            "clzw   $dst, $dst" %}
  ins_encode %{
    __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
    __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Long.numberOfTrailingZeros: same rbit + clz trick, 64-bit forms.
instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountTrailingZerosL src));

  ins_cost(INSN_COST * 2);
  format %{ "rbit   $dst, $src\n\t"
            "clz    $dst, $dst" %}
  ins_encode %{
    __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
    __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}
7881 
7882 //---------- Population Count Instructions -------------------------------------
7883 //
7884 
// Integer.bitCount: route the value through a SIMD register and use the
// vector cnt (per-byte popcount) + addv (horizontal add) sequence.
instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // NOTE(review): this writes back into $src (movw zero-extends it in
    // place) instead of using a scratch register; if the matched L2I source
    // is still live as a 64-bit value elsewhere, its upper half would be
    // clobbered -- confirm the matcher guarantees that cannot happen here.
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7906 
// Integer.bitCount of a value loaded from memory: load straight into the
// SIMD register (ldrs zeroes the upper lanes), then cnt + addv as above.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    // loadStore handles the full range of memory operand addressing modes.
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7928 
7929 // Note: Long.bitCount(long) returns an int.
7930 instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
7931   predicate(UsePopCountInstruction);
7932   match(Set dst (PopCountL src));
7933   effect(TEMP tmp);
7934   ins_cost(INSN_COST * 13);
7935 
7936   format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
7937             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
7938             "addv   $tmp, $tmp\t# vector (8B)\n\t"
7939             "mov    $dst, $tmp\t# vector (1D)" %}
7940   ins_encode %{
7941     __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
7942     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7943     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
7944     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
7945   %}
7946 
7947   ins_pipe(pipe_class_default);
7948 %}
7949 
// Long.bitCount of a value loaded from memory: ldrd directly into the
// SIMD register, then cnt + addv as above.
instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    // loadStore handles the full range of memory operand addressing modes.
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7971 
7972 // ============================================================================
7973 // MemBar Instruction
7974 
// LoadFence: orders earlier loads before later loads and stores.
instruct load_fence() %{
  match(LoadFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "load_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}
  ins_pipe(pipe_serial);
%}

// MemBarAcquire that the preceding ldar already provides: emit nothing.
// unnecessary_acquire(n) decides whether the barrier can be elided.
instruct unnecessary_membar_acquire() %{
  predicate(unnecessary_acquire(n));
  match(MemBarAcquire);
  ins_cost(0);

  format %{ "membar_acquire (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire (elided)");
  %}

  ins_pipe(pipe_class_empty);
%}

// MemBarAcquire fallback when the barrier cannot be elided.
instruct membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire" %}

  ins_encode %{
    __ block_comment("membar_acquire");
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}

  ins_pipe(pipe_serial);
%}


instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire_lock" %}

  ins_encode %{
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}

  ins_pipe(pipe_serial);
%}

// StoreFence: orders earlier loads and stores before later stores.
instruct store_fence() %{
  match(StoreFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "store_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

// MemBarRelease that the following stlr already provides: emit nothing.
instruct unnecessary_membar_release() %{
  predicate(unnecessary_release(n));
  match(MemBarRelease);
  ins_cost(0);

  format %{ "membar_release (elided)" %}

  ins_encode %{
    __ block_comment("membar_release (elided)");
  %}
  ins_pipe(pipe_serial);
%}

// MemBarRelease fallback when the barrier cannot be elided.
instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release" %}

  ins_encode %{
    __ block_comment("membar_release");
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(VOLATILE_REF_COST);

  format %{ "MEMBAR-store-store" %}

  ins_encode %{
    __ membar(Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release_lock" %}

  ins_encode %{
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}

  ins_pipe(pipe_serial);
%}

// MemBarVolatile that the surrounding ldar/stlr pair makes redundant.
instruct unnecessary_membar_volatile() %{
  predicate(unnecessary_volatile(n));
  match(MemBarVolatile);
  ins_cost(0);

  format %{ "membar_volatile (elided)" %}

  ins_encode %{
    __ block_comment("membar_volatile (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// MemBarVolatile fallback: a full StoreLoad barrier (the expensive one);
// the cost is inflated to steer the matcher toward the elided variant.
instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(VOLATILE_REF_COST*100);

  format %{ "membar_volatile" %}

  ins_encode %{
    __ block_comment("membar_volatile");
    __ membar(Assembler::StoreLoad);
  %}

  ins_pipe(pipe_serial);
%}
8119 
8120 // ============================================================================
8121 // Cast/Convert Instructions
8122 
// Reinterpret a long as a pointer; just a register move (elided when the
// allocator assigns the same register).
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long -> ptr" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

// Reinterpret a pointer as a long; mirror of castX2P.
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

// Convert oop into int for vectors alignment masking
// movw truncates to the low 32 bits (and zero-extends the register).
instruct convP2I(iRegINoSp dst, iRegP src) %{
  match(Set dst (ConvL2I (CastP2X src)));

  ins_cost(INSN_COST);
  format %{ "movw $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
8165 
// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
// With a zero narrow-oop shift, the narrow oop's bits are the low 32 bits
// of the address, so a simple 32-bit move suffices.
// Fixed the format string: it previously read "mov dst, $src" -- missing
// the '$' on dst and naming "mov" while the encoding emits movw.
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
8181 
8182 
8183 // Convert oop pointer into compressed form
// Convert oop pointer into compressed form
// General case: the oop may be null, so encode_heap_oop must test for it
// (hence KILL cr).
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Known-not-null variant: skips the null check.
// NOTE(review): cr appears in the operand list without an effect()
// declaration here -- presumably a leftover; verify it is intentional.
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Expand a compressed oop; general case (value may be null).
instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Expand a compressed oop known to be non-null (or a constant).
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}
8236 
8237 // n.b. AArch64 implementations of encode_klass_not_null and
8238 // decode_klass_not_null do not modify the flags register so, unlike
8239 // Intel, we don't kill CR as a side effect here
8240 
// Compress a klass pointer (klass pointers are never null here).
instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}

// Expand a compressed klass pointer; the macro assembler has an in-place
// form for the dst == src case.
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}
8274 
// CheckCastPP is a type-system-only node: no code is emitted (size 0).
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// CastPP likewise emits nothing.
instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// CastII likewise emits nothing.
instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
8305 
8306 // ============================================================================
8307 // Atomic operation instructions
8308 //
8309 // Intel and SPARC both implement Ideal Node LoadPLocked and
8310 // Store{PIL}Conditional instructions using a normal load for the
8311 // LoadPLocked and a CAS for the Store{PIL}Conditional.
8312 //
8313 // The ideal code appears only to use LoadPLocked/StorePLocked as a
8314 // pair to lock object allocations from Eden space when not using
8315 // TLABs.
8316 //
8317 // There does not appear to be a Load{IL}Locked Ideal Node and the
8318 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
8319 // and to use StoreIConditional only for 32-bit and StoreLConditional
8320 // only for 64-bit.
8321 //
8322 // We implement LoadPLocked and StorePLocked instructions using,
8323 // respectively the AArch64 hw load-exclusive and store-conditional
8324 // instructions. Whereas we must implement each of
8325 // Store{IL}Conditional using a CAS which employs a pair of
8326 // instructions comprising a load-exclusive followed by a
8327 // store-conditional.
8328 
8329 
8330 // Locked-load (linked load) of the current heap-top
8331 // used when updating the eden heap top
8332 // implemented using ldaxr on AArch64
8333 
// LoadPLocked: load-exclusive-acquire of the heap top; pairs with
// storePConditional's stlxr below.
instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}
8346 
8347 // Conditional-store of the updated heap-top.
8348 // Used during allocation of the shared heap.
8349 // Sets flag (EQ) on success.
8350 // implemented using stlxr on AArch64.
8351 
// StorePConditional: store-exclusive-release paired with loadPLocked's
// ldaxr; the stlxr status word is compared against zero so EQ means the
// exclusive store succeeded. oldval is implied by the preceding ldaxr.
instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
8371 
8372 // this has to be implemented as a CAS
8373 instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
8374 %{
8375   match(Set cr (StoreLConditional mem (Binary oldval newval)));
8376 
8377   ins_cost(VOLATILE_REF_COST);
8378 
8379   format %{
8380     "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
8381     "cmpw rscratch1, zr\t# EQ on successful write"
8382   %}
8383 
8384   ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval));
8385 
8386   ins_pipe(pipe_slow);
8387 %}
8388 
8389 // this has to be implemented as a CAS
8390 instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
8391 %{
8392   match(Set cr (StoreIConditional mem (Binary oldval newval)));
8393 
8394   ins_cost(VOLATILE_REF_COST);
8395 
8396   format %{
8397     "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
8398     "cmpw rscratch1, zr\t# EQ on successful write"
8399   %}
8400 
8401   ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval));
8402 
8403   ins_pipe(pipe_slow);
8404 %}
8405 
8406 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
8407 // can't match them
8408 
// CompareAndSwapI: cmpxchgw loop, then cset materialises the boolean
// success result from the EQ flag.
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// CompareAndSwapL: 64-bit cmpxchg, boolean result as above.
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// CompareAndSwapP: pointer-width cmpxchg, boolean result as above.
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// CompareAndSwapN: 32-bit cmpxchgw on a narrow oop, boolean result.
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapN mem (Binary oldval newval)));

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8476 
8477 
// GetAndSetI: atomic 32-bit exchange; prev receives the old value.
instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
  match(Set prev (GetAndSetI mem newv));
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// GetAndSetL: atomic 64-bit exchange.
instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegL prev) %{
  match(Set prev (GetAndSetL mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// GetAndSetN: atomic exchange of a 32-bit narrow oop.
instruct get_and_setN(indirect mem, iRegNNoSp newv, iRegI prev) %{
  match(Set prev (GetAndSetN mem newv));
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// GetAndSetP: atomic exchange of a full-width pointer.
instruct get_and_setP(indirect mem, iRegPNoSp newv, iRegP prev) %{
  match(Set prev (GetAndSetP mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8513 
8514 
8515 instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
8516   match(Set newval (GetAndAddL mem incr));
8517   ins_cost(INSN_COST * 10);
8518   format %{ "get_and_addL $newval, [$mem], $incr" %}
8519   ins_encode %{
8520     __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
8521   %}
8522   ins_pipe(pipe_serial);
8523 %}
8524 
8525 instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
8526   predicate(n->as_LoadStore()->result_not_used());
8527   match(Set dummy (GetAndAddL mem incr));
8528   ins_cost(INSN_COST * 9);
8529   format %{ "get_and_addL [$mem], $incr" %}
8530   ins_encode %{
8531     __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
8532   %}
8533   ins_pipe(pipe_serial);
8534 %}
8535 
8536 instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
8537   match(Set newval (GetAndAddL mem incr));
8538   ins_cost(INSN_COST * 10);
8539   format %{ "get_and_addL $newval, [$mem], $incr" %}
8540   ins_encode %{
8541     __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
8542   %}
8543   ins_pipe(pipe_serial);
8544 %}
8545 
8546 instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
8547   predicate(n->as_LoadStore()->result_not_used());
8548   match(Set dummy (GetAndAddL mem incr));
8549   ins_cost(INSN_COST * 9);
8550   format %{ "get_and_addL [$mem], $incr" %}
8551   ins_encode %{
8552     __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
8553   %}
8554   ins_pipe(pipe_serial);
8555 %}
8556 
// Atomic fetch-and-add of an int (32-bit atomic_addw): newval receives
// the value held at [$mem] before $incr was added.
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Cheaper variant selected when the fetched value is dead
// (result_not_used); noreg discards the old value.
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8577 
// Atomic fetch-and-add of an int with an immediate increment
// (immIAddSub: a constant encodable in an add/sub immediate field).
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Immediate-increment variant for when the fetched value is dead;
// noreg discards the old value.
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8598 
8599 // Manifest a CmpL result in an integer register.
8600 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    // dst = (src1 != src2) ? 1 : 0, then conditionally negate when
    // src1 < src2, giving the required -1 / 0 / +1 result.
    __ cmp($src1$$Register, $src2$$Register);
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
8621 
// As cmpL3_reg_reg but comparing against an add/sub-encodable immediate.
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    // A negative constant cannot be encoded as a subs immediate, so
    // flip to adds with the negated (positive) constant instead.
    // n.b. immLAddSub guarantees the magnitude is encodable.
    int32_t con = (int32_t)$src2$$constant;
     if (con < 0) {
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
8646 
8647 // ============================================================================
8648 // Conditional Move Instructions
8649 
// n.b. we have identical rules for both a signed compare op (cmpOp)
// and an unsigned compare op (cmpOpU). it would be nice if we could
// define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
// of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
8659 
// Conditional move, int, signed flags: dst = $cmp ? src2 : src1.
// Note csel's operand order: src2 is selected when the condition holds.
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned-flags flavour of the rule above (see the note on cmpOp vs
// cmpOpU preceding this section).
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8691 
8692 // special cases where one arg is zero
8693 
8694 // n.b. this is selected in preference to the rule above because it
8695 // avoids loading constant 0 into a source register
8696 
8697 // TODO
8698 // we ought only to be able to cull one of these variants as the ideal
8699 // transforms ought always to order the zero consistently (to left/right?)
8700 
// Conditional move, int, with constant-zero first arg: dst = $cmp ? src : 0.
// Uses zr instead of materializing 0 in a register.
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-flags flavour of the rule above.
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8732 
// Conditional move, int, with constant-zero second arg: dst = $cmp ? 0 : src.
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-flags flavour of the rule above.
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8764 
8765 // special case for creating a boolean 0 or 1
8766 
8767 // n.b. this is selected in preference to the rule above because it
8768 // avoids loading constants 0 and 1 into a source register
8769 
// Boolean materialization: dst = $cmp ? 1 : 0 via csincw zr, zr
// (increment of zero when the condition does not hold), so neither
// constant needs a register.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}

// Unsigned-flags flavour of the rule above.
instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
8807 
// Conditional move, long, signed flags: dst = $cmp ? src2 : src1.
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned-flags flavour of the rule above.
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8839 
8840 // special cases where one arg is zero
8841 
// Conditional move, long, with constant-zero second arg: dst = $cmp ? 0 : src.
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-flags flavour of the rule above.
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8873 
// Conditional move, long, with constant-zero first arg: dst = $cmp ? src : 0.
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-flags flavour of the rule above.
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8905 
// Conditional move, pointer, signed flags: dst = $cmp ? src2 : src1.
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned-flags flavour of the rule above.
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
8937 
8938 // special cases where one arg is zero
8939 
// Conditional move, pointer, with null second arg: dst = $cmp ? null : src.
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-flags flavour of the rule above.
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
8971 
// Conditional move, pointer, with null first arg: dst = $cmp ? src : null.
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-flags flavour of the rule above.
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9003 
// Conditional move, compressed pointer (32-bit cselw), signed flags:
// dst = $cmp ? src2 : src1.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9019 
// Conditional move, compressed pointer (32-bit cselw), unsigned flags:
// dst = $cmp ? src2 : src1.
// Fixed: the format string previously said "# signed, compressed ptr",
// but this is the cmpOpU/rFlagsRegU (unsigned) variant.
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9035 
9036 // special cases where one arg is zero
9037 
// Conditional move, compressed pointer, with null second arg:
// dst = $cmp ? 0 : src.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-flags flavour of the rule above.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9069 
// Conditional move, compressed pointer, with null first arg:
// dst = $cmp ? src : 0.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Unsigned-flags flavour of the rule above.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9101 
// Conditional move, float, signed flags: dst = $cmp ? src2 : src1
// (fcsels selects the second operand listed when the condition holds).
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(pipe_class_default);
%}

// Unsigned-flags flavour of the rule above.
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(pipe_class_default);
%}
9137 
// Conditional move, double, signed flags: dst = $cmp ? src2 : src1
// (fcseld selects the second operand listed when the condition holds).
// Fixed: the format string previously said "cmove float" although this
// is the CMoveD/fcseld (double) rule.
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(pipe_class_default);
%}
9155 
// Conditional move, double, unsigned flags: dst = $cmp ? src2 : src1.
// Fixed: the format string previously said "cmove float" although this
// is the CMoveD/fcseld (double) rule.
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(pipe_class_default);
%}
9173 
9174 // ============================================================================
9175 // Arithmetic Instructions
9176 //
9177 
9178 // Integer Addition
9179 
9180 // TODO
9181 // these currently employ operations which do not set CR and hence are
9182 // not flagged as killing CR but we would like to isolate the cases
9183 // where we want to set flags from those where we don't. need to work
9184 // out how to do that.
9185 
// Integer addition, register + register: dst = src1 + src2 (32-bit addw).
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9200 
// Integer addition, register + add/sub-encodable immediate.
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

// As above but folding the narrowing of a long source (ConvL2I) into
// the 32-bit add, which only reads the low word anyway.
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9228 
9229 // Pointer Addition
// Pointer addition, register + long offset register (64-bit add).
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9244 
// Pointer addition folding a sign-extension of an int offset
// (ConvI2L) into the add's sxtw extend.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer addition folding a left-shifted long offset into the
// address-generation form (lea with lsl).
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Pointer addition folding both a sign-extension (ConvI2L) and a shift
// of an int offset (lea with sxtw-and-scale).
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9289 
// Fuse (ConvI2L src) << scale into a single sbfiz: sign-extend the low
// 32 bits and insert them at bit position (scale & 63).  The field
// width is capped at 32 since only 32 source bits are significant.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
9304 
9305 // Pointer Immediate Addition
9306 // n.b. this needs to be more expensive than using an indirect memory
9307 // operand
// Pointer addition with an add/sub-encodable immediate offset.
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9321 
9322 // Long Addition
// Long addition, register + register: dst = src1 + src2 (64-bit add).
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9338 
// Long Immediate Addition. No constant pool entries required.
// Long addition with an add/sub-encodable immediate.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9353 
9354 // Integer Subtraction
// Integer subtraction, register - register: dst = src1 - src2 (subw).
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Integer subtraction with an add/sub-encodable immediate.
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9384 
9385 // Long Subtraction
// Long subtraction, register - register: dst = src1 - src2 (64-bit sub).
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9401 
// Long Immediate Subtraction. No constant pool entries required.
// Long subtraction with an add/sub-encodable immediate.
// Fixed: the format string read "sub$dst" (no separator), so debug
// output printed e.g. "subx10, ..."; now matches the other add/sub rules.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9416 
9417 // Integer Negation (special case for sub)
9418 
// Integer negation: dst = -src (matches SubI 0 src, emitted as negw).
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9432 
9433 // Long Negation
9434 
// Long negation: dst = -src (matches SubL 0 src, emitted as 64-bit neg).
// Fixed: the source operand was declared iRegIorL2I (an int/L2I operand
// class), but the input of a 64-bit SubL is a long value, so it must be
// typed iRegL — matching the sibling subL/addL rules.
instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9448 
9449 // Integer Multiply
9450 
// Integer multiply: dst = src1 * src2 (32-bit mulw).
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// Widening multiply: a long product of two sign-extended ints is
// emitted as a single smull instead of two sxtw + mul.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
9480 
9481 // Long Multiply
9482 
// Long multiply: dst = src1 * src2 (low 64 bits of the product).
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}

// Long multiply-high: dst = high 64 bits of the signed 128-bit product
// (smulh).
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  format %{ "smulh   $dst, $src1, $src2, \t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
9513 
9514 // Combined Integer Multiply & Add/Sub
9515 
// Combined int multiply-add: dst = src3 + src1 * src2.
// Fixed: the format strings showed the 64-bit mnemonics "madd"/"msub",
// but the encodings emit the 32-bit maddw/msubw.
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}

// Combined int multiply-subtract: dst = src3 - src1 * src2.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
9547 
9548 // Combined Long Multiply & Add/Sub
9549 
// Combined long multiply-add: dst = src3 + src1 * src2 (madd).
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

// Combined long multiply-subtract: dst = src3 - src1 * src2 (msub).
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
9581 
9582 // Integer Divide
9583 
// Signed int divide.  Emission is delegated to the aarch64_enc_divw
// encoding class, defined elsewhere in this file.
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
9593 
// Sign-bit extraction: matches (src1 >> 31) >>> 31, which equals
// src1 >>> 31, so a single LSRW suffices.  The immI_31 operand types
// pin both shift amounts to 31, letting the encoding hard-code it.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}
9603 
// Divide-by-two rounding, int: dst = src + (src >>> 31) adds the sign
// bit so a following arithmetic shift rounds toward zero.  Emitted as
// one ADDW with an LSR #31 shifted-register operand.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
9617 
9618 // Long Divide
9619 
// Signed long divide.  Emission is delegated to the aarch64_enc_div
// encoding class, defined elsewhere in this file.
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
9629 
// 64-bit analogue of signExtract: (src1 >> 63) >>> 63 == src1 >>> 63,
// emitted as a single LSR; immL_63 pins both shift amounts to 63.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
9639 
// Divide-by-two rounding, long: dst = src + (src >>> 63) adds the sign
// bit so a following arithmetic shift rounds toward zero.  Emitted as
// one ADD with an LSR #63 shifted-register operand.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  // Fixed format string: show the shifted operand ("LSR $div1") to
  // reflect the emitted instruction and match the 32-bit div2Round rule.
  format %{ "add $dst, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
9653 
9654 // Integer Remainder
9655 
// Signed int remainder: dst = src1 - (src1 / src2) * src2, expanded
// as sdivw + msubw by the aarch64_enc_modw encoding class (defined
// elsewhere in this file).
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  // Fixed malformed second format line: "msubw($dst, ..." had a stray
  // '(' and no closing paren.
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
9666 
9667 // Long Remainder
9668 
// Signed long remainder: dst = src1 - (src1 / src2) * src2, expanded
// as sdiv + msub by the aarch64_enc_mod encoding class (defined
// elsewhere in this file).
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  // Fixed malformed second format line ("msub($dst, ..." had a stray
  // '(' and no closing paren) and used "\n\t" for consistency with modI.
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
9679 
9680 // Integer Shifts
9681 
9682 // Shift Left Register
// Variable int shift left.  Relies on LSLVW's modulo-32 treatment of
// the shift amount, which matches Java's (src2 & 0x1f) semantics.
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9697 
9698 // Shift Left Immediate
// Constant int shift left; the immediate is masked to 5 bits here to
// implement Java's shift-count semantics.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9713 
9714 // Shift Right Logical Register
// Variable int unsigned shift right (Java >>>), via LSRVW.
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9729 
9730 // Shift Right Logical Immediate
// Constant int unsigned shift right; immediate masked to 5 bits for
// Java shift-count semantics.
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9745 
9746 // Shift Right Arithmetic Register
// Variable int arithmetic shift right (Java >>), via ASRVW.
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9761 
9762 // Shift Right Arithmetic Immediate
// Constant int arithmetic shift right; immediate masked to 5 bits for
// Java shift-count semantics.
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9777 
9778 // Combined Int Mask and Right Shift (using UBFM)
9779 // TODO
9780 
9781 // Long Shifts
9782 
9783 // Shift Left Register
// Variable long shift left.  Relies on LSLV's modulo-64 treatment of
// the shift amount, which matches Java's (src2 & 0x3f) semantics.
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9798 
9799 // Shift Left Immediate
// Constant long shift left; immediate masked to 6 bits for Java
// shift-count semantics.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9814 
9815 // Shift Right Logical Register
// Variable long unsigned shift right (Java >>>), via LSRV.
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9830 
9831 // Shift Right Logical Immediate
// Constant long unsigned shift right; immediate masked to 6 bits for
// Java shift-count semantics.
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9846 
9847 // A special-case pattern for card table stores.
// Unsigned right shift of the raw bits of a pointer (CastP2X then
// URShiftL), matched so card-table address computation needs no
// separate pointer-to-long move.
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9862 
9863 // Shift Right Arithmetic Register
// Variable long arithmetic shift right (Java >>), via ASRV.
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
9878 
9879 // Shift Right Arithmetic Immediate
// Constant long arithmetic shift right; immediate masked to 6 bits for
// Java shift-count semantics.
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
9894 
9895 // BEGIN This section of the file is automatically generated. Do not edit --------------
9896 
// Bitwise NOT (ideal graph: xor with -1) folded into EON with the
// zero register: dst = ~src1.
// NOTE(review): this is in the auto-generated section -- port any
// comment changes back to the generator rather than editing by hand.
instruct regL_not_reg(iRegLNoSp dst,
                         iRegL src1, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
// int: dst = ~src1, via EONW with zr.
instruct regI_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
9929 
// AND-NOT fusion (auto-generated section): the NOT appears in the
// ideal graph as XOR with -1 (m1) and is folded into a single BIC(W).
// int: dst = src1 & ~src2.
instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// long: dst = src1 & ~src2.
instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9963 
// OR-NOT fusion (auto-generated section): XOR-with-(-1) folded into a
// single ORN(W).
// int: dst = src1 | ~src2.
instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// long: dst = src1 | ~src2.
instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
9997 
// XOR-NOT fusion (auto-generated section): matches -1 ^ (src2 ^ src1)
// and emits a single EON(W).
// int: dst = ~(src1 ^ src2).
instruct XorI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// long: dst = ~(src1 ^ src2).
instruct XorL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
10031 
// BIC with a shifted second operand (auto-generated section): matches
// src1 & ~(shift(src2, src3)), with the NOT expressed as XOR with -1
// (src4).  One instruct per shift kind (LSR/ASR/LSL) per width (I/L).
// NOTE(review): the w-form encodings mask the shift amount with 0x3f
// rather than 0x1f -- presumably amounts >= 32 cannot reach here;
// confirm against the generator.
// int: dst = src1 & ~(src2 >>> src3).
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: dst = src1 & ~(src2 >>> src3).
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// int: dst = src1 & ~(src2 >> src3).
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: dst = src1 & ~(src2 >> src3).
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// int: dst = src1 & ~(src2 << src3).
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: dst = src1 & ~(src2 << src3).
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10139 
// EON with a shifted second operand (auto-generated section): matches
// -1 ^ (shift(src2, src3) ^ src1), i.e. ~(src1 ^ shift(src2, src3)).
// One instruct per shift kind (LSR/ASR/LSL) per width (I/L).
// NOTE(review): w-form shift amounts are masked with 0x3f, not 0x1f --
// confirm against the generator that amounts >= 32 cannot reach here.
// int: dst = ~(src1 ^ (src2 >>> src3)).
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: dst = ~(src1 ^ (src2 >>> src3)).
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// int: dst = ~(src1 ^ (src2 >> src3)).
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: dst = ~(src1 ^ (src2 >> src3)).
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// int: dst = ~(src1 ^ (src2 << src3)).
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: dst = ~(src1 ^ (src2 << src3)).
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10247 
// ORN with a shifted second operand (auto-generated section): matches
// src1 | ~(shift(src2, src3)), the NOT expressed as XOR with -1 (src4).
// One instruct per shift kind (LSR/ASR/LSL) per width (I/L).
// NOTE(review): w-form shift amounts are masked with 0x3f, not 0x1f --
// confirm against the generator that amounts >= 32 cannot reach here.
// int: dst = src1 | ~(src2 >>> src3).
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: dst = src1 | ~(src2 >>> src3).
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// int: dst = src1 | ~(src2 >> src3).
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: dst = src1 | ~(src2 >> src3).
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// int: dst = src1 | ~(src2 << src3).
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: dst = src1 | ~(src2 << src3).
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10355 
// AND with a shifted second operand (auto-generated section):
// src1 & shift(src2, src3) folded into a single AND(W) with
// shifted-register form.  One instruct per shift kind per width.
// NOTE(review): w-form shift amounts are masked with 0x3f, not 0x1f --
// confirm against the generator that amounts >= 32 cannot reach here.
// int: dst = src1 & (src2 >>> src3).
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: dst = src1 & (src2 >>> src3).
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// int: dst = src1 & (src2 >> src3).
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: dst = src1 & (src2 >> src3).
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// int: dst = src1 & (src2 << src3).
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// long: dst = src1 & (src2 << src3).
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10469 
10470 instruct XorI_reg_URShift_reg(iRegINoSp dst,
10471                          iRegIorL2I src1, iRegIorL2I src2,
10472                          immI src3, rFlagsReg cr) %{
10473   match(Set dst (XorI src1 (URShiftI src2 src3)));
10474 
10475   ins_cost(1.9 * INSN_COST);
10476   format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}
10477 
10478   ins_encode %{
10479     __ eorw(as_Register($dst$$reg),
10480               as_Register($src1$$reg),
10481               as_Register($src2$$reg),
10482               Assembler::LSR,
10483               $src3$$constant & 0x3f);
10484   %}
10485 
10486   ins_pipe(ialu_reg_reg_shift);
10487 %}
10488 
10489 instruct XorL_reg_URShift_reg(iRegLNoSp dst,
10490                          iRegL src1, iRegL src2,
10491                          immI src3, rFlagsReg cr) %{
10492   match(Set dst (XorL src1 (URShiftL src2 src3)));
10493 
10494   ins_cost(1.9 * INSN_COST);
10495   format %{ "eor  $dst, $src1, $src2, LSR $src3" %}
10496 
10497   ins_encode %{
10498     __ eor(as_Register($dst$$reg),
10499               as_Register($src1$$reg),
10500               as_Register($src2$$reg),
10501               Assembler::LSR,
10502               $src3$$constant & 0x3f);
10503   %}
10504 
10505   ins_pipe(ialu_reg_reg_shift);
10506 %}
10507 
10508 instruct XorI_reg_RShift_reg(iRegINoSp dst,
10509                          iRegIorL2I src1, iRegIorL2I src2,
10510                          immI src3, rFlagsReg cr) %{
10511   match(Set dst (XorI src1 (RShiftI src2 src3)));
10512 
10513   ins_cost(1.9 * INSN_COST);
10514   format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}
10515 
10516   ins_encode %{
10517     __ eorw(as_Register($dst$$reg),
10518               as_Register($src1$$reg),
10519               as_Register($src2$$reg),
10520               Assembler::ASR,
10521               $src3$$constant & 0x3f);
10522   %}
10523 
10524   ins_pipe(ialu_reg_reg_shift);
10525 %}
10526 
// XorL_reg_RShift_reg: fold a long xor with a constant-arithmetic-right-shifted
// operand into a single 64-bit EOR with an ASR-shifted register (shift count
// masked to 0..63 per Java long shift semantics).
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10545 
// XorI_reg_LShift_reg: fold an int xor with a constant-left-shifted operand
// into a single 32-bit EORW with an LSL-shifted register.
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // Mask with 0x1f, not 0x3f: Java int shifts are taken mod 32,
              // and the 32-bit shifted-register encoding only accepts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10564 
// XorL_reg_LShift_reg: fold a long xor with a constant-left-shifted operand
// into a single 64-bit EOR with an LSL-shifted register (shift count masked
// to 0..63 per Java long shift semantics).
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10583 
// OrI_reg_URShift_reg: fold an int or with a constant-unsigned-right-shifted
// operand into a single 32-bit ORRW with an LSR-shifted register.
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // Mask with 0x1f, not 0x3f: Java int shifts are taken mod 32,
              // and the 32-bit shifted-register encoding only accepts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10602 
// OrL_reg_URShift_reg: fold a long or with a constant-unsigned-right-shifted
// operand into a single 64-bit ORR with an LSR-shifted register (shift count
// masked to 0..63 per Java long shift semantics).
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10621 
// OrI_reg_RShift_reg: fold an int or with a constant-arithmetic-right-shifted
// operand into a single 32-bit ORRW with an ASR-shifted register.
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // Mask with 0x1f, not 0x3f: Java int shifts are taken mod 32,
              // and the 32-bit shifted-register encoding only accepts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10640 
// OrL_reg_RShift_reg: fold a long or with a constant-arithmetic-right-shifted
// operand into a single 64-bit ORR with an ASR-shifted register (shift count
// masked to 0..63 per Java long shift semantics).
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10659 
// OrI_reg_LShift_reg: fold an int or with a constant-left-shifted operand
// into a single 32-bit ORRW with an LSL-shifted register.
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // Mask with 0x1f, not 0x3f: Java int shifts are taken mod 32,
              // and the 32-bit shifted-register encoding only accepts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10678 
// OrL_reg_LShift_reg: fold a long or with a constant-left-shifted operand
// into a single 64-bit ORR with an LSL-shifted register (shift count masked
// to 0..63 per Java long shift semantics).
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10697 
// AddI_reg_URShift_reg: fold an int add with a constant-unsigned-right-shifted
// operand into a single 32-bit ADDW with an LSR-shifted register.
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // Mask with 0x1f, not 0x3f: Java int shifts are taken mod 32,
              // and the 32-bit shifted-register encoding only accepts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10716 
// AddL_reg_URShift_reg: fold a long add with a constant-unsigned-right-shifted
// operand into a single 64-bit ADD with an LSR-shifted register (shift count
// masked to 0..63 per Java long shift semantics).
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10735 
// AddI_reg_RShift_reg: fold an int add with a constant-arithmetic-right-shifted
// operand into a single 32-bit ADDW with an ASR-shifted register.
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // Mask with 0x1f, not 0x3f: Java int shifts are taken mod 32,
              // and the 32-bit shifted-register encoding only accepts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10754 
// AddL_reg_RShift_reg: fold a long add with a constant-arithmetic-right-shifted
// operand into a single 64-bit ADD with an ASR-shifted register (shift count
// masked to 0..63 per Java long shift semantics).
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10773 
// AddI_reg_LShift_reg: fold an int add with a constant-left-shifted operand
// into a single 32-bit ADDW with an LSL-shifted register.
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // Mask with 0x1f, not 0x3f: Java int shifts are taken mod 32,
              // and the 32-bit shifted-register encoding only accepts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10792 
// AddL_reg_LShift_reg: fold a long add with a constant-left-shifted operand
// into a single 64-bit ADD with an LSL-shifted register (shift count masked
// to 0..63 per Java long shift semantics).
instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10811 
// SubI_reg_URShift_reg: fold an int subtract of a constant-unsigned-right-
// shifted operand into a single 32-bit SUBW with an LSR-shifted register.
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              // Mask with 0x1f, not 0x3f: Java int shifts are taken mod 32,
              // and the 32-bit shifted-register encoding only accepts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10830 
// SubL_reg_URShift_reg: fold a long subtract of a constant-unsigned-right-
// shifted operand into a single 64-bit SUB with an LSR-shifted register
// (shift count masked to 0..63 per Java long shift semantics).
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10849 
// SubI_reg_RShift_reg: fold an int subtract of a constant-arithmetic-right-
// shifted operand into a single 32-bit SUBW with an ASR-shifted register.
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              // Mask with 0x1f, not 0x3f: Java int shifts are taken mod 32,
              // and the 32-bit shifted-register encoding only accepts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10868 
// SubL_reg_RShift_reg: fold a long subtract of a constant-arithmetic-right-
// shifted operand into a single 64-bit SUB with an ASR-shifted register
// (shift count masked to 0..63 per Java long shift semantics).
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10887 
// SubI_reg_LShift_reg: fold an int subtract of a constant-left-shifted operand
// into a single 32-bit SUBW with an LSL-shifted register.
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              // Mask with 0x1f, not 0x3f: Java int shifts are taken mod 32,
              // and the 32-bit shifted-register encoding only accepts 0..31.
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10906 
// SubL_reg_LShift_reg: fold a long subtract of a constant-left-shifted operand
// into a single 64-bit SUB with an LSL-shifted register (shift count masked
// to 0..63 per Java long shift semantics).
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10925 
10926 
10927 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// (src << lshift) >> rshift is a signed bitfield move: SBFM with
// r = (rshift - lshift) mod 64 and s = 63 - lshift selects the same bits and
// sign-extends them. The predicate keeps both counts in SBFM's 0..63 range.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
10950 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit analogue of sbfmL: (src << lshift) >> rshift becomes SBFMW with
// r = (rshift - lshift) mod 32 and s = 31 - lshift. The predicate keeps both
// counts in SBFMW's 0..31 range.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
10973 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Unsigned variant of sbfmL: (src << lshift) >>> rshift becomes UBFM with
// r = (rshift - lshift) mod 64 and s = 63 - lshift, zero-extending the field.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
10996 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit analogue of ubfmL: (src << lshift) >>> rshift becomes UBFMW with
// r = (rshift - lshift) mod 32 and s = 31 - lshift, zero-extending the field.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
// Bitfield extract with shift & mask

// ubfxwI: (src >>> rshift) & mask, where mask = 2^width - 1 (guaranteed by
// immI_bitmask), is a single 32-bit unsigned bitfield extract of <width>
// bits starting at bit <rshift>.
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  // Show the shift as well as the mask in the debug format; the original
  // string omitted $rshift, hiding half of the operation in PrintAssembly.
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// ubfxL: 64-bit analogue of ubfxwI — (src >>> rshift) & mask with a
// contiguous low mask becomes a single UBFX of <width> bits at <rshift>.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  // Show the shift as well as the mask in the debug format; the original
  // string omitted $rshift, hiding half of the operation in PrintAssembly.
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11051 
// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// ubfxIConvI2L: the 64-bit ubfx zero-extends the extracted field, so it also
// covers the ConvI2L of an int shift-and-mask in one instruction.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  // Show the shift as well as the mask in the debug format; the original
  // string omitted $rshift, hiding half of the operation in PrintAssembly.
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11069 
// Rotations

// extrOrL: (src1 << lshift) | (src2 >>> rshift) with lshift + rshift == 64
// (enforced by the predicate) is a 64-bit EXTR — a double-register funnel
// shift; with src1 == src2 it is a rotate right by rshift.
instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11086 
// extrOrI: 32-bit analogue of extrOrL — (src1 << lshift) | (src2 >>> rshift)
// with lshift + rshift == 32 (enforced by the predicate) is a single EXTRW.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11101 
// extrAddL: same funnel-shift fusion as extrOrL but matching AddL — when the
// shifted fields cannot overlap (lshift + rshift == 64), add and or produce
// identical bits, so the same EXTR applies.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11116 
// extrAddI: 32-bit analogue of extrAddL — non-overlapping shifted fields
// (lshift + rshift == 32) make AddI equivalent to OrI, fused into EXTRW.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11131 
11132 
// rol expander
// Rotate-left by a variable amount: rol(x, s) == ror(x, 64 - s), so negate
// the shift (subw from zr) into rscratch1 and use RORV, which reads only the
// low 6 bits of the count. NOTE: clobbers rscratch1.

instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
11148 
// rol expander
// 32-bit variable rotate-left: rol(x, s) == ror(x, 32 - s); negate the shift
// into rscratch1 and use RORVW, which reads only the low 5 bits of the count.
// NOTE: clobbers rscratch1.

instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
11164 
// rolL_rReg_Var_C_64: variable long rotate-left written as
// (src << shift) | (src >>> (64 - shift)); expands to the rolL expander.
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}
11173 
// rolL_rReg_Var_C0: same rotate-left idiom with (0 - shift) as the
// complementary count — equivalent mod 64, the other canonical C2 shape.
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}
11182 
// rolI_rReg_Var_C_32: variable int rotate-left written as
// (src << shift) | (src >>> (32 - shift)).
// FIX: this int rule was declared with long register classes (iRegLNoSp/iRegL)
// and expanded to the 64-bit rolL_rReg, mismatching the TypeInt result of OrI
// and rotating across 64 bits; use int operands and the 32-bit rolI expander.
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
11191 
// rolI_rReg_Var_C0: int rotate-left idiom with (0 - shift) as the
// complementary count (equivalent mod 32).
// FIX: was declared with long register classes and expanded to the 64-bit
// rolL_rReg; use int operands and the 32-bit rolI expander to match the
// TypeInt result of OrI and rotate within 32 bits.
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
11200 
// ror expander
// Variable long rotate-right: maps directly onto RORV, which reads only the
// low 6 bits of the count, so no masking or negation is needed.

instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
11215 
// ror expander
// Variable int rotate-right: maps directly onto RORVW, which reads only the
// low 5 bits of the count, so no masking or negation is needed.

instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
11230 
// rorL_rReg_Var_C_64: variable long rotate-right written as
// (src >>> shift) | (src << (64 - shift)); expands to the rorL expander.
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}
11239 
// rorL_rReg_Var_C0: same rotate-right idiom with (0 - shift) as the
// complementary count — equivalent mod 64, the other canonical C2 shape.
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}
11248 
// rorI_rReg_Var_C_32: variable int rotate-right written as
// (src >>> shift) | (src << (32 - shift)).
// FIX: this int rule was declared with long register classes (iRegLNoSp/iRegL)
// and expanded to the 64-bit rorL_rReg, mismatching the TypeInt result of OrI
// and rotating across 64 bits; use int operands and the 32-bit rorI expander.
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
11257 
// rorI_rReg_Var_C0: int rotate-right idiom with (0 - shift) as the
// complementary count (equivalent mod 32).
// FIX: was declared with long register classes and expanded to the 64-bit
// rorL_rReg; use int operands and the 32-bit rorI expander to match the
// TypeInt result of OrI and rotate within 32 bits.
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
11266 
11267 // Add/subtract (extended)
11268 
// AddExtI: long + (long)int folds the sign-extension into the add via the
// sxtw extended-register form.
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
11281 
// SubExtI: long - (long)int folds the sign-extension into the subtract via
// the sxtw extended-register form.
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
11294 
11295 
// AddExtI_sxth: (src2 << 16) >> 16 is sign-extend-short, folded into the add
// as the sxth extended-register form.
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11308 
// AddExtI_sxtb: (src2 << 24) >> 24 is sign-extend-byte, folded into the add
// as the sxtb extended-register form.
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11321 
// AddExtI_uxtb: (src2 << 24) >>> 24 is zero-extend-byte, folded into the add
// as the uxtb extended-register form.
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11334 
// AddExtL_sxth: (src2 << 48) >> 48 is sign-extend-short on a long, folded
// into the 64-bit add as the sxth extended-register form.
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11347 
// AddExtL_sxtw: (src2 << 32) >> 32 is sign-extend-word on a long, folded
// into the 64-bit add as the sxtw extended-register form.
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11360 
// AddExtL_sxtb: (src2 << 56) >> 56 is sign-extend-byte on a long, folded
// into the 64-bit add as the sxtb extended-register form.
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11373 
// AddExtL_uxtb: (src2 << 56) >>> 56 is zero-extend-byte on a long, folded
// into the 64-bit add as the uxtb extended-register form.
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11386 
11387 
// AddExtI_uxtb_and: masking with 0xff is zero-extend-byte, folded into the
// add as the uxtb extended-register form.
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11400 
// AddExtI_uxth_and: masking with 0xffff is zero-extend-halfword, folded into
// the add as the uxth extended-register form.
instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11413 
// AddExtL_uxtb_and: masking a long with 0xff is zero-extend-byte, folded
// into the 64-bit add as the uxtb extended-register form.
instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11426 
// AddExtL_uxth_and: masking a long with 0xffff is zero-extend-halfword,
// folded into the 64-bit add as the uxth extended-register form.
instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11439 
// AddExtL_uxtw_and: masking a long with 0xffffffff is zero-extend-word,
// folded into the 64-bit add as the uxtw extended-register form.
instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11452 
// (generated section) Int subtract where the subtrahend is masked with
// 0xFF: folds the AndI into the subw's uxtb (zero-extend byte) operand.
instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11465 
// (generated section) Int subtract where the subtrahend is masked with
// 0xFFFF: folds the AndI into the subw's uxth (zero-extend halfword) operand.
instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11478 
// (generated section) Long subtract where the subtrahend is masked with
// 0xFF: folds the AndL into the sub's uxtb (zero-extend byte) operand.
instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11491 
// (generated section) Long subtract where the subtrahend is masked with
// 0xFFFF: folds the AndL into the sub's uxth (zero-extend halfword) operand.
instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11504 
// (generated section) Long subtract where the subtrahend is masked with
// 0xFFFFFFFF: folds the AndL into the sub's uxtw (zero-extend word) operand.
instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11517 
11518 // END This section of the file is automatically generated. Do not edit --------------
11519 
11520 // ============================================================================
11521 // Floating Point Arithmetic Instructions
11522 
// Single-precision FP add: dst = src1 + src2, via fadds.
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11537 
// Double-precision FP add: dst = src1 + src2, via faddd.
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11552 
// Single-precision FP subtract: dst = src1 - src2, via fsubs.
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11567 
// Double-precision FP subtract: dst = src1 - src2, via fsubd.
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11582 
// Single-precision FP multiply: dst = src1 * src2, via fmuls.
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11597 
// Double-precision FP multiply: dst = src1 * src2, via fmuld.
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11612 
// We cannot use these fused mul with add/sub ops because they don't
// produce the same result as the equivalent separated ops
// (essentially they don't round the intermediate result). That's a
// shame. Leaving them here in case we can identify cases where it is
// legitimate to use them.
11618 
11619 
11620 // instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11621 //   match(Set dst (AddF (MulF src1 src2) src3));
11622 
11623 //   format %{ "fmadds   $dst, $src1, $src2, $src3" %}
11624 
11625 //   ins_encode %{
11626 //     __ fmadds(as_FloatRegister($dst$$reg),
11627 //              as_FloatRegister($src1$$reg),
11628 //              as_FloatRegister($src2$$reg),
11629 //              as_FloatRegister($src3$$reg));
11630 //   %}
11631 
11632 //   ins_pipe(pipe_class_default);
11633 // %}
11634 
11635 // instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11636 //   match(Set dst (AddD (MulD src1 src2) src3));
11637 
11638 //   format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
11639 
11640 //   ins_encode %{
11641 //     __ fmaddd(as_FloatRegister($dst$$reg),
11642 //              as_FloatRegister($src1$$reg),
11643 //              as_FloatRegister($src2$$reg),
11644 //              as_FloatRegister($src3$$reg));
11645 //   %}
11646 
11647 //   ins_pipe(pipe_class_default);
11648 // %}
11649 
11650 // instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11651 //   match(Set dst (AddF (MulF (NegF src1) src2) src3));
11652 //   match(Set dst (AddF (NegF (MulF src1 src2)) src3));
11653 
11654 //   format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
11655 
11656 //   ins_encode %{
11657 //     __ fmsubs(as_FloatRegister($dst$$reg),
11658 //               as_FloatRegister($src1$$reg),
11659 //               as_FloatRegister($src2$$reg),
11660 //              as_FloatRegister($src3$$reg));
11661 //   %}
11662 
11663 //   ins_pipe(pipe_class_default);
11664 // %}
11665 
11666 // instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11667 //   match(Set dst (AddD (MulD (NegD src1) src2) src3));
11668 //   match(Set dst (AddD (NegD (MulD src1 src2)) src3));
11669 
11670 //   format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
11671 
11672 //   ins_encode %{
11673 //     __ fmsubd(as_FloatRegister($dst$$reg),
11674 //               as_FloatRegister($src1$$reg),
11675 //               as_FloatRegister($src2$$reg),
11676 //               as_FloatRegister($src3$$reg));
11677 //   %}
11678 
11679 //   ins_pipe(pipe_class_default);
11680 // %}
11681 
11682 // instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
11683 //   match(Set dst (SubF (MulF (NegF src1) src2) src3));
11684 //   match(Set dst (SubF (NegF (MulF src1 src2)) src3));
11685 
11686 //   format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
11687 
11688 //   ins_encode %{
11689 //     __ fnmadds(as_FloatRegister($dst$$reg),
11690 //                as_FloatRegister($src1$$reg),
11691 //                as_FloatRegister($src2$$reg),
11692 //                as_FloatRegister($src3$$reg));
11693 //   %}
11694 
11695 //   ins_pipe(pipe_class_default);
11696 // %}
11697 
11698 // instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
11699 //   match(Set dst (SubD (MulD (NegD src1) src2) src3));
11700 //   match(Set dst (SubD (NegD (MulD src1 src2)) src3));
11701 
11702 //   format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
11703 
11704 //   ins_encode %{
11705 //     __ fnmaddd(as_FloatRegister($dst$$reg),
11706 //                as_FloatRegister($src1$$reg),
11707 //                as_FloatRegister($src2$$reg),
11708 //                as_FloatRegister($src3$$reg));
11709 //   %}
11710 
11711 //   ins_pipe(pipe_class_default);
11712 // %}
11713 
11714 // instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
11715 //   match(Set dst (SubF (MulF src1 src2) src3));
11716 
11717 //   format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
11718 
11719 //   ins_encode %{
11720 //     __ fnmsubs(as_FloatRegister($dst$$reg),
11721 //                as_FloatRegister($src1$$reg),
11722 //                as_FloatRegister($src2$$reg),
11723 //                as_FloatRegister($src3$$reg));
11724 //   %}
11725 
11726 //   ins_pipe(pipe_class_default);
11727 // %}
11728 
11729 // instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
11730 //   match(Set dst (SubD (MulD src1 src2) src3));
11731 
11732 //   format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
11733 
11734 //   ins_encode %{
11735 //   // n.b. insn name should be fnmsubd
11736 //     __ fnmsub(as_FloatRegister($dst$$reg),
11737 //                as_FloatRegister($src1$$reg),
11738 //                as_FloatRegister($src2$$reg),
11739 //                as_FloatRegister($src3$$reg));
11740 //   %}
11741 
11742 //   ins_pipe(pipe_class_default);
11743 // %}
11744 
11745 
// Single-precision FP divide: dst = src1 / src2, via fdivs.
// Higher cost reflects the long latency of hardware FP division.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11760 
// Double-precision FP divide: dst = src1 / src2, via fdivd.
// Costed higher than the float form (double division is slower still).
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11775 
// Single-precision FP negate: dst = -src, via fnegs.
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  // Format mnemonic corrected to "fnegs" to match the emitted assembler
  // call and the sibling negD rule (which prints "fnegd").
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11789 
// Double-precision FP negate: dst = -src, via fnegd.
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11803 
// Single-precision FP absolute value: dst = |src|, via fabss.
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11816 
// Double-precision FP absolute value: dst = |src|, via fabsd.
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11829 
// Double-precision square root: dst = sqrt(src), via fsqrtd.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11842 
// Single-precision square root. The ideal graph has no SqrtF node here,
// so this matches the (ConvD2F (SqrtD (ConvF2D src))) idiom and collapses
// it to a single fsqrts on the float value.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
11855 
11856 // ============================================================================
11857 // Logical Instructions
11858 
11859 // Integer Logical Instructions
11860 
11861 // And Instructions
11862 
11863 
// Bitwise AND of two int registers, via andw (non-flag-setting).
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11878 
// Bitwise AND of an int register with a logical immediate.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  // Format corrected from "andsw": the encoding emits the
  // non-flag-setting andw, so the disassembly comment must match.
  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11893 
11894 // Or Instructions
11895 
// Bitwise OR of two int registers, via orrw.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11910 
// Bitwise OR of an int register with a logical immediate, via orrw.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11925 
11926 // Xor Instructions
11927 
// Bitwise XOR of two int registers, via eorw.
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11942 
// Bitwise XOR of an int register with a logical immediate, via eorw.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11957 
11958 // Long Logical Instructions
11959 // TODO
11960 
// Bitwise AND of two long registers, via andr (64-bit form).
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  // Format comment corrected from "# int": this is the long (64-bit) rule.
  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11975 
// Bitwise AND of a long register with a logical immediate.
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  // Format comment corrected from "# int": this is the long (64-bit) rule.
  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
11990 
11991 // Or Instructions
11992 
// Bitwise OR of two long registers, via orr (64-bit form).
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  // Format comment corrected from "# int": this is the long (64-bit) rule.
  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12007 
// Bitwise OR of a long register with a logical immediate.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  // Format comment corrected from "# int": this is the long (64-bit) rule.
  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12022 
12023 // Xor Instructions
12024 
// Bitwise XOR of two long registers, via eor (64-bit form).
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  // Format comment corrected from "# int": this is the long (64-bit) rule.
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12039 
// Bitwise XOR of a long register with a logical immediate.
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  // Format comment corrected from "# int": this is the long (64-bit) rule.
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12054 
// Sign-extend int to long: sbfm with lsb 0, width 32 is the sxtw idiom.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}
12066 
12067 // this pattern occurs in bigmath arithmetic
// Zero-extend int to long: matches (AndL (ConvI2L src) 0xFFFFFFFF) and
// emits a single ubfm (uxtw idiom).
// this pattern occurs in bigmath arithmetic
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
12080 
// Truncate long to int: a 32-bit register move keeps the low word.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
12093 
// Int to boolean: dst = (src != 0) ? 1 : 0, via compare-with-zero plus
// cset. Clobbers the condition flags (KILL cr).
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
12111 
// Pointer to boolean: dst = (src != null) ? 1 : 0, via 64-bit compare
// with zero plus cset. Clobbers the condition flags (KILL cr).
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
12129 
// Narrow double to float, via fcvtd.
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12142 
// Widen float to double, via fcvts.
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12155 
// Float to int: signed convert with round-toward-zero (fcvtzs, word form).
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12168 
// Float to long: signed convert with round-toward-zero (fcvtzs, 64-bit).
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12181 
// Int to float: signed scalar convert (scvtf, word source to single dest).
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12194 
// Long to float: signed scalar convert (scvtf, 64-bit source to single dest).
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12207 
// Double to int: signed convert with round-toward-zero (fcvtzs, word dest).
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12220 
// Double to long: signed convert with round-toward-zero (fcvtzs, 64-bit).
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12233 
// Int to double: signed scalar convert (scvtf, word source to double dest).
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12246 
// Long to double: signed scalar convert (scvtf, 64-bit source to double dest).
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12259 
12260 // stack <-> reg and reg <-> reg shuffles with no conversion
12261 
// Reinterpret a float stack slot as an int register (bit pattern only,
// no value conversion): 32-bit load from the stack slot.
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}
12279 
// Reinterpret an int stack slot as a float register (bit pattern only):
// single-precision load from the stack slot.
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
12297 
// Reinterpret a double stack slot as a long register (bit pattern only):
// 64-bit load from the stack slot.
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}
12315 
// Reinterpret a long stack slot as a double register (bit pattern only):
// double-precision load from the stack slot.
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
12333 
// Reinterpret a float register as an int stack slot (bit pattern only):
// single-precision store to the stack slot.
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
12351 
// Reinterpret an int register as a float stack slot (bit pattern only):
// 32-bit store to the stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
12369 
// Reinterpret a double register as a long stack slot (bit pattern only):
// double-precision store to the stack slot.
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  // Format operand order fixed: strd stores $src into the $dst stack slot
  // (matches the encoding and every sibling Move*_reg_stack format, which
  // all print "$src, $dst").
  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
12387 
// Reinterpret a long register as a double stack slot (bit pattern only):
// 64-bit store to the stack slot.
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
12405 
// Reinterpret a float register as an int register (bit pattern only):
// direct fmov from the FP to the general register file.
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_memory);

%}
12423 
// Reinterpret an int register as a float register (bit pattern only):
// direct fmov from the general to the FP register file.
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(pipe_class_memory);

%}
12441 
// Reinterpret a double register as a long register (bit pattern only):
// direct 64-bit fmov from the FP to the general register file.
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(pipe_class_memory);

%}
12459 
// Reinterpret a long register as a double register (bit pattern only):
// direct 64-bit fmov from the general to the FP register file.
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(pipe_class_memory);

%}
12477 
12478 // ============================================================================
12479 // clearing of an array
12480 
// Zero a freshly-allocated array: count in r11, base address in r10
// (both clobbered, per USE_KILL). Expansion lives in the
// aarch64_enc_clear_array_reg_reg encoding class (not visible here).
// NOTE(review): cr is listed as a parameter but not KILLed — confirm the
// encoding class does not clobber the flags.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode(aarch64_enc_clear_array_reg_reg(cnt, base));

  ins_pipe(pipe_class_memory);
%}
12493 
12494 // ============================================================================
12495 // Overflow Math Instructions
12496 
// Overflow check for int add: cmnw (adds to discard) sets V on signed
// overflow of op1 + op2; only the flags result is produced.
instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}
12509 
// Overflow check for int add with an add/sub-encodable immediate.
instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
12522 
// Overflow check for long add: 64-bit cmn sets V on signed overflow.
instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}
12535 
// Overflow check for long add with an add/sub-encodable immediate.
instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
12548 
// Overflow check for int subtract: cmpw sets V on signed overflow of
// op1 - op2; only the flags result is produced.
instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}
12561 
12562 instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
12563 %{
12564   match(Set cr (OverflowSubI op1 op2));
12565 
12566   format %{ "cmpw  $op1, $op2\t# overflow check int" %}
12567   ins_cost(INSN_COST);
12568   ins_encode %{
12569     __ cmpw($op1$$Register, $op2$$constant);
12570   %}
12571 
12572   ins_pipe(icmp_reg_imm);
12573 %}
12574 
12575 instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
12576 %{
12577   match(Set cr (OverflowSubL op1 op2));
12578 
12579   format %{ "cmp   $op1, $op2\t# overflow check long" %}
12580   ins_cost(INSN_COST);
12581   ins_encode %{
12582     __ cmp($op1$$Register, $op2$$Register);
12583   %}
12584 
12585   ins_pipe(icmp_reg_reg);
12586 %}
12587 
12588 instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
12589 %{
12590   match(Set cr (OverflowSubL op1 op2));
12591 
12592   format %{ "cmp   $op1, $op2\t# overflow check long" %}
12593   ins_cost(INSN_COST);
12594   ins_encode %{
12595     __ cmp($op1$$Register, $op2$$constant);
12596   %}
12597 
12598   ins_pipe(icmp_reg_imm);
12599 %}
12600 
12601 instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
12602 %{
12603   match(Set cr (OverflowSubI zero op1));
12604 
12605   format %{ "cmpw  zr, $op1\t# overflow check int" %}
12606   ins_cost(INSN_COST);
12607   ins_encode %{
12608     __ cmpw(zr, $op1$$Register);
12609   %}
12610 
12611   ins_pipe(icmp_reg_imm);
12612 %}
12613 
12614 instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
12615 %{
12616   match(Set cr (OverflowSubL zero op1));
12617 
12618   format %{ "cmp   zr, $op1\t# overflow check long" %}
12619   ins_cost(INSN_COST);
12620   ins_encode %{
12621     __ cmp(zr, $op1$$Register);
12622   %}
12623 
12624   ins_pipe(icmp_reg_imm);
12625 %}
12626 
// Overflow check, int multiply.  The full product is computed into 64
// bits with smull; it overflowed 32 bits iff it differs from its own
// 32-bit sign extension (the subs against ext::sxtw).  The movw/cselw/
// cmpw tail then manufactures V set (overflow) or clear (no overflow)
// so that generic overflow cmpOps can test $cr.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}
12647 
// Fused overflow-check + branch, int multiply.  Used when the sole
// consumer of OverflowMulI is an If on overflow/no_overflow (see the
// predicate): instead of materializing the V flag it branches directly
// on the NE/EQ outcome of the sign-extension compare, mapping VS -> NE
// and (implicitly) VC -> EQ.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
12669 
// Overflow check, long multiply.  mul yields result bits 0..63 and
// smulh bits 64..127.  The multiply fits in 64 bits iff the high half
// is a pure sign extension of the low half, i.e. equals rscratch1
// arithmetically shifted right 63.  The movw/cselw/cmpw tail then
// manufactures V set (overflow) or clear so generic overflow cmpOps
// can test $cr.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    // Shift must be 63 (was 31): comparing against rscratch1 >> 31
    // falsely reports overflow for any in-range product with bits set
    // in positions 32..62 (e.g. 2^31 * 2^31 = 2^62 fits in a long).
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}
12692 
// Fused overflow-check + branch, long multiply.  Used when the sole
// consumer of OverflowMulL is an If on overflow/no_overflow (see the
// predicate): branches directly on the NE/EQ outcome of the
// sign-extension compare (VS -> NE, VC -> EQ) instead of materializing
// the V flag.
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    // Shift must be 63 (was 31): the high half is a pure sign extension
    // of the low half only if it equals rscratch1 >> 63.
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
12716 
12717 // ============================================================================
12718 // Compare Instructions
12719 
// Signed 32-bit compare, reg vs reg.
instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed 32-bit compare against zero.
instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, 0" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed 32-bit compare against an add/sub-encodable immediate.
instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed 32-bit compare against an arbitrary int immediate; costed at
// 2x because a general immediate may need to be materialized first
// (encoding is in aarch64_enc_cmpw_imm, defined elsewhere in this file).
instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
12775 
12776 // Unsigned compare Instructions; really, same as signed compare
12777 // except it should only be used to feed an If or a CMovI which takes a
12778 // cmpOpU.
12779 
// Unsigned 32-bit compare, reg vs reg.  Emits the same cmpw as the
// signed form; only the rFlagsRegU result type differs, steering
// consumers to unsigned cmpOpU conditions.
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned 32-bit compare against zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned 32-bit compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned 32-bit compare against an arbitrary int immediate (2x cost:
// the immediate may need materialization).
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
12835 
// Signed 64-bit compare, reg vs reg.
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed 64-bit compare against zero.
// n.b. the format prints "tst" but the encoding used here is the
// add/sub-immediate compare (aarch64_enc_cmp_imm_addsub) with a zero
// constant — NOTE(review): confirm the intended mnemonic in the format.
instruct compL_reg_immI0(rFlagsReg cr, iRegL op1, immI0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed 64-bit compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed 64-bit compare against an arbitrary long immediate (2x cost:
// the immediate may need materialization).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
12891 
// Pointer compare, reg vs reg.  Pointer comparisons are unsigned
// (rFlagsRegU result).
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compressed (narrow) pointer compare, reg vs reg.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Pointer null check: compare a pointer register against zero.
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Compressed pointer null check: compare a narrow oop against zero.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
12947 
12948 // FP comparisons
12949 //
12950 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
12951 // using normal cmpOp. See declaration of rFlagsReg for details.
12952 
// Float compare, reg vs reg; fcmps sets the normal integer flags (see
// the section comment above on CmpF/CmpD feeding ordinary cmpOps).
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Float compare against the constant 0.0.
// n.b. the 0.0D literal suffix is non-standard C++ and relies on the
// toolchains this file targets; newer sources use plain 0.0 — verify.
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}
// FROM HERE

// Double compare, reg vs reg.
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Double compare against the constant 0.0 (same 0.0D caveat as above).
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}
13009 
// Three-way float compare: $dst := -1 if $src1 < $src2 (or unordered),
// 0 if equal, +1 if greater — csinvw materializes 0/-1 on EQ/non-EQ,
// then csnegw flips -1 to +1 unless LT (LT also holds for unordered).
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  // Fixed: the csinvw line of the format was missing its closing paren.
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Removed an unused `Label done` that was declared and bound but
    // never branched to (emitted nothing).
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
13037 
// Three-way double compare: $dst := -1 if $src1 < $src2 (or unordered),
// 0 if equal, +1 if greater.  Same csinvw/csnegw idiom as compF3.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  // Fixed: the csinvw line of the format was missing its closing paren.
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Removed an unused `Label done` that was declared and bound but
    // never branched to (emitted nothing).
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
13064 
// Three-way float compare against 0.0: $dst := -1 / 0 / +1 as in
// compF3_reg_reg, with the right operand the constant zero.
// n.b. the 0.0D literal suffix is non-standard C++ (kept as elsewhere
// in this file; newer sources use plain 0.0 — verify against toolchain).
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  // Fixed: the csinvw line of the format was missing its closing paren.
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Removed an unused `Label done` that was declared and bound but
    // never branched to (emitted nothing).
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0D);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
13091 
// Three-way double compare against 0.0: $dst := -1 / 0 / +1 as in
// compD3_reg_reg, with the right operand the constant zero.
// n.b. the 0.0D literal suffix is non-standard C++ (kept as elsewhere
// in this file; newer sources use plain 0.0 — verify against toolchain).
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  // Fixed: the csinvw line of the format was missing its closing paren.
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    // Removed an unused `Label done` that was declared and bound but
    // never branched to (emitted nothing).
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0D);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
13117 
// CmpLTMask: $dst := ($p < $q) ? -1 : 0.  csetw produces 0/1 from the
// signed compare, then subw from zr negates it to 0/-1.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// CmpLTMask against zero: a single arithmetic shift right by 31
// smears the sign bit, giving -1 for negative $src and 0 otherwise.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
13154 
13155 // ============================================================================
13156 // Max and Min
13157 
// Signed int minimum: compare then conditional-select $src1 on LT.
// size(8) == two 4-byte instructions (cmpw + cselw).
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 lt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
// FROM HERE

// Signed int maximum: compare then conditional-select $src1 on GT.
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 gt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
13208 
13209 // ============================================================================
13210 // Branch Instructions
13211 
// Direct Branch.  Unconditional goto.
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}

// Conditional Near Branch on signed flags.
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}

// Conditional Near Branch Unsigned — same shape as branchCon but takes
// unsigned flags (rFlagsRegU) and an unsigned cmpOpU condition.
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
13268 
13269 // Make use of CBZ and CBNZ.  These instructions, as well as being
13270 // shorter than (cmp; branch), have the additional benefit of not
13271 // killing the flags.
13272 
// Compare-int-against-zero and branch, fused into cbzw/cbnzw.  Only
// eq/ne tests qualify (see predicate).  n.b. $cr is declared as an
// operand but carries no KILL effect — cbz/cbnz leave the flags alone
// (that is the point of this pattern; see the section comment above).
instruct cmpI_imm0_branch(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Compare-long-against-zero and branch, fused into cbz/cbnz (64-bit).
instruct cmpL_imm0_branch(cmpOp cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Compare-pointer-against-null and branch, fused into cbz/cbnz.
instruct cmpP_imm0_branch(cmpOp cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
13329 
13330 // Conditional Far Branch
13331 // Conditional Far Branch Unsigned
13332 // TODO: fixme
13333 
// counted loop end branch near — conditional back-branch terminating a
// counted loop; same encoding as branchCon.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}

// counted loop end branch near Unsigned — unsigned-flags variant.
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
13367 
13368 // counted loop end branch far
13369 // counted loop end branch far unsigned
13370 // TODO: fixme
13371 
13372 // ============================================================================
13373 // inlined locking and unlocking
13374 
// Inlined monitor enter: sets $cr for the caller to test lock success;
// $tmp/$tmp2 are scratch (TEMP).  Encoding is aarch64_enc_fast_lock,
// defined elsewhere in this file.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}

// Inlined monitor exit: mirror of cmpFastLock.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
13402 
13403 
13404 // ============================================================================
13405 // Safepoint Instructions
13406 
13407 // TODO
13408 // provide a near and far version of this code
13409 
// Safepoint poll: a discarded load from the polling page; the page is
// protected when a safepoint is requested so the load traps into the VM.
instruct safePoint(iRegP poll)
%{
  match(SafePoint poll);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
13422 
13423 
13424 // ============================================================================
13425 // Procedure Call/Return Instructions
13426 
// Call Java Static Instruction — direct call to a resolved Java method;
// the epilog encoding follows the call (encodings defined elsewhere in
// this file).
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}

// TO HERE

// Call Java Dynamic Instruction — call through an inline cache.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction — call from compiled Java into the VM runtime.
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction — leaf runtime call (same encoding as above;
// the leaf distinction lives in the ideal CallLeaf node).
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction — leaf runtime call that does not use FP args.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
13514 
// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}

// Tail jump used for exception forwarding: $ex_oop (pinned to r0)
// carries the exception; the return address is consumed, not preserved.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
13544 
// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
// TODO check
// should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}

// Rethrow exception: The exception oop will come in the first
// argument position. Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
13575 
13576 
// Return Instruction
// epilog node loads ret address into lr as part of frame pop
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}

// Die now.  Halt node: emits a breakpoint trap (brk #999) — reached
// only on paths the compiler has proven should never execute.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // TODO
    // implement proper trap call here
    __ brk(999);
  %}

  ins_pipe(pipe_class_default);
%}
13605 
13606 // ============================================================================
13607 // Partial Subtype Check
13608 //
13609 // superklass array for an instance of the superklass.  Set a hidden
13610 // internal cache on a hit (cache is checked with exposed code in
13611 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
13612 // encoding ALSO sets flags.
13613 
// Scan the secondary-supers array of 'sub' for 'super'.  Fixed
// registers (sub=r4, super=r0, temp=r2, result=r5) are dictated by the
// shared encoding this delegates to.  The encoding also sets the
// condition flags (see the section comment above).
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}
13628 
// Variant used when the subtype-check result is only compared against
// zero: the flags set by the encoding are consumed directly, so the
// result register is merely a temporary (hence KILL result).
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // same cost as partialSubtypeCheck above
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
13643 
// Three-way string comparison (StrComp intrinsic).  Fixed registers
// (str1=r1, cnt1=r2, str2=r3, cnt2=r4, result=r0, tmp1=r10) match the
// macro-assembler string_compare stub.  All inputs are clobbered.
instruct string_compare(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13658 
// String.indexOf with a variable-length needle (StrIndexOf intrinsic).
instruct string_indexof(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      // -1 = needle length is not a compile-time
                      // constant (cf. string_indexof_con below).
                      -1, $result$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13676 
// String.indexOf specialized for a constant needle length <= 4
// (immI_le_4): the constant is passed to the stub and no cnt2
// register is needed (zr is supplied in its place).
instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13696 
// String equality check over cnt characters (StrEquals intrinsic);
// boolean result in r0.  All inputs are clobbered.
instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, iRegP_R10 tmp, rFlagsReg cr)
%{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(KILL tmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp" %}
  ins_encode %{
    __ string_equals($str1$$Register, $str2$$Register,
                      $cnt$$Register, $result$$Register,
                      $tmp$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13711 
// char[] array equality (AryEq intrinsic); delegates to the
// char_arrays_equals macro-assembler routine.  Both array pointers
// are clobbered.
instruct array_equals(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R10 tmp, rFlagsReg cr)
%{
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ char_arrays_equals($ary1$$Register, $ary2$$Register,
                          $result$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
13725 
13726 // encode char[] to byte[] in ISO_8859_1
// encode char[] to byte[] in ISO_8859_1
// Fixed registers: src=r2, dst=r1, len=r3, result=r0; the SIMD
// temporaries v0-v3 are clobbered by the vectorized copy loop.
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
13744 
13745 // ============================================================================
13746 // This name is KNOWN by the ADLC and cannot be changed.
13747 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13748 // for this guy.
// Load the current Thread pointer.  The thread register is dedicated
// on AArch64, so this is free: no code is emitted and cost is zero.
instruct tlsLoadP(thread_RegP dst)
%{
  match(Set dst (ThreadLocal));

  ins_cost(0);

  format %{ " -- \t// $dst=Thread::current(), empty" %}

  // Zero-length: the value already lives in the dedicated register.
  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
13763 
13764 // ====================VECTOR INSTRUCTIONS=====================================
13765 
13766 // Load vector (32 bits)
// 32-bit vector load into the low word of a D register; selected by
// the memory_size predicate.
instruct loadV4(vecD dst, vmem mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(pipe_class_memory);
%}
13776 
13777 // Load vector (64 bits)
// 64-bit vector load into a D register.
instruct loadV8(vecD dst, vmem mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(pipe_class_memory);
%}
13787 
13788 // Load Vector (128 bits)
// 128-bit vector load into a Q register.
instruct loadV16(vecX dst, vmem mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(pipe_class_memory);
%}
13798 
13799 // Store Vector (32 bits)
// 32-bit vector store from the low word of a D register.
instruct storeV4(vecD src, vmem mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(pipe_class_memory);
%}
13809 
13810 // Store Vector (64 bits)
// 64-bit vector store from a D register.
instruct storeV8(vecD src, vmem mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(pipe_class_memory);
%}
13820 
13821 // Store Vector (128 bits)
// 128-bit vector store from a Q register.
instruct storeV16(vecX src, vmem mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(pipe_class_memory);
%}
13831 
// Broadcast a GPR byte into all 8 B lanes of a D register.  Also
// matched for length-4 vectors: they live in a D register too, and
// the extra high lanes are simply unused.
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13844 
// Broadcast a GPR byte into all 16 B lanes of a Q register.
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13856 
// Broadcast an immediate byte into all 8 B lanes (movi).  The constant
// is masked to the 8-bit lane width before encoding.
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(pipe_class_default);
%}
13869 
// Broadcast an immediate byte into all 16 B lanes (movi), masked to
// the 8-bit lane width.
instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(pipe_class_default);
%}
13881 
// Broadcast a GPR short into all 4 H lanes of a D register; also
// matched for length-2 short vectors held in the same register.
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13894 
// Broadcast a GPR short into all 8 H lanes of a Q register.
instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13906 
// Broadcast an immediate short into 4 H lanes (movi), masked to the
// 16-bit lane width.
instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(pipe_class_default);
%}
13919 
// Broadcast an immediate short into 8 H lanes (movi), masked to the
// 16-bit lane width.
instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(pipe_class_default);
%}
13931 
// Broadcast a GPR int into both S lanes of a D register.
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13943 
// Broadcast a GPR int into all 4 S lanes of a Q register.
instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13955 
// Broadcast an immediate int into both S lanes (movi).
instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
13967 
// Broadcast an immediate int into all 4 S lanes (movi).
instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
13979 
// Broadcast a GPR long into both D lanes of a Q register.
instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
13991 
// All-zero 128-bit vector: eor of dst with itself avoids materializing
// a constant.
// NOTE(review): the name says 2L but the rule matches ReplicateI of an
// int 0 (immI0), and the format label says "(4I)".  The zero bit
// pattern is identical for both element widths, but confirm this match
// is the form the ideal graph actually produces here.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14005 
// Broadcast an FP register float into both S lanes of a D register.
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14018 
// Broadcast an FP register float into all 4 S lanes of a Q register.
instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14031 
// Broadcast an FP register double into both D lanes of a Q register.
instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14044 
14045 // ====================REDUCTION ARITHMETIC====================================
14046 
// Add-reduce a 2-lane int vector into scalar src1: both S lanes are
// extracted with umov and folded in with scalar adds.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp, iRegI tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
14065 
// Add-reduce a 4-lane int vector: SIMD addv sums all lanes, then the
// scalar total in lane 0 is extracted and added to src1.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
14083 
// Multiply-reduce a 2-lane int vector into scalar src1: each S lane is
// extracted with umov and folded into dst with a scalar mul.
// TEMP dst: dst is written before all inputs have been consumed, so it
// must not share a register with them.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fix: the format string previously ended with a stray "\n\t",
  // producing a dangling empty continuation line in debug output.
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
14102 
// Multiply-reduce a 4-lane int vector: the high 64 bits are folded
// onto the low half (ins + 2S mulv), then the remaining two lanes are
// extracted and multiplied into dst along with src1.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  // Fix: dropped the stray trailing "\n\t" that left a dangling empty
  // continuation line in debug output.
  format %{ "ins   $tmp, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
14127 
// Add-reduce a 2-lane float vector into scalar src1: add lane 0, move
// lane 1 down with ins, add it too.  Strictly ordered scalar fadds
// keep the result bit-exact for Java FP semantics.
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14147 
// Add-reduce a 4-lane float vector into scalar src1: each lane is
// moved into tmp's lane 0 with ins and accumulated with scalar fadds,
// in lane order, to preserve strict FP evaluation order.
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14179 
// Multiply-reduce a 2-lane float vector into scalar src1: multiply by
// lane 0, move lane 1 down with ins, multiply again.  Scalar fmuls
// keep the strict evaluation order required for Java FP semantics.
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fix: format label previously said "add reduction4f" (copy-paste
  // from the add rules); this is a 2-lane MUL reduction.
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14199 
// Multiply-reduce a 4-lane float vector into scalar src1: each lane is
// moved into tmp's lane 0 with ins and folded in with scalar fmuls,
// in lane order, to preserve strict FP evaluation order.
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fix: format label previously said "add reduction4f" (copy-paste
  // from the add rules); this is a MUL reduction.
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14231 
// Add-reduce a 2-lane double vector into scalar src1: add lane 0,
// move lane 1 down with ins, add it too (strictly ordered faddd).
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14251 
// Multiply-reduce a 2-lane double vector into scalar src1: multiply by
// lane 0, move lane 1 down with ins, multiply again (strictly ordered
// scalar fmuld).
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fix: format label previously said "add reduction2d" (copy-paste
  // from the add rule); this is a MUL reduction.
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
  %}
  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14271 
14272 // ====================VECTOR ARITHMETIC=======================================
14273 
14274 // --------------------------------- ADD --------------------------------------
14275 
// Vector add, byte lanes in a D register (length 4 shares the rule).
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14290 
// Vector add, 16 byte lanes in a Q register.
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14304 
// Vector add, short (H) lanes in a D register (length 2 shares the rule).
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14319 
// Vector add, 8 short (H) lanes in a Q register.
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14333 
// Vector add, 2 int (S) lanes in a D register.
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14347 
// Vector add, 4 int (S) lanes in a Q register.
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14361 
// Vector add, 2 long (D) lanes in a Q register.
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14375 
// Vector FP add, 2 float (S) lanes in a D register.
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14389 
// Vector FP add, 4 float (S) lanes in a Q register.
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14403 
// Vector FP add, 2 double (D) lanes in a Q register.
// NOTE(review): unlike the other rules this one has no length
// predicate - presumably 2 is the only double-vector length that fits
// a 128-bit register; confirm no predicate is needed.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14416 
14417 // --------------------------------- SUB --------------------------------------
14418 
// Vector subtract, byte lanes in a D register (length 4 shares the rule).
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14433 
// Vector subtract, 16 byte lanes in a Q register.
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14447 
// Vector subtract, short (H) lanes in a D register (length 2 shares the rule).
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14462 
14463 instruct vsub8S(vecX dst, vecX src1, vecX src2)
14464 %{
14465   predicate(n->as_Vector()->length() == 8);
14466   match(Set dst (SubVS src1 src2));
14467   ins_cost(INSN_COST);
14468   format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
14469   ins_encode %{
14470     __ subv(as_FloatRegister($dst$$reg), __ T8H,
14471             as_FloatRegister($src1$$reg),
14472             as_FloatRegister($src2$$reg));
14473   %}
14474   ins_pipe(pipe_class_default);
14475 %}
14476 
14477 instruct vsub2I(vecD dst, vecD src1, vecD src2)
14478 %{
14479   predicate(n->as_Vector()->length() == 2);
14480   match(Set dst (SubVI src1 src2));
14481   ins_cost(INSN_COST);
14482   format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
14483   ins_encode %{
14484     __ subv(as_FloatRegister($dst$$reg), __ T2S,
14485             as_FloatRegister($src1$$reg),
14486             as_FloatRegister($src2$$reg));
14487   %}
14488   ins_pipe(pipe_class_default);
14489 %}
14490 
14491 instruct vsub4I(vecX dst, vecX src1, vecX src2)
14492 %{
14493   predicate(n->as_Vector()->length() == 4);
14494   match(Set dst (SubVI src1 src2));
14495   ins_cost(INSN_COST);
14496   format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
14497   ins_encode %{
14498     __ subv(as_FloatRegister($dst$$reg), __ T4S,
14499             as_FloatRegister($src1$$reg),
14500             as_FloatRegister($src2$$reg));
14501   %}
14502   ins_pipe(pipe_class_default);
14503 %}
14504 
14505 instruct vsub2L(vecX dst, vecX src1, vecX src2)
14506 %{
14507   predicate(n->as_Vector()->length() == 2);
14508   match(Set dst (SubVL src1 src2));
14509   ins_cost(INSN_COST);
14510   format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
14511   ins_encode %{
14512     __ subv(as_FloatRegister($dst$$reg), __ T2D,
14513             as_FloatRegister($src1$$reg),
14514             as_FloatRegister($src2$$reg));
14515   %}
14516   ins_pipe(pipe_class_default);
14517 %}
14518 
14519 instruct vsub2F(vecD dst, vecD src1, vecD src2)
14520 %{
14521   predicate(n->as_Vector()->length() == 2);
14522   match(Set dst (SubVF src1 src2));
14523   ins_cost(INSN_COST);
14524   format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
14525   ins_encode %{
14526     __ fsub(as_FloatRegister($dst$$reg), __ T2S,
14527             as_FloatRegister($src1$$reg),
14528             as_FloatRegister($src2$$reg));
14529   %}
14530   ins_pipe(pipe_class_default);
14531 %}
14532 
14533 instruct vsub4F(vecX dst, vecX src1, vecX src2)
14534 %{
14535   predicate(n->as_Vector()->length() == 4);
14536   match(Set dst (SubVF src1 src2));
14537   ins_cost(INSN_COST);
14538   format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
14539   ins_encode %{
14540     __ fsub(as_FloatRegister($dst$$reg), __ T4S,
14541             as_FloatRegister($src1$$reg),
14542             as_FloatRegister($src2$$reg));
14543   %}
14544   ins_pipe(pipe_class_default);
14545 %}
14546 
14547 instruct vsub2D(vecX dst, vecX src1, vecX src2)
14548 %{
14549   predicate(n->as_Vector()->length() == 2);
14550   match(Set dst (SubVD src1 src2));
14551   ins_cost(INSN_COST);
14552   format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
14553   ins_encode %{
14554     __ fsub(as_FloatRegister($dst$$reg), __ T2D,
14555             as_FloatRegister($src1$$reg),
14556             as_FloatRegister($src2$$reg));
14557   %}
14558   ins_pipe(pipe_class_default);
14559 %}
14560 
14561 // --------------------------------- MUL --------------------------------------
14562 
// Vector multiply, 4 (or 2) shorts in a D register (4H arrangement).
// Note: there is no vmul for byte or long element sizes here; MUL on 1D/2D
// arrangements does not exist in NEON.
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector multiply, 8 shorts in a Q register (8H arrangement).
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector multiply, 2 ints in a D register (2S arrangement).
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector multiply, 4 ints in a Q register (4S arrangement).
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector multiply, 2 floats in a D register (FMUL, 2S arrangement).
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector multiply, 4 floats in a Q register (FMUL, 4S arrangement).
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector multiply, 2 doubles in a Q register (FMUL, 2D arrangement).
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14661 
14662 // --------------------------------- DIV --------------------------------------
14663 
// Vector divide, 2 floats in a D register (FDIV, 2S arrangement).
// Only floating-point division exists in NEON; there are no integer
// vector-divide rules.
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector divide, 4 floats in a Q register (FDIV, 4S arrangement).
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Vector divide, 2 doubles in a Q register (FDIV, 2D arrangement).
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14705 
14706 // --------------------------------- AND --------------------------------------
14707 
// Bitwise AND of a 64-bit (or 32-bit) vector. Logical ops are element-size
// agnostic, so the predicate keys on length_in_bytes rather than length.
// The assembler method is "andr" ("and" is a C++ keyword).
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Bitwise AND of a 128-bit vector.
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14736 
14737 // --------------------------------- OR ---------------------------------------
14738 
14739 instruct vor8B(vecD dst, vecD src1, vecD src2)
14740 %{
14741   predicate(n->as_Vector()->length_in_bytes() == 4 ||
14742             n->as_Vector()->length_in_bytes() == 8);
14743   match(Set dst (OrV src1 src2));
14744   ins_cost(INSN_COST);
14745   format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
14746   ins_encode %{
14747     __ orr(as_FloatRegister($dst$$reg), __ T8B,
14748             as_FloatRegister($src1$$reg),
14749             as_FloatRegister($src2$$reg));
14750   %}
14751   ins_pipe(pipe_class_default);
14752 %}
14753 
// Bitwise OR of a 128-bit vector (ORR, 16B arrangement).
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14767 
14768 // --------------------------------- XOR --------------------------------------
14769 
// Bitwise XOR of a 64-bit (or 32-bit) vector. The AArch64 mnemonic is EOR;
// the format string uses the ideal-op name "xor".
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Bitwise XOR of a 128-bit vector (EOR, 16B arrangement).
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14798 
14799 // ------------------------------ Shift ---------------------------------------
14800 
// Broadcast a left-shift count from a GP register into every byte lane of a
// vector register, for use by the variable-shift (SSHL/USHL) rules below.
instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (LShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
// Broadcast the count, then negate every lane so SSHL/USHL shift right.
instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
    __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
14820 
// Variable shift of 8 (or 4) bytes. SSHL shifts left for positive lane
// counts and right for negative ones, so one rule covers both LShiftVB and
// RShiftVB (vshiftcntR above negates the count for right shifts).
instruct vsll8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable shift of 16 bytes (SSHL, 16B).
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable logical (unsigned) right shift of 8 (or 4) bytes: USHL with the
// negated count supplied by vshiftcntR.
instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable logical right shift of 16 bytes (USHL, 16B).
instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate left shift of 8 (or 4) bytes. Java masks int shift counts to
// 0..31; a count >= 8 shifts every bit out of a byte lane, so the result is
// zeroed with EOR dst,src,src instead of emitting an out-of-range SHL.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate left shift of 16 bytes; same zeroing trick for counts >= 8.
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate arithmetic right shift of 8 (or 4) bytes. Counts >= 8 are
// clamped to 7 (arithmetic shift saturates to the sign bit).
// NOTE(review): the "-sh & 7" remap suggests this assembler's sshr expects
// a pre-encoded immediate rather than the literal count -- confirm against
// the sshr definition in the AArch64 assembler.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate arithmetic right shift of 16 bytes; same clamp/remap as above.
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate logical right shift of 8 (or 4) bytes; counts >= 8 zero the
// result via EOR, otherwise USHR with the remapped immediate.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate logical right shift of 16 bytes; same handling as vsrl8B_imm.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(pipe_class_default);
%}
14985 
// Variable shift of 4 (or 2) shorts; SSHL handles both directions via the
// sign of the per-lane count (see vshiftcntL/vshiftcntR).
instruct vsll4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable shift of 8 shorts (SSHL, 8H).
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable logical right shift of 4 (or 2) shorts (USHL with negated count).
instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable logical right shift of 8 shorts (USHL, 8H).
instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate left shift of 4 (or 2) shorts; a count >= 16 empties a 16-bit
// lane, so the result is zeroed with EOR.
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate left shift of 8 shorts; same zeroing trick for counts >= 16.
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate arithmetic right shift of 4 (or 2) shorts; counts >= 16 are
// clamped to 15, then remapped with "-sh & 15" (see NOTE on vsra8B_imm
// about the immediate encoding expected by sshr).
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate arithmetic right shift of 8 shorts; same clamp/remap.
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate logical right shift of 4 (or 2) shorts; counts >= 16 zero the
// result via EOR.
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate logical right shift of 8 shorts; same handling as vsrl4S_imm.
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(pipe_class_default);
%}
15150 
// Variable shift of 2 ints; SSHL handles both directions via the sign of
// the per-lane count.
instruct vsll2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable shift of 4 ints (SSHL, 4S).
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable logical right shift of 2 ints (USHL with negated count).
instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Variable logical right shift of 4 ints (USHL, 4S).
instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate left shift of 2 ints. The "& 31" mask matches Java's shift
// semantics for 32-bit values, so no lane-emptying case exists here.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate left shift of 4 ints (SHL, 4S).
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate arithmetic right shift of 2 ints; the negate-and-mask mirrors
// the "-sh & N" remap used by the byte/short rules above.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate arithmetic right shift of 4 ints (SSHR, 4S).
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate logical right shift of 2 ints (USHR, 2S).
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate logical right shift of 4 ints (USHR, 4S).
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(pipe_class_default);
%}
15282 
15283 instruct vsll2L(vecX dst, vecX src, vecX shift) %{
15284   predicate(n->as_Vector()->length() == 2);
15285   match(Set dst (LShiftVL src shift));
15286   match(Set dst (RShiftVL src shift));
15287   ins_cost(INSN_COST);
15288   format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
15289   ins_encode %{
15290     __ sshl(as_FloatRegister($dst$$reg), __ T2D,
15291             as_FloatRegister($src$$reg),
15292             as_FloatRegister($shift$$reg));
15293   %}
15294   ins_pipe(pipe_class_default);
15295 %}
15296 
15297 instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
15298   predicate(n->as_Vector()->length() == 2);
15299   match(Set dst (URShiftVL src shift));
15300   ins_cost(INSN_COST);
15301   format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
15302   ins_encode %{
15303     __ ushl(as_FloatRegister($dst$$reg), __ T2D,
15304             as_FloatRegister($src$$reg),
15305             as_FloatRegister($shift$$reg));
15306   %}
15307   ins_pipe(pipe_class_default);
15308 %}
15309 
15310 instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
15311   predicate(n->as_Vector()->length() == 2);
15312   match(Set dst (LShiftVL src shift));
15313   ins_cost(INSN_COST);
15314   format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
15315   ins_encode %{
15316     __ shl(as_FloatRegister($dst$$reg), __ T2D,
15317            as_FloatRegister($src$$reg),
15318            (int)$shift$$constant & 63);
15319   %}
15320   ins_pipe(pipe_class_default);
15321 %}
15322 
// Vector arithmetic shift-right by an immediate, 2 x 64-bit lanes
// (SSHR, T2D).
// NOTE(review): the immediate is passed negated-and-masked
// (-(int)c & 63), matching the other right-shift-by-immediate rules in
// this file (e.g. the ushr/T4S rule above) — presumably this era's
// assembler right-shift helpers expect the count in that form; confirm
// against the sshr definition in assembler_aarch64.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(pipe_class_default);
%}
15335 
// Vector logical shift-right by an immediate, 2 x 64-bit lanes
// (USHR, T2D).
// NOTE(review): the immediate is passed negated-and-masked
// (-(int)c & 63), matching the sibling right-shift-by-immediate rules in
// this file — presumably what this era's assembler helpers expect;
// confirm against the ushr definition in assembler_aarch64.  Also worth
// confirming how a masked count of 0 is handled, since the USHR
// immediate encoding has no shift-by-zero form.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(pipe_class_default);
%}
15348 
15349 //----------PEEPHOLE RULES-----------------------------------------------------
15350 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
15352 //
15353 // peepmatch ( root_instr_name [preceding_instruction]* );
15354 //
15355 // peepconstraint %{
15356 // (instruction_number.operand_name relational_op instruction_number.operand_name
15357 //  [, ...] );
15358 // // instruction numbers are zero-based using left to right order in peepmatch
15359 //
15360 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
15361 // // provide an instruction_number.operand_name for each operand that appears
15362 // // in the replacement instruction's match rule
15363 //
15364 // ---------VM FLAGS---------------------------------------------------------
15365 //
15366 // All peephole optimizations can be turned off using -XX:-OptoPeephole
15367 //
15368 // Each peephole rule is given an identifying number starting with zero and
15369 // increasing by one in the order seen by the parser.  An individual peephole
15370 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
15371 // on the command-line.
15372 //
15373 // ---------CURRENT LIMITATIONS----------------------------------------------
15374 //
15375 // Only match adjacent instructions in same basic block
15376 // Only equality constraints
15377 // Only constraints between operands, not (0.dest_reg == RAX_enc)
15378 // Only one replacement instruction
15379 //
15380 // ---------EXAMPLE----------------------------------------------------------
15381 //
15382 // // pertinent parts of existing instructions in architecture description
15383 // instruct movI(iRegINoSp dst, iRegI src)
15384 // %{
15385 //   match(Set dst (CopyI src));
15386 // %}
15387 //
15388 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
15389 // %{
15390 //   match(Set dst (AddI dst src));
15391 //   effect(KILL cr);
15392 // %}
15393 //
15394 // // Change (inc mov) to lea
15395 // peephole %{
//   // increment preceded by register-register move
15397 //   peepmatch ( incI_iReg movI );
15398 //   // require that the destination register of the increment
15399 //   // match the destination register of the move
15400 //   peepconstraint ( 0.dst == 1.dst );
15401 //   // construct a replacement instruction that sets
15402 //   // the destination to ( move's source register + one )
15403 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
15404 // %}
15405 //
15406 
15407 // Implementation no longer uses movX instructions since
15408 // machine-independent system no longer uses CopyX nodes.
15409 //
15410 // peephole
15411 // %{
15412 //   peepmatch (incI_iReg movI);
15413 //   peepconstraint (0.dst == 1.dst);
15414 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15415 // %}
15416 
15417 // peephole
15418 // %{
15419 //   peepmatch (decI_iReg movI);
15420 //   peepconstraint (0.dst == 1.dst);
15421 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15422 // %}
15423 
15424 // peephole
15425 // %{
15426 //   peepmatch (addI_iReg_imm movI);
15427 //   peepconstraint (0.dst == 1.dst);
15428 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
15429 // %}
15430 
15431 // peephole
15432 // %{
15433 //   peepmatch (incL_iReg movL);
15434 //   peepconstraint (0.dst == 1.dst);
15435 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15436 // %}
15437 
15438 // peephole
15439 // %{
15440 //   peepmatch (decL_iReg movL);
15441 //   peepconstraint (0.dst == 1.dst);
15442 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15443 // %}
15444 
15445 // peephole
15446 // %{
15447 //   peepmatch (addL_iReg_imm movL);
15448 //   peepconstraint (0.dst == 1.dst);
15449 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
15450 // %}
15451 
15452 // peephole
15453 // %{
15454 //   peepmatch (addP_iReg_imm movP);
15455 //   peepconstraint (0.dst == 1.dst);
15456 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
15457 // %}
15458 
15459 // // Change load of spilled value to only a spill
15460 // instruct storeI(memory mem, iRegI src)
15461 // %{
15462 //   match(Set mem (StoreI mem src));
15463 // %}
15464 //
15465 // instruct loadI(iRegINoSp dst, memory mem)
15466 // %{
15467 //   match(Set dst (LoadI mem));
15468 // %}
15469 //
15470 
15471 //----------SMARTSPILL RULES---------------------------------------------------
15472 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
15474 
15475 // Local Variables:
15476 // mode: c++
15477 // End: