1 //
   2 // Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 
  25 // AARCH32 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.
  31 register %{
  32 //----------Architecture Description Register Definitions----------------------
  33 // General Registers
  34 // "reg_def"  name ( register save type, C convention save type,
  35 //                   ideal register type, encoding, vm name );
  36 // Register Save Types:
  37 //
  38 // NS  = No-Save:       The register allocator assumes that these registers
  39 //                      can be used without saving upon entry to the method, &
  40 //                      that they do not need to be saved at call sites.
  41 //
  42 // SOC = Save-On-Call:  The register allocator assumes that these registers
  43 //                      can be used without saving upon entry to the method,
  44 //                      but that they must be saved at call sites.
  45 //
  46 // SOE = Save-On-Entry: The register allocator assumes that these registers
  47 //                      must be saved before using them upon entry to the
  48 //                      method, but they do not need to be saved at call
  49 //                      sites.
  50 //
  51 // AS  = Always-Save:   The register allocator assumes that these registers
  52 //                      must be saved before using them upon entry to the
  53 //                      method, & that they must be saved at call sites.
  54 //
  55 // Ideal Register Type is used to determine how to save & restore a
  56 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  57 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  58 //
  59 // The encoding number is the actual bit-pattern placed into the opcodes.
  60 
  61 
  62 // ----------------------------
  63 // Integer/Long Registers
  64 // ----------------------------
  65 
// Core register definitions. Encoding equals the hardware register number.
// Roles follow the uses below: R10 = Rthread/TLS, R11 = FP, R12 = IP/Ricklass,
// R13 = SP, R14 = LR, R15 = PC (see the reg_class section).
reg_def R_R0 (SOC, SOC, Op_RegI,  0,  R(0)->as_VMReg());
reg_def R_R1 (SOC, SOC, Op_RegI,  1,  R(1)->as_VMReg());
reg_def R_R2 (SOC, SOC, Op_RegI,  2,  R(2)->as_VMReg());
reg_def R_R3 (SOC, SOC, Op_RegI,  3,  R(3)->as_VMReg());
reg_def R_R4 (SOC, SOE, Op_RegI,  4,  R(4)->as_VMReg());
reg_def R_R5 (SOC, SOE, Op_RegI,  5,  R(5)->as_VMReg());
reg_def R_R6 (SOC, SOE, Op_RegI,  6,  R(6)->as_VMReg());
reg_def R_R7 (SOC, SOE, Op_RegI,  7,  R(7)->as_VMReg());
reg_def R_R8 (SOC, SOE, Op_RegI,  8,  R(8)->as_VMReg());  // Rmethod (see #define below)
reg_def R_R9 (SOC, SOE, Op_RegI,  9,  R(9)->as_VMReg());
reg_def R_R10(NS,  SOE, Op_RegI, 10, R(10)->as_VMReg());  // Rthread / TLS
reg_def R_R11(NS,  SOE, Op_RegI, 11, R(11)->as_VMReg());  // FP (see FP_regP)
reg_def R_R12(SOC, SOC, Op_RegI, 12, R(12)->as_VMReg());  // IP scratch / Ricklass
reg_def R_R13(NS,  NS,  Op_RegI, 13, R(13)->as_VMReg());  // SP
reg_def R_R14(SOC, SOC, Op_RegI, 14, R(14)->as_VMReg());  // LR
reg_def R_R15(NS,  NS,  Op_RegI, 15, R(15)->as_VMReg());  // PC
  82 
  83 // ----------------------------
  84 // Float/Double Registers
  85 // ----------------------------
  86 
  87 // Float Registers
  88 
// VFP single-precision registers. s0-s15 are caller-saved (SOC/SOC),
// s16-s31 are callee-saved under the C convention (SOC/SOE), matching AAPCS.
reg_def R_S0 ( SOC, SOC, Op_RegF,  0, f0->as_VMReg());
reg_def R_S1 ( SOC, SOC, Op_RegF,  1, f1->as_VMReg());
reg_def R_S2 ( SOC, SOC, Op_RegF,  2, f2->as_VMReg());
reg_def R_S3 ( SOC, SOC, Op_RegF,  3, f3->as_VMReg());
reg_def R_S4 ( SOC, SOC, Op_RegF,  4, f4->as_VMReg());
reg_def R_S5 ( SOC, SOC, Op_RegF,  5, f5->as_VMReg());
reg_def R_S6 ( SOC, SOC, Op_RegF,  6, f6->as_VMReg());
reg_def R_S7 ( SOC, SOC, Op_RegF,  7, f7->as_VMReg());
reg_def R_S8 ( SOC, SOC, Op_RegF,  8, f8->as_VMReg());
reg_def R_S9 ( SOC, SOC, Op_RegF,  9, f9->as_VMReg());
reg_def R_S10( SOC, SOC, Op_RegF, 10,f10->as_VMReg());
reg_def R_S11( SOC, SOC, Op_RegF, 11,f11->as_VMReg());
reg_def R_S12( SOC, SOC, Op_RegF, 12,f12->as_VMReg());
reg_def R_S13( SOC, SOC, Op_RegF, 13,f13->as_VMReg());
reg_def R_S14( SOC, SOC, Op_RegF, 14,f14->as_VMReg());  // reserved: Rmemcopy (see source_hpp)
reg_def R_S15( SOC, SOC, Op_RegF, 15,f15->as_VMReg());  // reserved: mem-mem copy pair
reg_def R_S16( SOC, SOE, Op_RegF, 16,f16->as_VMReg());
reg_def R_S17( SOC, SOE, Op_RegF, 17,f17->as_VMReg());
reg_def R_S18( SOC, SOE, Op_RegF, 18,f18->as_VMReg());
reg_def R_S19( SOC, SOE, Op_RegF, 19,f19->as_VMReg());
reg_def R_S20( SOC, SOE, Op_RegF, 20,f20->as_VMReg());
reg_def R_S21( SOC, SOE, Op_RegF, 21,f21->as_VMReg());
reg_def R_S22( SOC, SOE, Op_RegF, 22,f22->as_VMReg());
reg_def R_S23( SOC, SOE, Op_RegF, 23,f23->as_VMReg());
reg_def R_S24( SOC, SOE, Op_RegF, 24,f24->as_VMReg());
reg_def R_S25( SOC, SOE, Op_RegF, 25,f25->as_VMReg());
reg_def R_S26( SOC, SOE, Op_RegF, 26,f26->as_VMReg());
reg_def R_S27( SOC, SOE, Op_RegF, 27,f27->as_VMReg());
reg_def R_S28( SOC, SOE, Op_RegF, 28,f28->as_VMReg());
reg_def R_S29( SOC, SOE, Op_RegF, 29,f29->as_VMReg());
reg_def R_S30( SOC, SOE, Op_RegF, 30,f30->as_VMReg());
reg_def R_S31( SOC, SOE, Op_RegF, 31,f31->as_VMReg());
 121 
 122 // Double Registers
 123 // The rules of ADL require that double registers be defined in pairs.
 124 // Each pair must be two 32-bit values, but not necessarily a pair of
 125 // single float registers.  In each pair, ADLC-assigned register numbers
 126 // must be adjacent, with the lower number even.  Finally, when the
 127 // CPU stores such a register pair to memory, the word associated with
 128 // the lower ADLC-assigned number must be stored to the lower address.
 129 
// TODO: the problem is that the AArch32 port has the same numeric value for
// d16->as_VMReg and f1->as_VMReg, which breaks the reverse mapping from
// VMReg to OptoReg
 133 // reg_def R_D16 (SOC, SOC, Op_RegD, 32, d16->as_VMReg());
 134 // reg_def R_D16x(SOC, SOC, Op_RegD,255, d16->as_VMReg()->next());
 135 // reg_def R_D17 (SOC, SOC, Op_RegD, 34, d17->as_VMReg());
 136 // reg_def R_D17x(SOC, SOC, Op_RegD,255, d17->as_VMReg()->next());
 137 // reg_def R_D18 (SOC, SOC, Op_RegD, 36, d18->as_VMReg());
 138 // reg_def R_D18x(SOC, SOC, Op_RegD,255, d18->as_VMReg()->next());
 139 // reg_def R_D19 (SOC, SOC, Op_RegD, 38, d19->as_VMReg());
 140 // reg_def R_D19x(SOC, SOC, Op_RegD,255, d19->as_VMReg()->next());
 141 // reg_def R_D20 (SOC, SOC, Op_RegD, 40, d20->as_VMReg());
 142 // reg_def R_D20x(SOC, SOC, Op_RegD,255, d20->as_VMReg()->next());
 143 // reg_def R_D21 (SOC, SOC, Op_RegD, 42, d21->as_VMReg());
 144 // reg_def R_D21x(SOC, SOC, Op_RegD,255, d21->as_VMReg()->next());
 145 // reg_def R_D22 (SOC, SOC, Op_RegD, 44, d22->as_VMReg());
 146 // reg_def R_D22x(SOC, SOC, Op_RegD,255, d22->as_VMReg()->next());
 147 // reg_def R_D23 (SOC, SOC, Op_RegD, 46, d23->as_VMReg());
 148 // reg_def R_D23x(SOC, SOC, Op_RegD,255, d23->as_VMReg()->next());
 149 // reg_def R_D24 (SOC, SOC, Op_RegD, 48, d24->as_VMReg());
 150 // reg_def R_D24x(SOC, SOC, Op_RegD,255, d24->as_VMReg()->next());
 151 // reg_def R_D25 (SOC, SOC, Op_RegD, 50, d25->as_VMReg());
 152 // reg_def R_D25x(SOC, SOC, Op_RegD,255, d25->as_VMReg()->next());
 153 // reg_def R_D26 (SOC, SOC, Op_RegD, 52, d26->as_VMReg());
 154 // reg_def R_D26x(SOC, SOC, Op_RegD,255, d26->as_VMReg()->next());
 155 // reg_def R_D27 (SOC, SOC, Op_RegD, 54, d27->as_VMReg());
 156 // reg_def R_D27x(SOC, SOC, Op_RegD,255, d27->as_VMReg()->next());
 157 // reg_def R_D28 (SOC, SOC, Op_RegD, 56, d28->as_VMReg());
 158 // reg_def R_D28x(SOC, SOC, Op_RegD,255, d28->as_VMReg()->next());
 159 // reg_def R_D29 (SOC, SOC, Op_RegD, 58, d29->as_VMReg());
 160 // reg_def R_D29x(SOC, SOC, Op_RegD,255, d29->as_VMReg()->next());
 161 // reg_def R_D30 (SOC, SOC, Op_RegD, 60, d30->as_VMReg());
 162 // reg_def R_D30x(SOC, SOC, Op_RegD,255, d30->as_VMReg()->next());
 163 // reg_def R_D31 (SOC, SOC, Op_RegD, 62, d31->as_VMReg());
 164 // reg_def R_D31x(SOC, SOC, Op_RegD,255, d31->as_VMReg()->next());
 165 
 166 // ----------------------------
 167 // Special Registers
 168 // Condition Codes Flag Registers
 169 reg_def APSR (SOC, SOC,  Op_RegFlags, 0, VMRegImpl::Bad());
 170 reg_def FPSCR(SOC, SOC,  Op_RegFlags, 0, VMRegImpl::Bad());
 171 
 172 // ----------------------------
 173 // Specify the enum values for the registers.  These enums are only used by the
 174 // OptoReg "class". We can convert these enum values at will to VMReg when needed
 175 // for visibility to the rest of the vm. The order of this enum influences the
 176 // register allocator so having the freedom to set this order and not be stuck
 177 // with the order that is natural for the rest of the vm is worth it.
 178 
 179 // registers in that order so that R11/R12 is an aligned pair that can be used for longs
 180 alloc_class chunk0(
 181                    R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_R11, R_R12, R_R10, R_R13, R_R14, R_R15, R_R0, R_R1, R_R2, R_R3);
 182 
 183 // Note that a register is not allocatable unless it is also mentioned
 184 // in a widely-used reg_class below.
 185 
 186 alloc_class chunk1(
 187                    R_S16, R_S17, R_S18, R_S19, R_S20, R_S21, R_S22, R_S23,
 188                    R_S24, R_S25, R_S26, R_S27, R_S28, R_S29, R_S30, R_S31,
 189                    R_S0,  R_S1,  R_S2,  R_S3,  R_S4,  R_S5,  R_S6,  R_S7,
 190                    R_S8,  R_S9,  R_S10, R_S11, R_S12, R_S13, R_S14, R_S15
 191                    // ,
 192                    // R_D16, R_D16x,R_D17, R_D17x,R_D18, R_D18x,R_D19, R_D19x,
 193                    // R_D20, R_D20x,R_D21, R_D21x,R_D22, R_D22x,R_D23, R_D23x,
 194                    // R_D24, R_D24x,R_D25, R_D25x,R_D26, R_D26x,R_D27, R_D27x,
 195                    // R_D28, R_D28x,R_D29, R_D29x,R_D30, R_D30x,R_D31, R_D31x
 196 );
 197 
 198 alloc_class chunk2(APSR, FPSCR);
 199 
 200 //----------Architecture Description Register Classes--------------------------
 201 // Several register classes are automatically defined based upon information in
 202 // this architecture description.
 203 // 1) reg_class inline_cache_reg           ( as defined in frame section )
 204 // 2) reg_class interpreter_method_oop_reg ( as defined in frame section )
 205 // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 206 //
 207 
 208 // ----------------------------
 209 // Integer Register Classes
 210 // ----------------------------
 211 // Exclusions from i_reg:
 212 // sp (R13), PC (R15)
 213 // R10: reserved by HotSpot to the TLS register (invariant within Java)
 214 reg_class int_reg(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_R11, R_R12, R_R14);
 215 
 216 reg_class R0_regI(R_R0);
 217 reg_class R1_regI(R_R1);
 218 reg_class R2_regI(R_R2);
 219 reg_class R3_regI(R_R3);
 220 reg_class R9_regI(R_R9);
 221 reg_class R12_regI(R_R12);
 222 
 223 // ----------------------------
 224 // Pointer Register Classes
 225 // ----------------------------
 226 reg_class ptr_reg(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_R11, R_R12, R_R14);
 227 // Special class for storeP instructions, which can store SP or RPC to TLS.
 228 // It is also used for memory addressing, allowing direct TLS addressing.
 229 reg_class sp_ptr_reg(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, R_R9, R_R11, R_R12, R_R14, R_R8, R_R10 /* TLS*/, R_R13 /* SP*/);
 230 
 231 #define R_Ricklass R_R12
 232 #define R_Rmethod  R_R8
 233 #define R_Rthread  R_R10
 234 #define R_Rexception_obj R_R0
 235 
 236 // Other special pointer regs
 237 reg_class R0_regP(R_R0);
 238 reg_class R1_regP(R_R1);
 239 reg_class R2_regP(R_R2);
 240 reg_class R4_regP(R_R4);
 241 reg_class Rexception_regP(R_Rexception_obj);
 242 reg_class Ricklass_regP(R_Ricklass);
 243 reg_class Rmethod_regP(R_Rmethod);
 244 reg_class Rthread_regP(R_Rthread);
 245 reg_class IP_regP(R_R12);
 246 reg_class LR_regP(R_R14);
 247 
 248 reg_class FP_regP(R_R11);
 249 
 250 // ----------------------------
 251 // Long Register Classes
 252 // ----------------------------
 253 reg_class long_reg (             R_R0,R_R1, R_R2,R_R3, R_R4,R_R5, R_R6,R_R7, R_R8,R_R9, R_R11,R_R12);
 254 // for ldrexd, strexd: first reg of pair must be even
 255 reg_class long_reg_align (       R_R0,R_R1, R_R2,R_R3, R_R4,R_R5, R_R6,R_R7, R_R8,R_R9);
 256 
 257 reg_class R0R1_regL(R_R0,R_R1);
 258 reg_class R2R3_regL(R_R2,R_R3);
 259 
 260 // ----------------------------
 261 // Special Class for Condition Code Flags Register
 262 reg_class int_flags(APSR);
 263 reg_class float_flags(FPSCR);
 264 
 265 
 266 // ----------------------------
 267 // Float Point Register Classes
 268 // ----------------------------
 269 // Skip f14/f15, they are reserved for mem-mem copies
 270 reg_class sflt_reg(R_S0, R_S1, R_S2, R_S3, R_S4, R_S5, R_S6, R_S7, R_S8, R_S9, R_S10, R_S11, R_S12, R_S13,
 271                    R_S16, R_S17, R_S18, R_S19, R_S20, R_S21, R_S22, R_S23, R_S24, R_S25, R_S26, R_S27, R_S28, R_S29, R_S30, R_S31);
 272 
 273 // Paired floating point registers--they show up in the same order as the floats,
 274 // but they are used with the "Op_RegD" type, and always occur in even/odd pairs.
 275 reg_class dflt_reg(R_S0,R_S1, R_S2,R_S3, R_S4,R_S5, R_S6,R_S7, R_S8,R_S9, R_S10,R_S11, R_S12,R_S13,
 276                    R_S16,R_S17, R_S18,R_S19, R_S20,R_S21, R_S22,R_S23, R_S24,R_S25, R_S26,R_S27, R_S28,R_S29, R_S30,R_S31
 277                    // ,
 278                    // R_D16,R_D16x, R_D17,R_D17x, R_D18,R_D18x, R_D19,R_D19x, R_D20,R_D20x, R_D21,R_D21x, R_D22,R_D22x,
 279                    // R_D23,R_D23x, R_D24,R_D24x, R_D25,R_D25x, R_D26,R_D26x, R_D27,R_D27x, R_D28,R_D28x, R_D29,R_D29x,
 280                    // R_D30,R_D30x, R_D31,R_D31x
 281   );
 282 
 283 reg_class dflt_low_reg(R_S0,R_S1, R_S2,R_S3, R_S4,R_S5, R_S6,R_S7, R_S8,R_S9, R_S10,R_S11, R_S12,R_S13,
 284                        R_S16,R_S17, R_S18,R_S19, R_S20,R_S21, R_S22,R_S23, R_S24,R_S25, R_S26,R_S27, R_S28,R_S29, R_S30,R_S31);
 285 
 286 
// Dynamically selected double register class.  The VFPv3-D32 check is
// currently disabled (constant 0), so this always yields the "low" mask;
// the commented condition is the intended runtime test.
reg_class actual_dflt_reg %{
  if (/*VM_Version::features() & FT_VFPV3D32*/0) { // TODO verify and enable
    return DFLT_REG_mask();
  } else {
    return DFLT_LOW_REG_mask();
  }
%}
 294 
// Fixed-register FP classes; each D class is an even/odd single pair.
reg_class f0_regF(R_S0);
reg_class D0_regD(R_S0,R_S1);
reg_class D1_regD(R_S2,R_S3);
reg_class D2_regD(R_S4,R_S5);
reg_class D3_regD(R_S6,R_S7);
reg_class D4_regD(R_S8,R_S9);
reg_class D5_regD(R_S10,R_S11);
reg_class D6_regD(R_S12,R_S13);
reg_class D7_regD(R_S14,R_S15);
reg_class D0D1_regD(R_S0,R_S1,R_S2,R_S3);
reg_class D2D3_regD(R_S4,R_S5,R_S6,R_S7);
 306 
 307 // reg_class D16_regD(R_D16,R_D16x);
 308 // reg_class D17_regD(R_D17,R_D17x);
 309 // reg_class D18_regD(R_D18,R_D18x);
 310 // reg_class D19_regD(R_D19,R_D19x);
 311 // reg_class D20_regD(R_D20,R_D20x);
 312 // reg_class D21_regD(R_D21,R_D21x);
 313 // reg_class D22_regD(R_D22,R_D22x);
 314 // reg_class D23_regD(R_D23,R_D23x);
 315 // reg_class D24_regD(R_D24,R_D24x);
 316 // reg_class D25_regD(R_D25,R_D25x);
 317 // reg_class D26_regD(R_D26,R_D26x);
 318 // reg_class D27_regD(R_D27,R_D27x);
 319 // reg_class D28_regD(R_D28,R_D28x);
 320 // reg_class D29_regD(R_D29,R_D29x);
 321 // reg_class D30_regD(R_D30,R_D30x);
 322 // reg_class D31_regD(R_D31,R_D31x);
 323 
// 128-bit vector class built from single-precision quadruples
// (f14/f15 excluded, same reservation as sflt_reg above).
reg_class vectorx_reg(R_S0,R_S1,R_S2,R_S3, R_S4,R_S5,R_S6,R_S7,
                      R_S8,R_S9,R_S10,R_S11, /* skip f14/f15 */
                      R_S16,R_S17,R_S18,R_S19, R_S20,R_S21,R_S22,R_S23,
                      R_S24,R_S25,R_S26,R_S27, R_S28,R_S29,R_S30,R_S31
                      // ,
                      // R_D16,R_D16x,R_D17,R_D17x, R_D18,R_D18x,R_D19,R_D19x,
                      // R_D20,R_D20x,R_D21,R_D21x, R_D22,R_D22x,R_D23,R_D23x,
                      // R_D24,R_D24x,R_D25,R_D25x, R_D26,R_D26x,R_D27,R_D27x,
                      // R_D28,R_D28x,R_D29,R_D29x, R_D30,R_D30x,R_D31,R_D31x
  );
 334 
 335 %}
 336 
 337 source_hpp %{
// FIXME
// f14/f15 are reserved for mem-mem copies (see the float reg classes above).
const MachRegisterNumbers R_mem_copy_lo_num = R_S14_num;
const MachRegisterNumbers R_mem_copy_hi_num = R_S15_num;
const FloatRegister Rmemcopy = f14;
// Hard-float return value registers (s0/s1) — see c2::return_value.
const MachRegisterNumbers R_hf_ret_lo_num = R_S0_num;
const MachRegisterNumbers R_hf_ret_hi_num = R_S1_num;

const MachRegisterNumbers R_Ricklass_num = R_R12_num;
const MachRegisterNumbers R_Rmethod_num  = R_R8_num;

// Assembler mnemonic strings used in instruction format/encode blocks.
#define LDR_DOUBLE "FLDD"
#define LDR_FLOAT  "FLDS"
#define STR_DOUBLE "FSTD"
#define STR_FLOAT  "FSTS"
#define LDR_64     "LDRD"
#define STR_64     "STRD"
#define LDR_32     "LDR"
#define STR_32     "STR"
#define MOV_DOUBLE "FCPYD"
#define MOV_FLOAT  "FCPYS"
#define FMSR       "FMSR"
#define FMRS       "FMRS"
#define LDREX      "ldrex "
#define STREX      "strex "
 362 
 363 static inline bool is_memoryD(int offset) {
 364   return offset < 1024 && offset > -1024;
 365 }
 366 
 367 static inline bool is_memoryfp(int offset) {
 368   return offset < 1024 && offset > -1024;
 369 }
 370 
 371 static inline bool is_memoryI(int offset) {
 372   return offset < 4096 && offset > -4096;
 373 }
 374 
 375 static inline bool is_memoryP(int offset) {
 376   return offset < 4096 && offset > -4096;
 377 }
 378 
 379 static inline bool is_memoryHD(int offset) {
 380   return offset < 256 && offset > -256;
 381 }
 382 
// True when `imm` is encodable as an ARM arithmetic modified-immediate
// (the "imm12" rotated-8-bit form).
static inline bool is_aimm(int imm) {
  return Assembler::is_valid_for_imm12(imm);
}
 386 
// True when `imm` is encodable as a logical-instruction immediate.
// NOTE(review): uses the same imm12 test as is_aimm; kept as a separate
// helper for the ADLC operand names — confirm no stricter check is needed.
static inline bool is_limmI(jint imm) {
  return Assembler::is_valid_for_imm12(imm);
}
 390 
// True when either the low `n` bits of `imm`, or `imm` itself, is an
// encodable logical immediate (companion to limmI_low below).
static inline bool is_limmI_low(jint imm, int n) {
  int imml = imm & right_n_bits(n);
  return is_limmI(imml) || is_limmI(imm);
}
 395 
// Return the low-n-bit masked value of `imm` when that is encodable as a
// logical immediate, otherwise the original `imm`.
static inline int limmI_low(jint imm, int n) {
  int imml = imm & right_n_bits(n);
  return is_limmI(imml) ? imml : imm;
}
 400 
 401 %}
 402 
 403 source %{
 404 
// Given a register encoding, produce a Integer Register object.
// The ADLC encodings match the hardware register numbers one-to-one
// (verified by the assert), so the conversion is direct.
static Register reg_to_register_object(int register_encoding) {
  assert(r0->encoding() == R_R0_enc && r15->encoding() == R_R15_enc, "right coding");
  return as_Register(register_encoding);
}
 410 
// Given a register encoding, produce a Float Register object.
static FloatRegister reg_to_FloatRegister_object(int register_encoding) {
  assert(f0->encoding() == R_S0_enc && f31->encoding() == R_S31_enc, "right coding");
  // [d16,d31] share FloatRegister encoding with [f1,f31] since it numerically
  // equals the ARM insn parameter encoding; in contrast, the OptoReg encoding
  // for d16+ is different.  Folding `enc >> 5` back in maps a d16+ OptoReg
  // encoding (which carries a bit above the 5-bit field) into the shared
  // 5-bit FloatRegister encoding; plain s0-s31 encodings pass through
  // unchanged since their high bits are zero.
  return as_FloatRegister((register_encoding&0x1f)|(register_encoding>>5));
}
 418 
// Platform-dependent C2 one-time initialization hook: nothing to do on AArch32.
void Compile::pd_compiler2_init() {
  // Unimplemented
}
 422 
// Map an ideal register type to the (hi, lo) OptoReg pair holding a Java
// method's return value: int/pointer in R0, long in R1:R0, float/double in
// s0(/s1) per the hard-float VFP convention.
OptoRegPair c2::return_value(int ideal_reg) {
  assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
  // Tables indexed by ideal register opcode, 0 .. Op_RegL.
  static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, R_R0_num,     R_R0_num,     R_hf_ret_lo_num,  R_hf_ret_lo_num, R_R0_num };
  static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad,     R_hf_ret_hi_num, R_R1_num };
#ifndef HARD_FLOAT_CC
  // Even without a hard-float C ABI, the Java ABI here returns FP values in
  // VFP registers, so an FPU is mandatory.
  assert(hasFPU(), "non-VFP java ABI is not supported");
#endif
  return OptoRegPair( hi[ideal_reg], lo[ideal_reg]);
}
 432 
#ifndef HARD_FLOAT_CC
// C-ABI return mapping for soft-float builds: unlike the Java convention
// above, float/double results come back in core registers (R0 / R1:R0).
OptoRegPair c2::c_return_value(int ideal_reg) {
  assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
  // Tables indexed by ideal register opcode, 0 .. Op_RegL.
  static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, R_R0_num,     R_R0_num,     R_R0_num,     R_R0_num, R_R0_num };
  static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, R_R1_num, R_R1_num };
  return OptoRegPair( hi[ideal_reg], lo[ideal_reg]);
}
#endif
 441 
// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.

// Size in bytes of a static call sequence.  For a runtime stub
// (_method == NULL): one "bl", or 3 instructions when a far call may be
// needed.  For a Java method: one instruction, or a full NativeCall when
// branches can be far.  Method-handle invokes add 2 more instructions.
static uint call_static_enc_size(const MachCallNode *n, ciMethod *_method, bool _method_handle_invoke) {
  int call_sz = (_method == NULL) ?
    (maybe_far_call(n) ? 3 : 1) :
    (far_branches() ? NativeCall::instruction_size / NativeInstruction::arm_insn_sz : 1);
  return (call_sz + (_method_handle_invoke ? 2 : 0)) *
    NativeInstruction::arm_insn_sz;
}
 453 
// Size in bytes of a dynamic call: 2 fixed instructions (presumably the
// inline-cache setup — confirm against the encode block) plus the call
// itself, which grows to a full NativeCall when branches can be far.
static uint call_dynamic_enc_size() {
  return 2 * NativeInstruction::arm_insn_sz +
    (far_branches() ? NativeCall::instruction_size : NativeInstruction::arm_insn_sz);
}
 458 
 459 static uint call_runtime_enc_size(const MachCallNode *n) {
 460   // bl or movw; movt; blx
 461   bool far = maybe_far_call(n);
 462   return (far ? 3 : 1) * NativeInstruction::arm_insn_sz;
 463 }
 464 
// Byte offset from the call-site start to the return address; must match
// what emit_call_reloc actually produces (asserted there).  For method
// handle invokes, the return address lands one instruction before the end
// of the emitted sequence.
int MachCallStaticJavaNode::ret_addr_offset() {
  return call_static_enc_size(this, _method, _method_handle_invoke) -
    (_method_handle_invoke ? 1 : 0) * NativeInstruction::arm_insn_sz;
}

// Dynamic calls: return address is at the end of the full sequence.
int MachCallDynamicJavaNode::ret_addr_offset() {
  return call_dynamic_enc_size();
}

// Runtime calls: return address is at the end of the call sequence.
int MachCallRuntimeNode::ret_addr_offset() {
  return call_runtime_enc_size(this);
}
 477 %}
 478 
 479 // The intptr_t operand types, defined by textual substitution.
 480 // (Cf. opto/type.hpp.  This lets us avoid many, many other ifdefs.)
 481 #define immX      immI
 482 #define immXRot   immIRot
 483 #define iRegX     iRegI
 484 #define aimmX     aimmI
 485 #define limmX     limmI
 486 #define immX10x2  immI10x2
 487 #define LShiftX   LShiftI
 488 #define shimmX    immU5
 489 
 490 // Compatibility interface
 491 #define aimmP     immPRot
 492 #define immIMov   immIRot
 493 
 494 #define store_RegL     iRegL
 495 #define store_RegLd    iRegLd
 496 #define store_RegI     iRegI
 497 #define store_ptr_RegP iRegP
 498 
 499 //----------ATTRIBUTES---------------------------------------------------------
 500 //----------Operand Attributes-------------------------------------------------
 501 op_attrib op_cost(1);          // Required cost attribute
 502 
 503 //----------OPERANDS-----------------------------------------------------------
 504 // Operand definitions must precede instruction definitions for correct parsing
 505 // in the ADLC because operands constitute user defined types which are used in
 506 // instruction definitions.
 507 
 508 //----------Simple Operands----------------------------------------------------
 509 // Immediate Operands
 510 
// Integer constant encodable as an ARM modified immediate ("imm12").
operand immIRot() %{
  predicate(Assembler::is_valid_for_imm12(n->get_int()));
  match(ConI);

  op_cost(0);
  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}

// Non-zero constant whose bitwise complement is imm12-encodable (MVN form).
operand immIRotn() %{
  predicate(n->get_int() != 0 && Assembler::is_valid_for_imm12(~n->get_int()));
  match(ConI);

  op_cost(0);
  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}
 530 
// Constant that is only encodable after negation (e.g. ADD <-> SUB rewrite).
operand immIRotneg() %{
  // if Assembler::is_valid_for_imm12() is true for this constant, it is
  // a immIRot and an optimal instruction combination exists to handle the
  // constant as an immIRot
  predicate(!Assembler::is_valid_for_imm12(n->get_int()) && Assembler::is_valid_for_imm12(-n->get_int()));
  match(ConI);

  op_cost(0);
  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}

// Non-negative integer immediate that is encodable using the rotation scheme,
// and that when expanded fits in 31 bits.
operand immU31Rot() %{
  predicate((0 <= n->get_int()) && Assembler::is_valid_for_imm12(n->get_int()));
  match(ConI);

  op_cost(0);
  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}
 555 
// Pointer constant: NULL, or imm12-encodable with no relocation needed.
operand immPRot() %{
  predicate(n->get_ptr() == 0 || (Assembler::is_valid_for_imm12(n->get_ptr()) && ((ConPNode*)n)->type()->reloc() == relocInfo::none));

  match(ConP);

  op_cost(0);
  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}

// Long constant with a zero high word and an imm12-encodable low word.
operand immLlowRot() %{
  predicate(n->get_long() >> 32 == 0 && Assembler::is_valid_for_imm12((int)n->get_long()));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long constant whose high and low words are each imm12-encodable.
operand immLRot2() %{
  predicate(Assembler::is_valid_for_imm12((int)(n->get_long() >> 32)) &&
            Assembler::is_valid_for_imm12((int)(n->get_long())));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}
 585 
// Integer Immediate: 12-bit - for addressing mode (exclusive bounds)
operand immI12() %{
  predicate((-4096 < n->get_int()) && (n->get_int() < 4096));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 10-bit disp and disp+4 - for addressing float pair
// (upper bound shrunk by 4 so the second word of the pair stays in range)
operand immI10x2() %{
  predicate((-1024 < n->get_int()) && (n->get_int() < 1024 - 4));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 12-bit disp and disp+4 - for addressing word pair
// (upper bound shrunk by 4 so the second word of the pair stays in range)
operand immI12x2() %{
  predicate((-4096 < n->get_int()) && (n->get_int() < 4096 - 4));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}
 615 
 616 //----------DEFINITION BLOCK---------------------------------------------------
 617 // Define name --> value mappings to inform the ADLC of an integer valued name
 618 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 619 // Format:
 620 //        int_def  <name>         ( <int_value>, <expression>);
 621 // Generated Code in ad_<arch>.hpp
 622 //        #define  <name>   (<expression>)
 623 //        // value == <int_value>
 624 // Generated code in ad_<arch>.cpp adlc_verification()
 625 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 626 //
// Instruction cost constants; each int value must equal its expression
// (checked by the generated adlc_verification(), see comment above).
definitions %{
// The default cost (of an ALU instruction).
  int_def DEFAULT_COST      (    100,     100);
  int_def HUGE_COST         (1000000, 1000000);

// Memory refs are twice as expensive as run-of-the-mill.
  int_def MEMORY_REF_COST   (    200, DEFAULT_COST * 2);

// Branches are even more expensive.
  int_def BRANCH_COST       (    300, DEFAULT_COST * 3);
  int_def CALL_COST         (    300, DEFAULT_COST * 3);
%}
 639 
 640 
 641 //----------SOURCE BLOCK-------------------------------------------------------
 642 // This is a block of C++ code which provides values, functions, and
 643 // definitions necessary in the rest of the architecture description
 644 source_hpp %{
 645 // Header information of the source block.
 646 // Method declarations/definitions which are used outside
 647 // the ad-scope can conveniently be defined here.
 648 //
 649 // To keep related declarations/definitions/uses close together,
 650 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
 651 
 652 #ifdef PRODUCT
 653 #define BLOCK_COMMENT(str) /* nothing */
 654 #define STOP(error) __ stop(error)
 655 #else
 656 #define BLOCK_COMMENT(str) __ block_comment(str)
 657 #define STOP(error) __ block_comment(error); stop(error)
 658 #endif
 659 
 660 #define BIND(label) __ bind(label); BLOCK_COMMENT(#label ":")
 661 
// Does destination need to be loaded in a register then passed to a
// branch instruction?
extern bool maybe_far_call(const CallNode *n);
extern bool maybe_far_call(const MachCallNode *n);
// True when every address in the code cache is reachable by a direct branch.
static inline bool cache_reachable() {
  return MacroAssembler::_cache_fully_reachable();
}
// True when emitted branches may need the long (far) form.
static inline bool far_branches() {
  return MacroAssembler::far_branches();
}
 672 
 673 extern bool PrintOptoAssembly;
 674 
// Calling-convention queries for C2 (definitions in the source block below).
class c2 {
public:
  // Java return-value register pair for the given ideal register type.
  static OptoRegPair return_value(int ideal_reg);
#ifndef HARD_FLOAT_CC
  // C-ABI return mapping; only differs from the Java one under soft-float.
  static OptoRegPair c_return_value(int ideal_reg);
#endif
};
 682 
// Trampoline-stub accounting for Compile::Shorten_branches; this platform
// emits its far-call sequences inline, so both quantities are zero.
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::Shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
 700 
// Sizes (in bytes) and emitters for the exception and deopt handler blobs.
class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // 3 ARM instructions of 4 bytes each.
  static uint size_exception_handler() {
    return ( 3 * 4 );
  }


  // 9 ARM instructions of 4 bytes each.
  static uint size_deopt_handler() {
    return ( 9 * 4 );
  }

};
 718 
 719 %}
 720 
 721 source %{
 722 #define __ _masm.
 723 
 724 static FloatRegister reg_to_FloatRegister_object(int register_encoding);
 725 static Register reg_to_register_object(int register_encoding);
 726 
 727 
 728 // ****************************************************************************
 729 
 730 // REQUIRED FUNCTIONALITY
 731 
// Indicate if the safepoint node needs the polling page as an input.
// Since ARM does not have absolute addressing, it does: the poll address
// must be materialized into a register first.
bool SafePointNode::needs_polling_address_input() {
  return true;
}
 737 
// emit an interrupt that is caught by the debugger (for debugging compiler)
// Emits a single bkpt #0 instruction.
void emit_break(CodeBuffer &cbuf) {
  MacroAssembler _masm(&cbuf);
  __ bkpt(0);
}
 743 
#ifndef PRODUCT
// Debug printout for the breakpoint node.
// NOTE(review): "TA" marker text inherited from other ports — confirm meaning.
void MachBreakpointNode::format( PhaseRegAlloc *, outputStream *st ) const {
  st->print("TA");
}
#endif

// Emit the breakpoint trap (see emit_break above).
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  emit_break(cbuf);
}

// Size is derived from the generically-computed emitted size.
uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
 757 
 758 
// Emit a single nop instruction into the code buffer.
void emit_nop(CodeBuffer &cbuf) {
  MacroAssembler _masm(&cbuf);
  __ nop();
}
 763 
 764 
// Emit a call to the target encoded in operand m, with the given relocation.
// Runtime calls (or calls with no relocation) are emitted directly; Java
// calls go through a trampoline stub so they can be patched later. The
// emitted length is cross-checked against MachCallNode::ret_addr_offset().
void emit_call_reloc(CodeBuffer &cbuf, const MachCallNode *n, MachOper *m, RelocationHolder const& rspec) {
  // Statically predicted return-address offset; verified at the end.
  int ret_addr_offset0 = n->as_MachCall()->ret_addr_offset();
  int call_site_offset = cbuf.insts()->mark_off();
  MacroAssembler _masm(&cbuf);
  __ set_inst_mark(); // needed in emit_to_interp_stub() to locate the call
  address target = (address)m->method();
  assert(n->as_MachCall()->entry_point() == target, "sanity");
  assert(maybe_far_call(n) == !__ reachable_from_cache(target), "sanity");
  assert(cache_reachable() == __ cache_fully_reachable(), "sanity");

  assert(target != NULL, "need real address");

  if (rspec.type() == relocInfo::runtime_call_type ||
    rspec.type() == relocInfo::none) {
    // Direct call; no patching support required.
    __ call(target, rspec);
  } else {
    // Java call: route through a trampoline stub so it can be re-bound.
    __ trampoline_call(Address(target, rspec), NULL);
  }
  int ret_addr_offset = __ offset();
  // If this fires, MachCallNode::ret_addr_offset() disagrees with what
  // was actually emitted above.
  assert(ret_addr_offset - call_site_offset == ret_addr_offset0, "fix ret_addr_offset()");
}
 786 
 787 //=============================================================================
 788 // REQUIRED FUNCTIONALITY for encoding
// Encoding hooks required by the ADLC interface; nothing is emitted for
// the lo/hi halves on this platform.
void emit_lo(CodeBuffer &cbuf, int val) {  }
void emit_hi(CodeBuffer &cbuf, int val) {  }
 791 
 792 
 793 //=============================================================================
// The constant-table base is materialized into any pointer register.
const RegMask& MachConstantBaseNode::_out_RegMask = PTR_REG_mask();
 795 
 796 int Compile::ConstantTable::calculate_table_base_offset() const {
 797   int offset = -(size() / 2);
 798   // vldr_f32, vldr_f64: 8-bit  offset multiplied by 4: +/- 1024
 799   // ldr, ldrb : 12-bit offset:                 +/- 4096
 800   if (!Assembler::is_simm10(offset)) {
 801     offset = Assembler::min_simm10();
 802   }
 803   return offset;
 804 }
 805 
// The constant base needs no post-register-allocation expansion here.
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
// Never called: requires_postalloc_expand() above returns false.
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}
 810 
// Materialize the (biased) constant-table base address into this node's
// allocated register, with an internal-word relocation so the address is
// fixed up if the code is relocated.
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  Compile* C = ra_->C;
  Compile::ConstantTable& constant_table = C->constant_table();
  MacroAssembler _masm(&cbuf);

  Register r = as_Register(ra_->get_encode(this));
  CodeSection* consts_section = __ code()->consts();
  int consts_size = consts_section->align_at_start(consts_section->size());
  assert(constant_table.size() == consts_size, "must be: %d == %d", constant_table.size(), consts_size);

  // Materialize the constant table base.
  // The base is biased by table_base_offset (see calculate_table_base_offset),
  // so subtract it back out to get the actual address loaded into r.
  address baseaddr = consts_section->start() + -(constant_table.table_base_offset());
  RelocationHolder rspec = internal_word_Relocation::spec(baseaddr);
  __ mov_address(r, baseaddr, rspec);
}
 826 
// 8 bytes: worst case for the mov_address emitted above (presumably a
// two-instruction movw/movt-style sequence — confirm against mov_address).
uint MachConstantBaseNode::size(PhaseRegAlloc*) const {
  return 8;
}
 830 
#ifndef PRODUCT
// Debug-print form: show the materialization of the table base into the
// allocated register.
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  char reg[128];
  ra_->dump_register(this, reg);
  st->print("MOV_SLOW    &constanttable,%s\t! constant table base", reg);
}
#endif
 838 
#ifndef PRODUCT
// Debug-print form of the method prolog; mirrors MachPrologNode::emit below.
void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
  Compile* C = ra_->C;

  for (int i = 0; i < OptoPrologueNops; i++) {
    st->print_cr("NOP"); st->print("\t");
  }

  size_t framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  int bangsize = C->bang_size_in_bytes();
  // Remove two words for the saved return address (LR) and frame pointer,
  // which are pushed separately below.
  framesize -= 2*wordSize;
  bangsize -= 2*wordSize;

  // Calls to C2R adapters often do not accept exceptional returns.
  // We require that their callers must bang for them.  But be careful, because
  // some VM calls (such as call site linkage) can use several kilobytes of
  // stack.  But the stack safety zone should account for that.
  // See bugs 4446381, 4468289, 4497237.
  if (C->need_stack_bang(bangsize)) {
    st->print_cr("! stack bang (%d bytes)", bangsize); st->print("\t");
  }
  st->print_cr("PUSH   R_FP|R_LR_LR"); st->print("\t");
  if (framesize != 0) {
    st->print   ("SUB    R_SP, R_SP, " SIZE_FORMAT,framesize);
  }
}
#endif
 868 
// Emit the compiled-method prolog: patchable nop, optional stack-overflow
// bang, push of FP/LR, frame allocation, and (for the real emit pass)
// recording of the frame-complete offset and constant-table base offset.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  size_t framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  int bangsize = C->bang_size_in_bytes();
  // Remove two words for the saved return address (LR) and frame pointer,
  // which are pushed separately below.
  framesize -= 2*wordSize;
  bangsize -= 2*wordSize;

  // Calls to C2R adapters often do not accept exceptional returns.
  // We require that their callers must bang for them.  But be careful, because
  // some VM calls (such as call site linkage) can use several kilobytes of
  // stack.  But the stack safety zone should account for that.
  // See bugs 4446381, 4468289, 4497237.
  if (C->need_stack_bang(bangsize)) {
    __ arm_stack_overflow_check(bangsize, r12);
  }

  __ push(RegSet::of(rfp, lr), sp);
  if (framesize != 0) {
    __ sub(sp, sp, framesize);
  }

  // offset from scratch buffer is not valid
  // (only record frame-complete when emitting into the real buffer)
  if (strcmp(cbuf.name(), "Compile::Fill_buffer") == 0) {
    C->set_frame_complete( __ offset() );
  }

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
 910 
// Size is determined by emitting into a scratch buffer (MachNode::size).
uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
 914 
// Conservative upper bound on relocation entries emitted by the prolog.
int MachPrologNode::reloc() const {
  return 10; // a large enough number
}
 918 
 919 //=============================================================================
#ifndef PRODUCT
// Debug-print form of the method epilog; mirrors MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
  Compile* C = ra_->C;

  size_t framesize = C->frame_size_in_bytes();
  // Exclude the two words popped as FP/LR below.
  framesize -= 2*wordSize;

  if (framesize != 0) {
    st->print("ADD    R_SP, R_SP, " SIZE_FORMAT "\n\t",framesize);
  }
  st->print("POP    R_FP|R_LR_LR");

  if (do_polling() && ra_->C->is_method_compilation()) {
    st->print("\n\t");
    st->print("MOV    r12, #PollAddr\t! Load Polling address\n\t");
    st->print("LDR    r12,[r12]\t!Poll for Safepointing");
  }
}
#endif
 939 
// Emit the method epilog: deallocate the frame, restore FP/LR, check the
// reserved stack zone if used, and perform the return safepoint poll.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  Compile* C = ra_->C;

  size_t framesize = C->frame_size_in_bytes();
  // Exclude the two words popped as FP/LR below.
  framesize -= 2*wordSize;
  if (framesize != 0) {
    __ add(sp, sp, framesize);
  }
  __ pop(RegSet::of(rfp, lr), sp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  // If this does safepoint polling, then do it here
  if (do_polling() && ra_->C->is_method_compilation()) {
    // mov here is usually one or two instruction
    __ mov_address(r12, (address)os::get_polling_page(), RelocationHolder::none);
    // The load from the polling page traps when a safepoint is pending.
    __ relocate(relocInfo::poll_return_type);
    __ ldr(r12, Address(r12));
  }
}
 963 
// Size is determined by emitting into a scratch buffer (MachNode::size).
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
 967 
// Conservative upper bound on relocation entries emitted by the epilog.
int MachEpilogNode::reloc() const {
  return 16; // a large enough number
}
 971 
// Use the default pipeline class for scheduling purposes.
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}
 975 
// Offset of the safepoint poll within the epilog; not implemented on this
// platform (aborts if ever called).
int MachEpilogNode::safepoint_offset() const {
  assert( do_polling(), "no return for this epilog node");
  //  return MacroAssembler::size_of_sethi(os::get_polling_page());
  Unimplemented();
  return 0;
}
 982 
 983 //=============================================================================
 984 
 985 // Figure out which register class each belongs in: rc_int, rc_float, rc_stack
 986 enum RC { rc_bad, rc_int, rc_float, rc_stack };
 987 static enum RC rc_class( OptoReg::Name reg ) {
 988   if (!OptoReg::is_valid(reg)) return rc_bad;
 989   if (OptoReg::is_stack(reg)) return rc_stack;
 990   VMReg r = OptoReg::as_VMReg(reg);
 991   if (r->is_Register()) return rc_int;
 992   assert(r->is_FloatRegister(), "must be");
 993   return rc_float;
 994 }
 995 
 996 static inline bool is_iRegLd_memhd(OptoReg::Name src_first, OptoReg::Name src_second, int offset) {
 997   int rlo = Matcher::_regEncode[src_first];
 998   int rhi = Matcher::_regEncode[src_second];
 999   // if (!((rlo&1)==0 && (rlo+1 == rhi))) {
1000   //   tty->print_cr("CAUGHT BAD LDRD/STRD");
1001   // }
1002   return (rlo&1)==0 && (rlo+1 == rhi) && is_memoryHD(offset);
1003 }
1004 
// Shared worker for MachSpillCopyNode emit/format/size:
//   cbuf != NULL            -> emit real instructions,
//   cbuf == NULL, !do_size  -> print assembly to st,
//   cbuf == NULL, do_size   -> only compute the size.
// Handles every combination of int/float register and stack-slot source
// and destination, including the high (second) halves of 64-bit values.
// Returns the emitted size in bytes; bails out (record_method_not_compilable)
// on stack offsets that do not fit the relevant addressing mode.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
                                        PhaseRegAlloc *ra_,
                                        bool do_size,
                                        outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if (src_first == dst_first && src_second == dst_second)
    return size;            // Self copy, no move

#ifdef TODO
  if (bottom_type()->isa_vect() != NULL) {
  }
#endif

  // Shared code does not expect instruction set capability based bailouts here.
  // Handle offset unreachable bailout with minimal change in shared code.
  // Bailout only for real instruction emit.
  // This requires a single comment change in shared code. ( see output.cpp "Normal" instruction case )

  MacroAssembler _masm(cbuf);

  // --------------------------------------
  // Check for mem-mem move.  Load into unused float registers and fall into
  // the float-store case.
  if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
    int offset = ra_->reg2offset(src_first);
    if (cbuf && !is_memoryfp(offset)) {
      ra_->C->record_method_not_compilable("unable to handle large constant offsets");
      return 0;
    } else {
      if (src_second_rc != rc_bad) {
        assert((src_first&1)==0 && src_first+1 == src_second, "pair of registers must be aligned/contiguous");
        // Redirect the source to the scratch float pair; the float-store
        // case below then finishes the mem-mem copy.
        src_first     = OptoReg::Name(R_mem_copy_lo_num);
        src_second    = OptoReg::Name(R_mem_copy_hi_num);
        src_first_rc  = rc_float;
        src_second_rc = rc_float;
        if (cbuf) {
          __ vldr_f64(Rmemcopy, Address(sp, offset));
        } else if (!do_size) {
          st->print(LDR_DOUBLE "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset);
        }
      } else {
        src_first     = OptoReg::Name(R_mem_copy_lo_num);
        src_first_rc  = rc_float;
        if (cbuf) {
          __ vldr_f32(Rmemcopy, Address(sp, offset));
        } else if (!do_size) {
          st->print(LDR_FLOAT "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset);
        }
      }
      size += 4;
    }
  }

  if (src_second_rc == rc_stack && dst_second_rc == rc_stack) {
    Unimplemented();
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if (src_first_rc == rc_int && dst_first_rc == rc_int) {
    // Else normal reg-reg copy
    assert( src_second != dst_first, "smashed second before evacuating it" );
    if (cbuf) {
      __ mov(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
    } else if (!do_size) {
      st->print("MOV    R_%s, R_%s\t# spill",
                Matcher::regName[dst_first],
                Matcher::regName[src_first]);
#endif
    }
    size += 4;
  }

  // Check for integer store
  if (src_first_rc == rc_int && dst_first_rc == rc_stack) {
    int offset = ra_->reg2offset(dst_first);
    if (cbuf && !is_memoryI(offset)) {
      ra_->C->record_method_not_compilable("unable to handle large constant offsets");
      return 0;
    } else {
      if (src_second_rc != rc_bad && is_iRegLd_memhd(src_first, src_second, offset)) {
        assert((src_first&1)==0 && src_first+1 == src_second, "pair of registers must be aligned/contiguous");
        if (cbuf) {
          __ strd(reg_to_register_object(Matcher::_regEncode[src_first]), Address(sp, offset));
#ifndef PRODUCT
        } else if (!do_size) {
          if (size != 0) st->print("\n\t");
          st->print(STR_64 "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first), offset);
#endif
        }
        return size + 4;
      } else {
        if (cbuf) {
          __ str(reg_to_register_object(Matcher::_regEncode[src_first]), Address(sp, offset));
#ifndef PRODUCT
        } else if (!do_size) {
          if (size != 0) st->print("\n\t");
          st->print(STR_32 "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first), offset);
#endif
        }
      }
    }
    size += 4;
  }

  // Check for integer load
  if (dst_first_rc == rc_int && src_first_rc == rc_stack) {
    int offset = ra_->reg2offset(src_first);
    if (cbuf && !is_memoryI(offset)) {
      ra_->C->record_method_not_compilable("unable to handle large constant offsets");
      return 0;
    } else {
      if (src_second_rc != rc_bad && is_iRegLd_memhd(dst_first, dst_second, offset)) {
        // NOTE(review): as in the store case, this assert checks the src
        // (stack-slot) pair; the dst register pairing for ldrd is already
        // established by is_iRegLd_memhd above.
        assert((src_first&1)==0 && src_first+1 == src_second, "pair of registers must be aligned/contiguous");
        if (cbuf) {
          __ ldrd(reg_to_register_object(Matcher::_regEncode[dst_first]), Address(sp, offset));
#ifndef PRODUCT
        } else if (!do_size) {
          if (size != 0) st->print("\n\t");
          st->print(LDR_64 "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_first), offset);
#endif
        }
        return size + 4;
      } else {
        if (cbuf) {
          __ ldr(reg_to_register_object(Matcher::_regEncode[dst_first]), Address(sp, offset));
#ifndef PRODUCT
        } else if (!do_size) {
          if (size != 0) st->print("\n\t");
          st->print(LDR_32 "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_first), offset);
#endif
        }
      }
    }
    size += 4;
  }

  // Check for float reg-reg copy
  if (src_first_rc == rc_float && dst_first_rc == rc_float) {
    if (src_second_rc != rc_bad) {
      assert((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous");
      if (cbuf) {
      __ vmov_f64(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
      } else if (!do_size) {
        st->print(MOV_DOUBLE "    R_%s, R_%s\t# spill",
                  Matcher::regName[dst_first],
                  Matcher::regName[src_first]);
#endif
      }
      return 4;
    }
    if (cbuf) {
      __ vmov_f32(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
    } else if (!do_size) {
      st->print(MOV_FLOAT "    R_%s, R_%s\t# spill",
                Matcher::regName[dst_first],
                Matcher::regName[src_first]);
#endif
    }
    // size is still 0 here: none of the earlier cases can have matched
    // when both first halves are float registers.
    size = 4;
  }

  // Check for float store
  if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
    int offset = ra_->reg2offset(dst_first);
    if (cbuf && !is_memoryfp(offset)) {
      ra_->C->record_method_not_compilable("unable to handle large constant offsets");
      return 0;
    } else {
      // Further check for aligned-adjacent pair, so we can use a double store
      if (src_second_rc != rc_bad) {
        assert((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers and stack slots must be aligned/contiguous");
        if (cbuf) {
          __ vstr_f64(reg_to_FloatRegister_object(Matcher::_regEncode[src_first]), Address(sp, offset));
#ifndef PRODUCT
        } else if (!do_size) {
          if (size != 0) st->print("\n\t");
          st->print(STR_DOUBLE "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset);
#endif
        }
        return size + 4;
      } else {
        if (cbuf) {
          __ vstr_f32(reg_to_FloatRegister_object(Matcher::_regEncode[src_first]), Address(sp, offset));
#ifndef PRODUCT
        } else if (!do_size) {
          if (size != 0) st->print("\n\t");
          st->print(STR_FLOAT "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset);
#endif
        }
      }
    }
    size += 4;
  }

  // Check for float load
  if (dst_first_rc == rc_float && src_first_rc == rc_stack) {
    int offset = ra_->reg2offset(src_first);
    if (cbuf && !is_memoryfp(offset)) {
      ra_->C->record_method_not_compilable("unable to handle large constant offsets");
      return 0;
    } else {
      // Further check for aligned-adjacent pair, so we can use a double store
      if (src_second_rc != rc_bad) {
        assert((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers and stack slots must be aligned/contiguous");
        if (cbuf) {
          __ vldr_f64(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), Address(sp, offset));
#ifndef PRODUCT
        } else if (!do_size) {
          if (size != 0) st->print("\n\t");
          st->print(LDR_DOUBLE "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_first),offset);
#endif
        }
        return size + 4;
      } else {
        if (cbuf) {
          __ vldr_f32(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), Address(sp, offset));
#ifndef PRODUCT
        } else if (!do_size) {
          if (size != 0) st->print("\n\t");
          st->print(LDR_FLOAT "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_first),offset);
#endif
        }
      }
    }
    size += 4;
  }

  // check for int reg -> float reg move
  if (src_first_rc == rc_int && dst_first_rc == rc_float) {
    // Further check for aligned-adjacent pair, so we can use a single instruction
    if (src_second_rc != rc_bad) {
      assert((dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous");
      assert((src_first&1)==0 && src_first+1 == src_second, "pairs of registers must be aligned/contiguous");
      assert(src_second_rc == rc_int && dst_second_rc == rc_float, "unsupported");
      if (cbuf) {
        __ vmov_f64(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first]), reg_to_register_object(Matcher::_regEncode[src_second]));
#ifndef PRODUCT
      } else if (!do_size) {
        if (size != 0) st->print("\n\t");
        st->print("FMDRR   R_%s, R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first), OptoReg::regname(src_second));
#endif
      }
      return size + 4;
    } else {
      if (cbuf) {
        __ vmov_f32(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
      } else if (!do_size) {
        if (size != 0) st->print("\n\t");
        st->print(FMSR "   R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first));
#endif
      }
      size += 4;
    }
  }

  // check for float reg -> int reg move
  if (src_first_rc == rc_float && dst_first_rc == rc_int) {
    // Further check for aligned-adjacent pair, so we can use a single instruction
    if (src_second_rc != rc_bad) {
      assert((src_first&1)==0 && src_first+1 == src_second, "pairs of registers must be aligned/contiguous");
      assert((dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous");
      assert(src_second_rc == rc_float && dst_second_rc == rc_int, "unsupported");
      if (cbuf) {
        __ vmov_f64(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[dst_second]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
      } else if (!do_size) {
        if (size != 0) st->print("\n\t");
        st->print("FMRRD   R_%s, R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(dst_second), OptoReg::regname(src_first));
#endif
      }
      return size + 4;
    } else {
      if (cbuf) {
        __ vmov_f32(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
      } else if (!do_size) {
        if (size != 0) st->print("\n\t");
        st->print(FMRS "   R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first));
#endif
      }
      size += 4;
    }
  }

  // --------------------------------------------------------------------
  // Check for hi bits still needing moving.  Only happens for misaligned
  // arguments to native calls.
  if (src_second == dst_second)
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for integer reg-reg copy.  Hi bits are stuck up in the top
  // 32-bits of a 64-bit register, but are needed in low bits of another
  // register (else it's a hi-bits-to-hi-bits copy which should have
  // happened already as part of a 64-bit move)
  if (src_second_rc == rc_int && dst_second_rc == rc_int) {
    if (cbuf) {
      __ mov(reg_to_register_object(Matcher::_regEncode[dst_second]), reg_to_register_object(Matcher::_regEncode[src_second]));
#ifndef PRODUCT
    } else if (!do_size) {
      if (size != 0) st->print("\n\t");
      st->print("MOV    R_%s, R_%s\t# spill high",
                Matcher::regName[dst_second],
                Matcher::regName[src_second]);
#endif
    }
    return size+4;
  }

  // Check for high word integer store
  if (src_second_rc == rc_int && dst_second_rc == rc_stack) {
    int offset = ra_->reg2offset(dst_second);

    if (cbuf && !is_memoryP(offset)) {
      ra_->C->record_method_not_compilable("unable to handle large constant offsets");
      return 0;
    } else {
      if (cbuf) {
        __ str(reg_to_register_object(Matcher::_regEncode[src_second]), Address(sp, offset));
#ifndef PRODUCT
      } else if (!do_size) {
        if (size != 0) st->print("\n\t");
        st->print("STR   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_second), offset);
#endif
      }
    }
    return size + 4;
  }

  // Check for high word integer load
  if (dst_second_rc == rc_int && src_second_rc == rc_stack) {
    int offset = ra_->reg2offset(src_second);
    if (cbuf && !is_memoryP(offset)) {
      ra_->C->record_method_not_compilable("unable to handle large constant offsets");
      return 0;
    } else {
      if (cbuf) {
        __ ldr(reg_to_register_object(Matcher::_regEncode[dst_second]), Address(sp, offset));
#ifndef PRODUCT
      } else if (!do_size) {
        if (size != 0) st->print("\n\t");
        st->print("LDR   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_second), offset);
#endif
      }
    }
    return size + 4;
  }

  Unimplemented();
  return 0; // Mute compiler
}
1376 
#ifndef PRODUCT
// Format mode: cbuf == NULL, do_size == false (see implementation above).
void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
  implementation( NULL, ra_, false, st );
}
#endif
1382 
// Emit mode: real code buffer supplied (see implementation above).
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}
1386 
// Size mode: no buffer, do_size == true (see implementation above).
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}
1390 
1391 //=============================================================================
#ifndef PRODUCT
// Debug-print form: report the total padding emitted (_count nops).
void MachNopNode::format( PhaseRegAlloc *, outputStream *st ) const {
  st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count);
}
#endif
1397 
1398 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
1399   MacroAssembler _masm(&cbuf);
1400   for(int i = 0; i < _count; i += 1) {
1401     __ nop();
1402   }
1403 }
1404 
// Each nop is a fixed 4-byte instruction.
uint MachNopNode::size(PhaseRegAlloc *ra_) const {
  return 4 * _count;
}
1408 
1409 
1410 //=============================================================================
#ifndef PRODUCT
// Debug-print form: the box address is SP plus the monitor slot offset.
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("ADD    %s,R_SP+#%d",Matcher::regName[reg], offset);
}
#endif
1418 
1419 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1420   MacroAssembler _masm(&cbuf);
1421   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1422   int reg = ra_->get_encode(this);
1423   Register dst = reg_to_register_object(reg);
1424 
1425   if (is_aimm(offset)) {
1426     __ add(dst, sp, offset);
1427   } else {
1428     __ mov(dst, offset);
1429     __ add(dst, sp, dst);
1430   }
1431 }
1432 
// Size varies with the offset encoding, so measure via a scratch emit.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_)
  assert(ra_ == ra_->C->regalloc(), "sanity");
  return ra_->C->scratch_emit_size(this);
}
1438 
1439 //=============================================================================
#ifndef PRODUCT
#define R_RTEMP "R_R12"
// Debug-print form of the Unverified Entry Point (inline-cache check);
// mirrors MachUEPNode::emit below.
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
  st->print_cr("\nUEP:");
  if (UseCompressedClassPointers) {
    st->print_cr("\tLDR_w " R_RTEMP ",[R_R0 + oopDesc::klass_offset_in_bytes]\t! Inline cache check");
    st->print_cr("\tdecode_klass " R_RTEMP);
  } else {
    st->print_cr("\tLDR   " R_RTEMP ",[R_R0 + oopDesc::klass_offset_in_bytes]\t! Inline cache check");
  }
  st->print_cr("\tCMP   " R_RTEMP ",R_R12" );
  st->print   ("\tB.NE  SharedRuntime::handle_ic_miss_stub");
}
#endif
1454 
// Unverified Entry Point: inline-cache check. Load the receiver's klass,
// compare it against the expected klass in the inline-cache register, and
// branch to the IC-miss stub on mismatch.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  Register iCache  = reg_to_register_object(Matcher::inline_cache_reg_encode());
  assert(iCache == rscratch2/*Ricklass*/, "should be");
  Register receiver = r0;  // receiver arrives in r0

  __ load_klass(r9, receiver);
  __ cmp(r9, iCache);
  // r9 seems temporary here
  __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, r9, Assembler::NE);
}
1466 
// Size is determined by emitting into a scratch buffer (MachNode::size).
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1470 
1471 
1472 // REQUIRED EMIT CODE
1473 
1474 //=============================================================================
1475 
// Emit exception handler code: an out-of-line stub that jumps to the
// shared exception blob. Returns the stub's offset, or 0 if the code
// cache is full.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
  MacroAssembler _masm(&cbuf);

  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }

  int offset = __ offset();

  // OK to trash LR, because exception blob will kill it
  __ jump(OptoRuntime::exception_blob()->entry_point(), relocInfo::runtime_call_type, lr);

  // Must fit in the budget declared by size_exception_handler().
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");

  __ end_a_stub();

  return offset;
}
1497 
// Emit the deoptimization handler stub: saves the deopt PC on the stack
// (where the deopt blob expects it) without clobbering any live register,
// then jumps to the deopt blob's unpack entry. Returns the stub's offset,
// or 0 if the code cache is full.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
  // Can't use any of the current frame's registers as we may have deopted
  // at a poll and everything can be live.
  MacroAssembler _masm(&cbuf);

  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }

  int offset = __ offset();
  address deopt_pc = __ pc();  // address of this stub = the deopt PC to record

  __ sub(sp, sp, wordSize); // make room for saved PC
  __ push(lr); // save LR that may be live when we get here
  __ mov_relative_address(lr, deopt_pc);
  __ str(lr, Address(sp, wordSize)); // save deopt PC above the saved LR
  __ pop(lr); // restore LR
  // rscratch1 seems killed  at deopt_blob
  __ jump(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type, rscratch1);

  // Must fit in the budget declared by size_deopt_handler().
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");

  __ end_a_stub();
  return offset;
}
1525 
1526 // REQUIRED MATCHER CODE
1527 
1528 //=============================================================================
1529 
// Report whether the matcher may use the rule for the given ideal opcode
// on this CPU: the rule must exist in this AD file and any required
// hardware feature / VM flag must be present.
const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  switch (opcode) {
  // Population count needs the dedicated instruction and its enabling flag.
  case Op_PopCountI:
  case Op_PopCountL:
    if (!UsePopCountInstruction)
      return false;
    break;
  // Integer vector ops require the Advanced SIMD (NEON) extension.
  case Op_LShiftCntV:
  case Op_RShiftCntV:
  case Op_AddVB:
  case Op_AddVS:
  case Op_AddVI:
  case Op_AddVL:
  case Op_SubVB:
  case Op_SubVS:
  case Op_SubVI:
  case Op_SubVL:
  case Op_MulVS:
  case Op_MulVI:
  case Op_LShiftVB:
  case Op_LShiftVS:
  case Op_LShiftVI:
  case Op_LShiftVL:
  case Op_RShiftVB:
  case Op_RShiftVS:
  case Op_RShiftVI:
  case Op_RShiftVL:
  case Op_URShiftVB:
  case Op_URShiftVS:
  case Op_URShiftVI:
  case Op_URShiftVL:
  case Op_AndV:
  case Op_OrV:
  case Op_XorV:
    return VM_Version::features() & FT_AdvSIMD;
  // Single-float vector ops work with either VFPv2 or Advanced SIMD.
  case Op_LoadVector:
  case Op_StoreVector:
  case Op_AddVF:
  case Op_SubVF:
  case Op_MulVF:
    return VM_Version::features() & (FT_VFPV2 | FT_AdvSIMD);
  // Double-float vector ops and vector division require VFPv2.
  case Op_AddVD:
  case Op_SubVD:
  case Op_MulVD:
  case Op_DivVF:
  case Op_DivVD:
    return VM_Version::features() & FT_VFPV2;
  }

  return true;  // Per default match rules are supported.
}
1584 
1585 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
1586 
1587   // TODO
1588   // identify extra cases that we might want to provide match rules for
1589   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
1590   bool ret_value = match_rule_supported(opcode);
1591   // Add rules here.
1592 
1593   return ret_value;  // Per default match rules are supported.
1594 }
1595 
// AArch32 has no predicated (masked) vector instructions.
const bool Matcher::has_predicated_vectors(void) {
  return false;
}

// Use the default register-pressure threshold for float registers.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}

int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}
1607 
// Vector width in bytes
const int Matcher::vector_width_in_bytes(BasicType bt) {
  // Same configured width for every element type.
  return MaxVectorSize;
}

// Vector ideal reg corresponding to specified size in bytes
const uint Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  8: return Op_VecD;  // 64-bit vector (D register)
    case 16: return Op_VecX;  // 128-bit vector (Q register)
  }
  ShouldNotReachHere();
  return 0;
}

const uint Matcher::vector_shift_count_ideal_reg(int size) {
  // Shift counts live in the same kind of vector register as the data.
  return vector_ideal_reg(size);
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}

const int Matcher::min_vector_size(const BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  // Smallest supported vector is 8 bytes (one D register).
  return 8/type2aelembytes(bt);
}
1638 
// ARM doesn't support misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
  return false;
}

// ARM doesn't support AES intrinsics
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// No ConvL2F match rule is advertised on this port.
const bool Matcher::convL2FSupported(void) {
  return false; // TODO why not?
}
1652 
// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On ARM a branch displacement is calculated relative to address
  // of the branch + 8.
  //
  // offset -= 8;
  // return (Assembler::is_simm24(offset));
  // Short-branch variants are disabled on this port; always answer false.
  return false;
}

const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}
1671 
// No scaling for the parameter of the ClearArray node.
const bool Matcher::init_array_count_is_in_bytes = true;

// Needs 2 CMOV's for longs.
const int Matcher::long_cmove_cost() { return 2; }

// CMOVF/CMOVD are expensive on ARM.
const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
// FIXME: does this handle vector shifts as well?
const bool Matcher::need_masked_shift_count = true;

const bool Matcher::convi2l_type_required = true;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  // Only base-plus-offset forms are cloned on this port.
  return clone_base_plus_offset_address(m, mstack, address_visited);
}

// No address reshaping performed on AArch32.
void Compile::reshape_address(AddPNode* addp) {
}

// The narrow oop/klass queries below must never be reached on this port
// (presumably because compressed oops do not exist on 32-bit — note for
// reviewers); ShouldNotCallThis() guards each of them.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return false;
}

bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return false;
}

bool Matcher::const_oop_prefer_decode() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::const_klass_prefer_decode() {
  ShouldNotCallThis();
  return true;
}
1720 
// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = false;

// No-op on ARM.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during deoptimization?
// ARM does not handle callee-save floats.
bool Matcher::float_in_double() {
  return false;
}

// Do ints take an entire long register or just half?
// Note that we if-def off of _LP64.
// The relevant question is how the int is callee-saved.  In _LP64
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits, in not-_LP64 only the low 32 bits are written.
const bool Matcher::int_in_long = false;
1753 
1754 // Return whether or not this register is ever used as an argument.  This
1755 // function is used on startup to build the trampoline stubs in generateOptoStub.
1756 // Registers not mentioned will be killed by the VM call in the trampoline, and
1757 // arguments in those registers not be available to the callee.
1758 bool Matcher::can_be_java_arg( int reg ) {
1759   if (reg == R_R0_num ||
1760       reg == R_R1_num ||
1761       reg == R_R2_num ||
1762       reg == R_R3_num) return true;
1763 
1764   if (reg >= R_S0_num &&
1765       reg <= R_S15_num) return true;
1766   return false;
1767 }
1768 
// Any Java argument register is also spillable.
bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

// No specialized assembly sequence for long division by a constant.
bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  return false;
}
1776 
// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  // Fused div/mod projections are never requested on this port.
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  // SP is preserved in FP (Rmh_SP_save) around method handle invokes;
  // see the preserve_SP/restore_SP encoding classes below.
  return FP_REGP_mask();
}
1804 
// True when the call target may lie outside the range reachable by a
// direct branch from anywhere in the code cache, so a far-call sequence
// may be needed.
bool maybe_far_call(const CallNode *n) {
  return !MacroAssembler::_reachable_from_cache(n->as_Call()->entry_point());
}

// Overload for machine call nodes; same reachability test.
bool maybe_far_call(const MachCallNode *n) {
  return !MacroAssembler::_reachable_from_cache(n->as_MachCall()->entry_point());
}
1812 
1813 %}
1814 
1815 //----------ENCODING BLOCK-----------------------------------------------------
1816 // This block specifies the encoding classes used by the compiler to output
1817 // byte streams.  Encoding classes are parameterized macros used by
1818 // Machine Instruction Nodes in order to generate the bit encoding of the
1819 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently four supported interfaces:
1821 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1822 // operand to generate a function which returns its register number when
1823 // queried.   CONST_INTER causes an operand to generate a function which
1824 // returns the value of the constant when queried.  MEMORY_INTER causes an
1825 // operand to generate four functions which return the Base Register, the
1826 // Index Register, the Scale Value, and the Offset Value of the operand when
1827 // queried.  COND_INTER causes an operand to generate six functions which
1828 // return the encoding code (ie - encoding bits for the instruction)
1829 // associated with each basic boolean condition for a conditional instruction.
1830 //
1831 // Instructions specify two basic values for encoding.  Again, a function
1832 // is available to check if the constant displacement is an oop. They use the
1833 // ins_encode keyword to specify their encoding classes (which must be
1834 // a sequence of enc_class names, and their parameters, specified in
1835 // the encoding block), and they use the
1836 // opcode keyword to specify, in order, their primary, secondary, and
1837 // tertiary opcode.  Only the opcode sections which a particular instruction
1838 // needs for encoding need to be specified.
1839 encode %{
  // Nothing to emit after a call on AArch32.
  enc_class call_epilog %{
    // nothing
  %}

  // Emit a direct call into the VM runtime with a runtime-call relocation.
  enc_class Java_To_Runtime (method meth) %{
    // CALL directly to the runtime
    emit_call_reloc(cbuf, as_MachCall(), $meth, runtime_call_Relocation::spec());
  %}
1848 
  // Emit a Java static (or optimized-virtual) call.  Calls to real Java
  // methods also get a to-interpreter stub so the call site can be
  // re-bound; runtime leaf entries get a plain runtime-call relocation.
  enc_class Java_Static_Call (method meth) %{
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.

    if ( !_method) {
      // Not a Java method: runtime stub call.
      emit_call_reloc(cbuf, as_MachCall(), $meth, runtime_call_Relocation::spec());
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      emit_call_reloc(cbuf, as_MachCall(), $meth, rspec);

      // Emit stubs for static call.
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        // No stub space left: abandon this compilation.
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
  %}
1869 
  // Record the return address of the upcoming call in
  // JavaThread::last_Java_pc.  Emits exactly two instructions (ADR + STR),
  // as checked by the size assert.
  enc_class save_last_PC %{
    // preserve mark
    address mark = cbuf.insts()->mark();
    debug_only(int off0 = cbuf.insts_size());
    MacroAssembler _masm(&cbuf);
    int ret_addr_offset = as_MachCall()->ret_addr_offset();
    __ adr(lr, mark + ret_addr_offset);
    __ str(lr, Address(Rthread, JavaThread::last_Java_pc_offset()));
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == 2 * Assembler::InstructionSize, "correct size prediction");
    // restore mark
    cbuf.insts()->set_mark(mark);
  %}

  // Save SP into Rmh_SP_save before a call that may modify SP
  // (method handle invokes); a single 4-byte MOV.
  enc_class preserve_SP %{
    // preserve mark
    address mark = cbuf.insts()->mark();
    debug_only(int off0 = cbuf.insts_size());
    MacroAssembler _masm(&cbuf);
    // FP is preserved across all calls, even compiled calls.
    // Use it to preserve SP in places where the callee might change the SP.
    __ mov(Rmh_SP_save, sp);
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == 4, "correct size prediction");
    // restore mark
    cbuf.insts()->set_mark(mark);
  %}

  // Restore SP from Rmh_SP_save after such a call.
  enc_class restore_SP %{
    MacroAssembler _masm(&cbuf);
    __ mov(sp, Rmh_SP_save);
  %}
1902 
  // Emit an inline-cache dispatched (virtual) call: preload the IC register
  // with the non_oop_word sentinel (MOVW/MOVT pair, patched later with the
  // cached value) and call through the fixup routine.
  enc_class Java_Dynamic_Call (method meth) %{
    MacroAssembler _masm(&cbuf);
    Register R12_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode());
    assert(R12_ic_reg == rscratch2/*Ricklass*/, "should be");
    __ set_inst_mark();
    __ movw_i(R12_ic_reg, ((unsigned int)Universe::non_oop_word()) & 0xffff);
    __ movt_i(R12_ic_reg, ((unsigned int)Universe::non_oop_word()) >> 16);
    address  virtual_call_oop_addr = __ inst_mark();
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    int method_index = resolved_method_index(cbuf);
    emit_call_reloc(cbuf, as_MachCall(), $meth, virtual_call_Relocation::spec(virtual_call_oop_addr, method_index));
  %}
1916 
  // Materialize an integer constant replicated across a 64-bit FP register:
  // 'cnt' copies of a 'wth'-byte value are packed into one 32-bit word,
  // which is then duplicated into both halves of the destination D register.
  enc_class LdReplImmI(immI src, regD dst, iRegI tmp, int cnt, int wth) %{
    // FIXME: load from constant table?
    // Load a constant replicated "count" times with width "width"
    int count = $cnt$$constant;
    int width = $wth$$constant;
    assert(count*width == 4, "sanity");
    int val = $src$$constant;
    if (width < 4) {
      // Pack 'count' copies of the low 'width' bytes into one 32-bit word.
      int bit_width = width * 8;
      val &= (((int)1) << bit_width) - 1; // mask off sign bits
      for (int i = 0; i < count - 1; i++) {
        val |= (val << bit_width);
      }
    }
    MacroAssembler _masm(&cbuf);

    if (val == -1) {
      // All-ones pattern: single MVN.
      __ mvn_i($tmp$$Register, 0);
    } else if (val == 0) {
      __ mov_i($tmp$$Register, 0);
    } else {
      // Arbitrary 32-bit pattern: MOVW/MOVT pair.
      __ movw_i($tmp$$Register, val & 0xffff);
      __ movt_i($tmp$$Register, (unsigned int)val >> 16);
    }
    __ vmov_f64($dst$$FloatRegister, $tmp$$Register, $tmp$$Register);
  %}
1943 
1944   enc_class LdReplImmF(immF src, regD dst, iRegI tmp) %{
1945     // Replicate float con 2 times and pack into vector (8 bytes) in regD.
1946     float fval = $src$$constant;
1947     int val = *((int*)&fval);
1948     MacroAssembler _masm(&cbuf);
1949 
1950     if (val == -1) {
1951       __ mvn_i($tmp$$Register, 0);
1952     } else if (val == 0) {
1953       __ mov_i($tmp$$Register, 0);
1954     } else {
1955       __ movw_i($tmp$$Register, val & 0xffff);
1956       __ movt_i($tmp$$Register, (unsigned int)val >> 16);
1957     }
1958     __ vmov_f64($dst$$FloatRegister, $tmp$$Register, $tmp$$Register);
1959   %}
1960 
  // Compare two character sequences lexicographically, leaving a negative,
  // zero, or positive value in 'result'.  Each input may be Latin1 (1 byte
  // per char) or UTF16 (2 bytes per char), selected by bytes_per_char1/2.
  // A NEON bulk loop is used when AdvSIMD is available; otherwise a word
  // loop (possible only when both encodings match) or a per-char loop.
  enc_class enc_String_Compare(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result,
                               iRegI tmp1, iRegI tmp2, Q0_regD ftmp1, Q1_regD ftmp2,
                               int bytes_per_char1, int bytes_per_char2) %{
    MacroAssembler _masm(&cbuf);

    Register       str1 = $str1$$Register;
    Register       str2 = $str2$$Register;
    Register       cnt1 = $cnt1$$Register;
    Register       cnt2 = $cnt2$$Register;
    Register       tmp1 = $tmp1$$Register;
    Register       tmp2 = $tmp2$$Register;
    FloatRegister ftmp1 = $ftmp1$$FloatRegister;
    FloatRegister ftmp2 = $ftmp2$$FloatRegister;
    Register     result = $result$$Register;
    int bytes_per_char1 = $bytes_per_char1;
    int bytes_per_char2 = $bytes_per_char2;

    // Per-char load helpers selected by element width.
    typedef void (Assembler::*ldfp)(Register, const Address &, Assembler::Condition);
    typedef void (Assembler::*usubp)(Register, Register, Register, Assembler::Condition);
    ldfp ldf_16 = &Assembler::ldrh;
    ldfp ldf_8  = &Assembler::ldrb;

    // slow path: single char load
    int cnt_per_char = bytes_per_char1==2 && bytes_per_char2==2 ? 2 : 1;
    ldfp lds1 = bytes_per_char1 == 2 ? ldf_16 : ldf_8;
    ldfp lds2 = bytes_per_char2 == 2 ? ldf_16 : ldf_8;
    usubp usub = bytes_per_char1 == 1 ? (usubp)&Assembler::usub8 : (usubp)&Assembler::usub16;

    assert_different_registers(str1, str2, cnt1, cnt2, tmp1, tmp2, result);

    Label Llength_diff, Ldone, Lshort_loop;

    BLOCK_COMMENT("string_compare {");

    // for UU we count bytes (saves 1 insn) for others count in chars
    if (cnt_per_char == 1 && bytes_per_char1 == 2)
      __ lsr(cnt1, cnt1, 1);
    if (cnt_per_char == 1 && bytes_per_char2 == 2)
      __ lsr(cnt2, cnt2, 1);

    // Compute the minimum of the string lengths and save the difference.
    __ subs(tmp1, cnt1, cnt2);
    __ mov(cnt2, cnt1, Assembler::LE); // min

    // Check if the strings start at the same location.
    __ cmp(str1, str2);
    __ b(Llength_diff, Assembler::EQ);

    // without NEON only for UU and LL fast path is available
    if ((VM_Version::features() & FT_AdvSIMD) || bytes_per_char1 == bytes_per_char2) {
      Label Lshort_string, Lnext_word, Ldifference;

      // A very short string
      __ cmp(cnt2, 8+4);
      __ b(Lshort_string, Assembler::LT);

      // Compare words
      {
        const int bits_per_char = bytes_per_char1==1 && bytes_per_char2==1 ? 8 : 16;
        // Check first few chars to avoid excessive processing
        if (bytes_per_char1 == 1 && bytes_per_char2 == 1) {
          Label Lfull_speed;
          __ ldr(tmp2, __ post(str1, wordSize));
          __ ldr(result, __ post(str2, wordSize));
          (_masm.*usub)(result, tmp2, result, Assembler::AL);
          __ tst(result, result);
          __ b(Lfull_speed, Assembler::EQ);

          // Isolate the first differing char and compute its difference.
          __ rbit(cnt1, result);
          __ clz(cnt1, cnt1);
          __ bic(cnt1, cnt1, bits_per_char-1);
          __ lsr(result, result, cnt1);
          __ lsr(tmp2, tmp2, cnt1);
          __ ubfx(result, result, 0, bits_per_char);
          __ ubfx(tmp2, tmp2, 0, bits_per_char);
          __ cmp(result, tmp2);
          __ sub(result, result, 1<<bits_per_char, Assembler::HI);
          __ b(Ldone);

          __ bind(Lfull_speed);
        } else {
          (_masm.*lds1)(result, __ post(str1, bytes_per_char1), Assembler::AL);
          (_masm.*lds2)(cnt1, __ post(str2, bytes_per_char2), Assembler::AL);
          __ subs(result, result, cnt1);
          __ b(Ldone, Assembler::NE);
          (_masm.*lds1)(result, __ post(str1, bytes_per_char1), Assembler::AL);
          (_masm.*lds2)(cnt1, __ post(str2, bytes_per_char2), Assembler::AL);
          __ subs(result, result, cnt1);
          __ b(Ldone, Assembler::NE);
        }

        if (VM_Version::features() & FT_AdvSIMD) {
#define LD(expand_needed,reg,str)        \
          if (!(expand_needed))                                  \
            __ vld1_64((reg), __ post(str, 8), Assembler::ALIGN_STD);     \
          else { \
            __ vld1_32((reg), 0, __ post(str, 4), false);         \
            __ vmovl_8u((reg), (reg)); /* kills reg+1 */        \
          }
          const int cnt_per_LD  = bytes_per_char1==bytes_per_char2 ? 8 : 4;
          const bool expand_needed1 = bytes_per_char1==1 && bytes_per_char2==2;
          const bool expand_needed2 = bytes_per_char1==2 && bytes_per_char2==1;

          __ sub(cnt2, cnt2, 2*cnt_per_char*(16/bits_per_char) + cnt_per_LD); // 4 chars processed above for LL, 2 for the rest of encodings
          __ bind(Lnext_word);
          LD(expand_needed1,ftmp1,str1);
          LD(expand_needed2,ftmp2,str2);
          if (bits_per_char == 8)
            __ vsub_64_8(ftmp2, ftmp1, ftmp2);
          else
            __ vsub_64_16(ftmp2, ftmp1, ftmp2);
          __ vmov_f64(result, cnt1, ftmp2);
          __ orrs(tmp2, result, cnt1);
          __ b(Ldifference, Assembler::NE);
          __ subs(cnt2, cnt2, cnt_per_LD);
          __ b(Lnext_word, Assembler::HS);

          // check the tail
          __ adds(cnt2, cnt2, cnt_per_LD);
          __ b(Llength_diff, Assembler::EQ);
          __ b(Lshort_loop);

          __ bind(Ldifference);
          __ vmov_f64(tmp2, cnt2, ftmp1);
          __ tst(result, result);
          __ mov(result, cnt1, Assembler::EQ);
          __ mov(tmp2, cnt2, Assembler::EQ);
        } else {
          __ sub(cnt2, cnt2, 2*cnt_per_char*(16/bits_per_char)+4); // Skip 4 or 2 chars processed above. The last word is a special case

          // Move both string pointers to the last word of their
          // strings, negate the remaining count.
          __ lea(str1, Address(str1, cnt2));
          __ lea(str2, Address(str2, cnt2));
          __ neg(cnt2, cnt2);

          // Loop, loading words and comparing them into tmp2.
          __ bind(Lnext_word);
          __ ldr(tmp2, Address(str1, cnt2));
          __ ldr(result, Address(str2, cnt2));
          __ teq(result, tmp2);
          __ b(Ldifference, Assembler::NE);
          __ adds(cnt2, cnt2, wordSize); // cnt is per-byte for both UU and LL
          __ b(Lnext_word, Assembler::LT);

          // Last word.  In the case where length == 2 we compare the
          // same word twice, but that's still faster than another
          // conditional branch.

          __ ldr(tmp2, Address(str1));
          __ ldr(result, Address(str2));
          __ teq(result, tmp2);
          __ b(Llength_diff, Assembler::EQ);

          // Find the first different characters in the words and
          // compute their difference.
          __ bind(Ldifference);
          (_masm.*usub)(result, tmp2, result, Assembler::AL);
        }

        // now result is a-b and tmp2 is a
        if (bits_per_char == 8) {
          __ rbit(cnt1, result);
          __ clz(cnt1, cnt1);
          __ bic(cnt1, cnt1, bits_per_char-1);
          __ lsr(result, result, cnt1);
          __ lsr(tmp2, tmp2, cnt1);
          __ ubfx(result, result, 0, bits_per_char);
          __ ubfx(tmp2, tmp2, 0, bits_per_char);
        } else {
          __ lsls(cnt1, result, 16);
          __ uxth(result, result, ror(16), Assembler::EQ);
          __ uxth(result, result, ror(), Assembler::NE);
          __ uxth(tmp2, tmp2, ror(16), Assembler::EQ);
          __ uxth(tmp2, tmp2, ror(), Assembler::NE);
        }
        __ cmp(result, tmp2);
        __ sub(result, result, 1<<bits_per_char, Assembler::HI);

        __ b(Ldone);
      }

      __ bind(Lshort_string);
    }

    // Is the minimum length zero?
    __ cbz(cnt2, Llength_diff);

    // Per-char fallback loop.
    __ bind(Lshort_loop);
    (_masm.*lds1)(result, __ post(str1, bytes_per_char1), Assembler::AL);
    (_masm.*lds2)(cnt1, __ post(str2, bytes_per_char2), Assembler::AL);
    __ subs(result, result, cnt1);
    __ b(Ldone, Assembler::NE);
    __ subs(cnt2, cnt2, cnt_per_char);
    __ b(Lshort_loop, Assembler::NE);

    // Strings are equal up to min length.  Return the length difference.
    __ bind(Llength_diff);
    __ asr(result, tmp1, cnt_per_char-1); // input in bytes, result in chars, nice convention

    // That's it
    __ bind(Ldone);

    BLOCK_COMMENT("} string_compare");
  %}
2166 
  // Compare two byte/char sequences for equality, setting 'result' to 1
  // (equal) or 0 (not equal).  When isArray is set the inputs are array
  // oops (null checks, length comparison and header offset are applied);
  // otherwise they are raw data pointers with 'cnt' elements.
  enc_class enc_Array_Equals(R0RegP ary1, R1RegP ary2, iRegI cnt, iRegI tmp2, iRegI result, int elemSize, bool isArray) %{
    Label Ldone, Lloop, Lset_result, Lshort_array, Lnext_word, Lshort_array_cont, Lone_byte;
    MacroAssembler _masm(&cbuf);

    Register   ary1 = $ary1$$Register;
    Register   ary2 = $ary2$$Register;
    Register   cnt =  $cnt$$Register;
    Register   tmp2 = $tmp2$$Register;
    Register result = $result$$Register;
    int elemSize    = $elemSize$$constant;
    bool isArray    = $isArray$$constant;

    assert_different_registers(ary1, ary2, cnt, tmp2, result);

    if (isArray) {
      int length_offset  = arrayOopDesc::length_offset_in_bytes();
      int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);

      BLOCK_COMMENT(elemSize == 2 ? "char_array_equalsUU {" : "char_array_equalsLL {");

      // return true if the same array
      __ cmpoop(ary1, ary2);
      __ b(Lset_result, Assembler::EQ); // equal

      // null arrays compare not-equal (the non-null case was handled above)
      __ ands(result, ary1, ary1);
      __ b(Ldone, Assembler::EQ);    // not equal

      __ ands(result, ary2, ary2);
      __ b(Ldone, Assembler::EQ);    // not equal

      //load the lengths of arrays
      __ ldr(cnt, Address(ary1, length_offset));
      __ ldr(tmp2, Address(ary2, length_offset));

      // return false if the two arrays are not equal length
      __ teq(cnt, tmp2);
      __ b(Lset_result, Assembler::NE);    // not equal

      __ tst(cnt, cnt);
      __ b(Lset_result, Assembler::EQ);    // zero-length arrays are equal

      // load array addresses
      __ add(ary1, ary1, base_offset);
      __ add(ary2, ary2, base_offset);
    } else {
    // Check if the strings start at the same location.
      BLOCK_COMMENT(elemSize == 2 ? "string_equalsUU {" : "string_equalsLL {");

      __ cmp(ary1, ary2);
      __ b(Lset_result, Assembler::EQ);
    }

    // Short inputs take the halfword loop; long ones the word loop below.
    __ cmp(cnt, 4*(2/elemSize));
    __ b(Lshort_array, Assembler::LT);

    {
      // Move both string pointers to the last word of their
      // strings, negate the remaining count, and convert it to bytes if needed.
      if (isArray && elemSize == 2)
          __ lsl(cnt, cnt, 1);
      __ sub(cnt, cnt, wordSize); // The last word is a special case

      __ lea(ary1, Address(ary1, cnt));
      __ lea(ary2, Address(ary2, cnt));
      __ neg(cnt, cnt);

      // Loop, loading words and comparing them.
      __ bind(Lnext_word);
      __ ldr(result, Address(ary1, cnt));
      __ ldr(tmp2, Address(ary2, cnt));
      __ cmp(result, tmp2);
      __ b(Lset_result, Assembler::NE);
      __ adds(cnt, cnt, wordSize);
      __ b(Lnext_word, Assembler::LT);

      // Last word.  In the case where length < 4 we compare the
      // same bytes twice, but that's still faster than another
      // conditional branch.
      __ ldr(result, Address(ary1));
      __ ldr(tmp2, Address(ary2));
      __ cmp(result, tmp2);
      __ b(Lset_result);
    }

    // Short-input path: compare one halfword at a time.
    __ bind(Lshort_array); {
      if (!isArray) {
        __ tst(cnt, cnt);
        __ b(Lset_result, Assembler::EQ);
      }

      if (elemSize == 1) {
        __ subs(cnt, cnt, 1);
        __ b(Lone_byte, Assembler::EQ);
      }
      __ bind(Lshort_array_cont);

      __ ldrh(result, __ post(ary1, 2));
      __ ldrh(tmp2, __ post(ary2, 2));
      __ cmp(result, tmp2);
      __ b(Lset_result, Assembler::NE);
      __ subs(cnt, cnt, isArray ? 2/elemSize : 2);
      __ b(Lshort_array_cont, Assembler::GT);
    }

    if (elemSize == 1) {
      __ cmn(cnt, 1);
      __ b(Lset_result, Assembler::EQ);

      // Odd-length byte input: compare the final single byte.
      __ bind(Lone_byte); {
        __ ldrb(result, Address(ary1));
        __ ldrb(tmp2, Address(ary2));
        __ cmp(result, tmp2);
      }
    }

    // Convert the flags into the boolean result.
    __ bind(Lset_result);
    __ mov(result, 1, Assembler::EQ);
    __ mov(result, 0, Assembler::NE);

    __ bind(Ldone);

    if (isArray)
      BLOCK_COMMENT(elemSize == 2 ? "} char_array_equalsUU" : "} char_array_equalsLL");
    else
      BLOCK_COMMENT(elemSize == 2? "} string_equalsUU" : "} string_equalsLL");

    %}
2294 
  // Compress a UTF16 char sequence into Latin1 bytes.  On full success
  // 'result' holds the original length; if any char exceeds 0xFF the
  // compression stops and 'result' is set to 0.  A NEON stub handles long
  // inputs when AdvSIMD is available.
  enc_class enc_Char_Array_Compress(R2RegP src, R1RegP dst, R3RegI len, R9RegI tmp1,
                                    Q0_regD tmp2, Q1_regD tmp3, R12RegI tmp4,
                                    R0RegI result, flagsReg ccr) %{
    Label Ldone, Lloop1, Lset_result;
    MacroAssembler _masm(&cbuf);

    Register      src    = $src$$Register;
    Register      dst    = $dst$$Register;
    Register      len    = $len$$Register;
    Register      tmp1   = $tmp1$$Register;
    Register      result = $result$$Register;
    // tmp2, tmp3 and tmp4 are consumed by NEON stub

    BLOCK_COMMENT("char_array_compress {");

    // result == len on success; zero-length input is trivially done
    __ movs(result, len);
    __ b(Ldone, Assembler::EQ);

    if (VM_Version::features() & FT_AdvSIMD) {
      Label Lloop2;
      __ cmp(len, 2+8+16); // neon stub consumes minimum 24 chars
      __ b(Lloop1, Assembler::LO);

      // check first 2 chars in hope they quickly give information about encoding
      __ ldrh(tmp1, __ post(src, 2));
      __ strb(tmp1, __ post(dst, 1));
      __ lsrs(tmp1, tmp1, 8);
      __ ldrh(tmp1, __ post(src, 2), Assembler::EQ);
      __ strb(tmp1, __ post(dst, 1), Assembler::EQ);
      __ lsrs(tmp1, tmp1, 8, Assembler::EQ);
      __ b(Lset_result, Assembler::NE);
      __ sub(len, len, 2);

      __ call(StubRoutines::aarch32::string_compress_neon());
      __ b(Ldone, Assembler::EQ);
    }

    // nothing better we could do with Aarch32 basic instruction set
    __ bind(Lloop1); {
      __ ldrh(tmp1, __ post(src, 2));
      __ strb(tmp1, __ post(dst, 1));
      __ rsbs(tmp1, tmp1, 0x100); // GT good, LE bad
      __ subs(len, len, 1, Assembler::GT);
      __ b(Lloop1, Assembler::GT);
    }

    __ cmp(len, 0);
    __ bind(Lset_result);
    // a non-Latin1 char was found: report failure with result = 0
    __ mov(result, 0, Assembler::NE);

    __ bind(Ldone);
    BLOCK_COMMENT("} char_array_compress");
    %}
2348 
  // Inflate a Latin1 byte sequence into UTF16 chars, zero-extending each
  // byte.  A NEON stub handles inputs of 16+ bytes when AdvSIMD is
  // available; the scalar path processes two bytes per iteration with a
  // single-byte tail.
  enc_class enc_Byte_Array_Inflate(R0RegP src, R1RegP dst, R2RegI len,
                                   iRegI tmp1, Q0_regD tmp2, flagsReg ccr) %{
    Label Ldone, Lloop1, Lone_char;
    MacroAssembler _masm(&cbuf);

    Register      src = $src$$Register;
    Register      dst = $dst$$Register;
    Register      len = $len$$Register;
    Register     tmp1 = $tmp1$$Register;
    // tmp2 is consumed by NEON stub

    BLOCK_COMMENT("byte_array_inflate {");

    __ cbz(len, Ldone);
    if (VM_Version::features() & FT_AdvSIMD) {
      Label Lskip_simd;

      __ cmp(len, 16);
      __ b(Lskip_simd, Assembler::LO);
      __ call(StubRoutines::aarch32::string_inflate_neon());
      __ b(Ldone, Assembler::EQ);
      __ bind(Lskip_simd);
    }

    // nothing better we could do with Aarch32 basic instruction set
    __ subs(len, len, 1);
    __ b(Lone_char, Assembler::EQ);
    __ bind(Lloop1); {
      __ ldrb(tmp1, __ post(src, 1));
      __ strh(tmp1, __ post(dst, 2));
      __ ldrb(tmp1, __ post(src, 1));
      __ strh(tmp1, __ post(dst, 2));
      __ subs(len, len, 2);
      __ b(Lloop1, Assembler::HI);
    }
    __ b(Ldone, Assembler::LO);

    // odd length: one trailing byte remains
    __ bind(Lone_char);
    __ ldrb(tmp1, __ post(src, 1));
    __ strh(tmp1, __ post(dst, 2));

    __ bind(Ldone);
    BLOCK_COMMENT("} byte_array_inflate");
  %}
2393 
2394 %}
2395 
2396 //----------FRAME--------------------------------------------------------------
2397 // Definition of frame structure and management information.
2398 //
2399 //  S T A C K   L A Y O U T    Allocators stack-slot number
2400 //                             |   (to get allocators register number
2401 //  G  Owned by    |        |  v    add VMRegImpl::stack0)
2402 //  r   CALLER     |        |
2403 //  o     |        +--------+      pad to even-align allocators stack-slot
2404 //  w     V        |  pad0  |        numbers; owned by CALLER
2405 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
2406 //  h     ^        |   in   |  5
2407 //        |        |  args  |  4   Holes in incoming args owned by SELF
2408 //  |     |        |        |  3
2409 //  |     |        +--------+
2410 //  V     |        | old out|      Empty on Intel, window on Sparc
2411 //        |    old |preserve|      Must be even aligned.
2412 //        |     SP-+--------+----> Matcher::_old_SP, 8 (or 16 in LP64)-byte aligned
2413 //        |        |   in   |  3   area for Intel ret address
2414 //     Owned by    |preserve|      Empty on Sparc.
2415 //       SELF      +--------+
2416 //        |        |  pad2  |  2   pad to align old SP
2417 //        |        +--------+  1
2418 //        |        | locks  |  0
2419 //        |        +--------+----> VMRegImpl::stack0, 8 (or 16 in LP64)-byte aligned
2420 //        |        |  pad1  | 11   pad to align new SP
2421 //        |        +--------+
2422 //        |        |        | 10
2423 //        |        | spills |  9   spills
2424 //        V        |        |  8   (pad0 slot for callee)
2425 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
2426 //        ^        |  out   |  7
2427 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
2428 //     Owned by    +--------+
2429 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
2430 //        |    new |preserve|      Must be even-aligned.
2431 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
2432 //        |        |        |
2433 //
2434 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
2435 //         known from SELF's arguments and the Java calling convention.
2436 //         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
2444 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
2445 //         even aligned with pad0 as needed.
2446 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
2447 //         region 6-11 is even aligned; it may be padded out more so that
2448 //         the region from SP to FP meets the minimum stack alignment.
2449 
frame %{
  // What direction does stack grow in (assumed to be same for native & Java)
  stack_direction(TOWARDS_LOW);

  // These two registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(R_Ricklass);          // Inline Cache Register or Method* for I2C
  interpreter_method_oop_reg(R_Rmethod); // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset);

  // Number of stack slots consumed by a Monitor enter
  sync_stack_slots(1 * VMRegImpl::slots_per_word);

  // Compiled code's Frame Pointer
  frame_pointer(R_R13);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes);
  //  LP64: Alignment size in bytes (128-bit -> 16 bytes)
  // !LP64: Alignment size in bytes (64-bit  ->  8 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.
  // FP + LR
  in_preserve_stack_slots(2 * VMRegImpl::slots_per_word);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  // ADLC doesn't support parsing expressions, so I folded the math by hand.
  varargs_C_out_slots_killed( 0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1*VMRegImpl::slots_per_word +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an OptoRegs array locating
  // arguments either in registers or in stack slots for calling
  // java
  calling_convention %{
    (void) SharedRuntime::java_calling_convention(sig_bt, regs, length, is_outgoing);

  %}

  // Body of function which returns an OptoRegs array locating
  // arguments either in registers or in stack slots for calling
  // C.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of compiled Java return values.
  return_value %{
    return c2::return_value(ideal_reg);
  %}

  // Location of C return values.
  // With HARD_FLOAT_CC defined the C mapping coincides with the Java one,
  // so the Java return_value() is reused; otherwise a distinct soft-float
  // C mapping is consulted.
  c_return_value %{
#ifndef HARD_FLOAT_CC
    return c2::c_return_value(ideal_reg);
#else
    return c2::return_value(ideal_reg);
#endif
  %}

%}
2526 
2527 //----------ATTRIBUTES---------------------------------------------------------
2528 //----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute
ins_attrib ins_size(32);           // Required size attribute (in bits)
ins_attrib ins_short_branch(0);    // Required flag: is this instruction a
                                   // non-matching short branch variant of some
                                   // long branch?
2534 
2535 //----------OPERANDS-----------------------------------------------------------
2536 // Operand definitions must precede instruction definitions for correct parsing
2537 // in the ADLC because operands constitute user defined types which are used in
2538 // instruction definitions.
2539 
2540 //----------Simple Operands----------------------------------------------------
2541 // Immediate Operands
2542 // Integer Immediate: 32-bit
operand immI() %{
  match(ConI);

  op_cost(0);
  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 8-bit unsigned - for VMOV
operand immU8() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 255));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 16-bit unsigned; requires ARMv6T2 (movw-capable) cores
operand immI16() %{
  predicate((n->get_int() >> 16) == 0 && (VM_Version::features() & FT_ARMV6T2));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: offset for half and double word loads and stores
operand immIHD() %{
  predicate(is_memoryHD(n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: offset for fp loads and stores
// (must also be 4-byte aligned)
operand immIFP() %{
  predicate(is_memoryfp(n->get_int()) && ((n->get_int() & 3) == 0));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes and shifts (5-bit unsigned, 0..31)
operand immU5() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 31));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 6-bit values in [32, 63] (upper half of the 6-bit range)
operand immU6Big() %{
  predicate(n->get_int() >= 32 && n->get_int() <= 63);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the value 0
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the value 1
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the value 2
operand immI_2() %{
  predicate(n->get_int() == 2);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the value 3
operand immI_3() %{
  predicate(n->get_int() == 3);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the value 4
operand immI_4() %{
  predicate(n->get_int() == 4);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the value 8
operand immI_8() %{
  predicate(n->get_int() == 8);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the values 32-63
operand immI_32_63() %{
  predicate(n->get_int() >= 32 && n->get_int() <= 63);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Integer Immediate: the value 16
operand immI_16() %{
  predicate(n->get_int() == 16);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the value 24
operand immI_24() %{
  predicate(n->get_int() == 24);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the value 255
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the value 65535
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}
2732 
2733 // Integer Immediates for arithmetic instructions
2734 
// Immediate valid for an arithmetic (data-processing) instruction,
// as decided by is_aimm().
operand aimmI() %{
  predicate(is_aimm(n->get_int()));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Immediate whose negation is a valid arithmetic immediate
// (e.g. add of the negated value instead of sub).
operand aimmIneg() %{
  predicate(is_aimm(-n->get_int()));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Non-negative arithmetic immediate.
operand aimmU31() %{
  predicate((0 <= n->get_int()) && is_aimm(n->get_int()));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediates for logical instructions

// Immediate valid for a logical instruction, as decided by is_limmI().
operand limmI() %{
  predicate(is_limmI(n->get_int()));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Logical immediate confined to the low 8 bits (see is_limmI_low).
operand limmIlow8() %{
  predicate(is_limmI_low(n->get_int(), 8));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Non-negative logical immediate.
operand limmU31() %{
  predicate(0 <= n->get_int() && is_limmI(n->get_int()));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Immediate whose bitwise complement is a valid logical immediate
// (usable via bic/mvn forms).
operand limmIn() %{
  predicate(is_limmI(~n->get_int()));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}
2799 
2800 // Long Immediate: the value FF
operand immL_FF() %{
  predicate( n->get_long() == 0xFFL );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: the value FFFF
operand immL_FFFF() %{
  predicate( n->get_long() == 0xFFFFL );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate: 32 or 64-bit
operand immP() %{
  match(ConP);

  op_cost(5);
  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate(n->get_ptr() == 0);
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate: address of the VM's safepoint polling page
operand immP_poll() %{
  predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page());
  match(ConP);

  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate (compressed/narrow oop constant)
operand immN()
%{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow klass constant
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate (narrow)
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immL() %{
  match(ConL);
  op_cost(40);
  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: the value 0
operand immL0() %{
  predicate(n->get_long() == 0L);
  match(ConL);
  op_cost(0);
  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: 16-bit unsigned; requires ARMv6T2 (movw-capable) cores
operand immL16() %{
  predicate(n->get_long() >= 0 && n->get_long() < (1<<16)  && (VM_Version::features() & FT_ARMV6T2));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}
2913 %}
2914 
2915 // Double Immediate
operand immD() %{
  match(ConD);

  op_cost(40);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d.
operand immD0() %{
  predicate(jlong_cast(n->getd()) == 0);

  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: encodable as an 8-bit FP immediate
// (per Assembler::operand_valid_for_double_immediate, e.g. for vmov)
operand imm8D() %{
  predicate(Assembler::operand_valid_for_double_immediate(n->getd()));
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF() %{
  match(ConF);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f
operand immF0() %{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: encoded as 8 bits
operand imm8F() %{
  predicate(Assembler::operand_valid_for_float_immediate(n->getf()));
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
2971 
2972 // Integer Register Operands
2973 // Integer Register
operand iRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(R0RegI);
  match(R1RegI);
  match(R2RegI);
  match(R3RegI);
  match(R12RegI);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register
operand iRegP() %{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(R0RegP);
  match(R1RegP);
  match(R2RegP);
  match(RExceptionRegP);
  match(RmethodRegP); // R8
  match(R9RegP);
  match(RthreadRegP); // R10, TODO Oracle FIXME: move to sp_ptr_RegP?
  match(R12RegP);
  match(LRRegP);

  match(sp_ptr_RegP);
  match(store_ptr_RegP);

  format %{ %}
  interface(REG_INTER);
%}

// GPRs + Rmethod + Rthread + SP
operand sp_ptr_RegP() %{
  constraint(ALLOC_IN_RC(sp_ptr_reg));
  match(RegP);
  match(iRegP);
  match(SPRegP); // FIXME: check cost

  format %{ %}
  interface(REG_INTER);
%}

// R0 pinned as a pointer register
operand R0RegP() %{
  constraint(ALLOC_IN_RC(R0_regP));
  match(iRegP);

  format %{ %}
  interface(REG_INTER);
%}

// R1 pinned as a pointer register
operand R1RegP() %{
  constraint(ALLOC_IN_RC(R1_regP));
  match(iRegP);

  format %{ %}
  interface(REG_INTER);
%}

// R2 pinned as a pointer register
operand R2RegP() %{
  constraint(ALLOC_IN_RC(R2_regP));
  match(iRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Exception oop register
operand RExceptionRegP() %{
  constraint(ALLOC_IN_RC(Rexception_regP));
  match(iRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Thread register
operand RthreadRegP() %{
  constraint(ALLOC_IN_RC(Rthread_regP));
  match(iRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Method register
operand RmethodRegP() %{
  constraint(ALLOC_IN_RC(Rmethod_regP));
  match(iRegP);

  format %{ %}
  interface(REG_INTER);
%}

// IP (intra-procedure scratch) pinned as a pointer register
operand IPRegP() %{
  constraint(ALLOC_IN_RC(IP_regP));
  match(iRegP);

  format %{ %}
  interface(REG_INTER);
%}

// LR (link register) pinned as a pointer register
operand LRRegP() %{
  constraint(ALLOC_IN_RC(LR_regP));
  match(iRegP);

  format %{ %}
  interface(REG_INTER);
%}

// R0 pinned as an int register
operand R0RegI() %{
  constraint(ALLOC_IN_RC(R0_regI));
  match(iRegI);

  format %{ %}
  interface(REG_INTER);
%}

// R1 pinned as an int register
operand R1RegI() %{
  constraint(ALLOC_IN_RC(R1_regI));
  match(iRegI);

  format %{ %}
  interface(REG_INTER);
%}

// R2 pinned as an int register
operand R2RegI() %{
  constraint(ALLOC_IN_RC(R2_regI));
  match(iRegI);

  format %{ %}
  interface(REG_INTER);
%}

// R3 pinned as an int register
operand R3RegI() %{
  constraint(ALLOC_IN_RC(R3_regI));
  match(iRegI);

  format %{ %}
  interface(REG_INTER);
%}

// R9 pinned as an int register
operand R9RegI() %{
  constraint(ALLOC_IN_RC(R9_regI));
  match(iRegI);

  format %{ %}
  interface(REG_INTER);
%}

// R12 pinned as an int register
operand R12RegI() %{
  constraint(ALLOC_IN_RC(R12_regI));
  match(iRegI);

  format %{ %}
  interface(REG_INTER);
%}
3130 
3131 // Long Register
operand iRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(R0R1RegL);
  match(R2R3RegL);

  format %{ %}
  interface(REG_INTER);
%}

// Long register restricted to an even/odd-aligned pair
operand iRegLd() %{
  constraint(ALLOC_IN_RC(long_reg_align));
  match(iRegL); // FIXME: allows unaligned R11/R12?

  format %{ %}
  interface(REG_INTER);
%}

// first long arg, or return value
operand R0R1RegL() %{
  constraint(ALLOC_IN_RC(R0R1_regL));
  match(iRegL);

  format %{ %}
  interface(REG_INTER);
%}

// second long arg (R2:R3 pair)
operand R2R3RegL() %{
  constraint(ALLOC_IN_RC(R2R3_regL));
  match(iRegL);

  format %{ %}
  interface(REG_INTER);
%}
3166 
3167 // Condition Code Flag Register
operand flagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "apsr" %}
  interface(REG_INTER);
%}

// Result of compare to 0 (TST)
operand flagsReg_EQNELTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "apsr_EQNELTGE" %}
  interface(REG_INTER);
%}

// Condition Code Register, unsigned comparisons.
operand flagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
#ifdef TODO
  match(RegFlagsP);
#endif

  format %{ "apsr_U" %}
  interface(REG_INTER);
%}

// Condition Code Register, pointer comparisons.
operand flagsRegP() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "apsr_P" %}
  interface(REG_INTER);
%}

// Condition Code Register, long comparisons.
operand flagsRegL_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "apsr_L_LTGE" %}
  interface(REG_INTER);
%}

// Condition Code Register, unsigned long comparisons.
operand flagsRegUL() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "apsr_UL" %}
  interface(REG_INTER);
%}

// Condition Code Register, long equality comparisons.
operand flagsRegL_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "apsr_L_EQNE" %}
  interface(REG_INTER);
%}

// Condition Code Register, long LE/GT comparisons.
operand flagsRegL_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "apsr_L_LEGT" %}
  interface(REG_INTER);
%}

// Condition Code Register, floating comparisons, unordered same as "less".
operand flagsRegF() %{
  constraint(ALLOC_IN_RC(float_flags));
  match(RegFlags);

  format %{ "fpscr_F" %}
  interface(REG_INTER);
%}
3247 
3248 // Vectors
operand vecD() %{
  constraint(ALLOC_IN_RC(actual_dflt_reg));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// 128-bit vector register
operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// Double-precision FP register
operand regD() %{
  constraint(ALLOC_IN_RC(actual_dflt_reg));
  match(RegD);
  match(regD_low);

  format %{ %}
  interface(REG_INTER);
%}

// Double register pinned to the D0/D1 pair (quad Q0)
operand Q0_regD() %{
  constraint(ALLOC_IN_RC(D0D1_regD));
  match(RegD);
  match(regD_low);

  format %{ %}
  interface(REG_INTER);
%}

// Double register pinned to the D2/D3 pair (quad Q1)
operand Q1_regD() %{
  constraint(ALLOC_IN_RC(D2D3_regD));
  match(RegD);
  match(regD_low);

  format %{ %}
  interface(REG_INTER);
%}

// Single-precision FP register
operand regF() %{
  constraint(ALLOC_IN_RC(sflt_reg));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// Double register from the low (VFP-addressable) bank
operand regD_low() %{
  constraint(ALLOC_IN_RC(dflt_low_reg));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers

// Method Register
operand inline_cache_regP(iRegP reg) %{
  constraint(ALLOC_IN_RC(Ricklass_regP));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

operand interpreter_method_oop_regP(iRegP reg) %{
  constraint(ALLOC_IN_RC(Rmethod_regP));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}
3324 
3325 
3326 //----------Complex Operands---------------------------------------------------
3327 // Indirect Memory Reference
operand indirect(sp_ptr_RegP reg) %{
  constraint(ALLOC_IN_RC(sp_ptr_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xf); // PC => no index
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect with Offset in ]-4096, 4096[
operand indOffset12(sp_ptr_RegP reg, immI12 offset) %{
  constraint(ALLOC_IN_RC(sp_ptr_reg));
  match(AddP reg offset);

  op_cost(100);
  format %{ "[$reg + $offset]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xf); // PC => no index
    scale(0x0);
    disp($offset);
  %}
%}

// Indirect with offset for float load/store
operand indOffsetFP(sp_ptr_RegP reg, immIFP offset) %{
  constraint(ALLOC_IN_RC(sp_ptr_reg));
  match(AddP reg offset);

  op_cost(100);
  format %{ "[$reg + $offset]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xf); // PC => no index
    scale(0x0);
    disp($offset);
  %}
%}

// Indirect with Offset for half and double words
operand indOffsetHD(sp_ptr_RegP reg, immIHD offset) %{
  constraint(ALLOC_IN_RC(sp_ptr_reg));
  match(AddP reg offset);

  op_cost(100);
  format %{ "[$reg + $offset]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xf); // PC => no index
    scale(0x0);
    disp($offset);
  %}
%}

// Indirect with Offset and Offset+4 in ]-1024, 1024[
operand indOffsetFPx2(sp_ptr_RegP reg, immX10x2 offset) %{
  constraint(ALLOC_IN_RC(sp_ptr_reg));
  match(AddP reg offset);

  op_cost(100);
  format %{ "[$reg + $offset]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xf); // PC => no index
    scale(0x0);
    disp($offset);
  %}
%}

// Indirect with Offset and Offset+4 in ]-4096, 4096[
operand indOffset12x2(sp_ptr_RegP reg, immI12x2 offset) %{
  constraint(ALLOC_IN_RC(sp_ptr_reg));
  match(AddP reg offset);

  op_cost(100);
  format %{ "[$reg + $offset]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xf); // PC => no index
    scale(0x0);
    disp($offset);
  %}
%}

// Indirect with Register Index
operand indIndex(iRegP addr, iRegX index) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP addr index);

  op_cost(100);
  format %{ "[$addr + $index]" %}
  interface(MEMORY_INTER) %{
    base($addr);
    index($index);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(iRegP addr, iRegX index, immU5 scale) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP addr (LShiftX index scale));

  op_cost(100);
  format %{"[$addr + $index << $scale]" %}
  interface(MEMORY_INTER) %{
    base($addr);
    index($index);
    scale($scale);
    disp(0x0);
  %}
%}
3446 
3447 // Operands for expressing Control Flow
3448 // NOTE:  Label is a predefined operand which should not be redefined in
3449 //        the AD file.  It is generically handled within the ADLC.
3450 
3451 //----------Conditional Branch Operands----------------------------------------
3452 // Comparison Op  - This is the operation of the comparison, and is limited to
3453 //                  the following set of codes:
3454 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
3455 //
3456 // Other attributes of the comparison, such as unsignedness, are specified
3457 // by the comparison instruction that sets a condition code flags register.
3458 // That result is represented by a flags operand whose subtype is appropriate
3459 // to the unsignedness (etc.) of the comparison.
3460 //
3461 // Later, the instruction which matches both the Comparison Op (a Bool) and
3462 // the flags (produced by the Cmp) specifies the coding of the comparison op
3463 // by matching a specific subtype of Bool operand below, such as cmpOpU.
3464 
3465 operand cmpOp() %{
3466   match(Bool);
3467 
3468   format %{ "" %}
3469   interface(COND_INTER) %{
3470     equal(0x0);
3471     not_equal(0x1);
3472     less(0xb);
3473     greater_equal(0xa);
3474     less_equal(0xd);
3475     greater(0xc);
3476     overflow(0x0); // unsupported/unimplemented
3477     no_overflow(0x0); // unsupported/unimplemented
3478   %}
3479 %}
3480 
3481 // integer comparison with 0, signed
3482 operand cmpOp0() %{
3483   match(Bool);
3484 
3485   format %{ "" %}
3486   interface(COND_INTER) %{
3487     equal(0x0);
3488     not_equal(0x1);
3489     less(0x4);
3490     greater_equal(0x5);
3491     less_equal(0xd); // unsupported
3492     greater(0xc); // unsupported
3493     overflow(0x0); // unsupported/unimplemented
3494     no_overflow(0x0); // unsupported/unimplemented
3495   %}
3496 %}
3497 
3498 // Comparison Op, unsigned
3499 operand cmpOpU() %{
3500   match(Bool);
3501 
3502   format %{ "u" %}
3503   interface(COND_INTER) %{
3504     equal(0x0);
3505     not_equal(0x1);
3506     less(0x3);
3507     greater_equal(0x2);
3508     less_equal(0x9);
3509     greater(0x8);
3510     overflow(0x0); // unsupported/unimplemented
3511     no_overflow(0x0); // unsupported/unimplemented
3512   %}
3513 %}
3514 
// Comparison Op, pointer (same as unsigned): pointers compare as
// unsigned 32-bit values, so the encodings match cmpOpU above.
operand cmpOpP() %{
  match(Bool);

  format %{ "p" %}
  interface(COND_INTER) %{
    equal(0x0);         // EQ
    not_equal(0x1);     // NE
    less(0x3);          // CC/LO
    greater_equal(0x2); // CS/HS
    less_equal(0x9);    // LS
    greater(0x8);       // HI
    overflow(0x0); // unsupported/unimplemented
    no_overflow(0x0); // unsupported/unimplemented
  %}
%}
3531 
// Comparison Op, long (signed). Same encodings as the signed int cmpOp;
// the long comparison itself is materialized by the matching instruction.
operand cmpOpL() %{
  match(Bool);

  format %{ "L" %}
  interface(COND_INTER) %{
    equal(0x0);         // EQ
    not_equal(0x1);     // NE
    less(0xb);          // LT
    greater_equal(0xa); // GE
    less_equal(0xd);    // LE
    greater(0xc);       // GT
    overflow(0x0); // unsupported/unimplemented
    no_overflow(0x0); // unsupported/unimplemented
  %}
%}
3547 
// Comparison Op, long (signed), with the operand order commuted:
// each condition is replaced by the condition that holds when the two
// compared operands are swapped (LT<->GT, GE<->LE).
operand cmpOpL_commute() %{
  match(Bool);

  format %{ "L" %}
  interface(COND_INTER) %{
    equal(0x0);         // EQ
    not_equal(0x1);     // NE
    less(0xc);          // GT of swapped operands
    greater_equal(0xd); // LE of swapped operands
    less_equal(0xa);    // GE of swapped operands
    greater(0xb);       // LT of swapped operands
    overflow(0x0); // unsupported/unimplemented
    no_overflow(0x0); // unsupported/unimplemented
  %}
%}
3563 
3564 //----------OPERAND CLASSES----------------------------------------------------
3565 // Operand Classes are groups of operands that are used to simplify
3566 // instruction definitions by not requiring the AD writer to specify separate
3567 // instructions for every form of operand when the instruction accepts
3568 // multiple operand types with the same basic encoding and format.  The classic
3569 // case of this is memory operands.
// Int/pointer loads and stores accept the full set of addressing modes.
opclass memoryI ( indirect, indOffset12, indIndex, indIndexScale );
opclass memoryP ( indirect, indOffset12, indIndex, indIndexScale );
// VFP loads/stores: base register plus the (smaller) FP-encodable offset only.
opclass memoryF ( indirect, indOffsetFP );
opclass memoryF2 ( indirect, indOffsetFPx2 );
opclass memoryD ( indirect, indOffsetFP );
opclass memoryfp( indirect, indOffsetFP );
// Byte/halfword/doubleword integer accesses: halfword/doubleword-form offsets.
opclass memoryB ( indirect, indIndex, indOffsetHD );
opclass memoryS ( indirect, indIndex, indOffsetHD );
opclass memoryL ( indirect, indIndex, indOffsetHD );

// Scaled-index-only forms, used when the scaled mode must be matched exactly.
opclass memoryScaledI(indIndexScale);
opclass memoryScaledP(indIndexScale);

// when ldrex/strex is used:
opclass memoryex ( indirect );
opclass indIndexMemory( indIndex );
opclass memorylong ( indirect, indOffset12x2 );
opclass memoryvld ( indirect /* , write back mode not implemented */ );
3588 
3589 //----------PIPELINE-----------------------------------------------------------
3590 pipeline %{
3591 
3592 //----------ATTRIBUTES---------------------------------------------------------
// Global pipeline attributes for the scheduler.
attributes %{
  fixed_size_instructions;           // Fixed size instructions
  max_instructions_per_bundle = 4;   // Up to 4 instructions per bundle
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( Nop_A0, Nop_A1, Nop_MS, Nop_FA, Nop_BR );
%}
3603 
//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine
// A0/A1 = integer ALUs (IALU is "either"), MS = memory, BR = branch,
// FA/FM = FP add/multiply, IDIV/FDIV = dividers.
resources(A0, A1, MS, BR, FA, FM, IDIV, FDIV, IALU = A0 | A1);

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline
// Stage letters below are referenced by the pipe_classes (e.g. R = issue/read,
// E = execute, C/M/W/X = later result stages).
pipe_desc(A, P, F, B, I, J, S, R, E, C, M, W, X, T, D);
3612 
3613 //----------PIPELINE CLASSES---------------------------------------------------
3614 // Pipeline Classes describe the stages in which input and output are
3615 // referenced by the hardware pipeline.
3616 
// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
    single_instruction;
    dst   : E(write);
    src1  : R(read);
    src2  : R(read);
    IALU  : R;
%}

// Integer ALU reg-reg long operation (two 32-bit halves, two ALU slots)
pipe_class ialu_reg_reg_2(iRegL dst, iRegL src1, iRegL src2) %{
    instruction_count(2);
    dst   : E(write);
    src1  : R(read);
    src2  : R(read);
    IALU  : R;
    IALU  : R;
%}

// Integer ALU reg-reg long dependent operation
// (second half consumes the carry/flags produced by the first)
pipe_class ialu_reg_reg_2_dep(iRegL dst, iRegL src1, iRegL src2, flagsReg cr) %{
    instruction_count(1); multiple_bundles;
    dst   : E(write);
    src1  : R(read);
    src2  : R(read);
    cr    : E(write);
    IALU  : R(2);
%}

// Integer ALU reg-imm operaion
pipe_class ialu_reg_imm(iRegI dst, iRegI src1) %{
    single_instruction;
    dst   : E(write);
    src1  : R(read);
    IALU  : R;
%}

// Integer ALU reg-reg operation with condition code
pipe_class ialu_cc_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
    single_instruction;
    dst   : E(write);
    cr    : E(write);
    src1  : R(read);
    src2  : R(read);
    IALU  : R;
%}

// Integer ALU zero-reg operation
pipe_class ialu_zero_reg(iRegI dst, immI0 zero, iRegI src2) %{
    single_instruction;
    dst   : E(write);
    src2  : R(read);
    IALU  : R;
%}

// Integer ALU zero-reg operation with condition code only
pipe_class ialu_cconly_zero_reg(flagsReg cr, iRegI src) %{
    single_instruction;
    cr    : E(write);
    src   : R(read);
    IALU  : R;
%}

// Integer ALU reg-reg operation with condition code only
pipe_class ialu_cconly_reg_reg(flagsReg cr, iRegI src1, iRegI src2) %{
    single_instruction;
    cr    : E(write);
    src1  : R(read);
    src2  : R(read);
    IALU  : R;
%}

// Integer ALU reg-imm operation with condition code only
pipe_class ialu_cconly_reg_imm(flagsReg cr, iRegI src1) %{
    single_instruction;
    cr    : E(write);
    src1  : R(read);
    IALU  : R;
%}

// Integer ALU reg-reg-zero operation with condition code only
pipe_class ialu_cconly_reg_reg_zero(flagsReg cr, iRegI src1, iRegI src2, immI0 zero) %{
    single_instruction;
    cr    : E(write);
    src1  : R(read);
    src2  : R(read);
    IALU  : R;
%}

// Integer ALU reg-imm-zero operation with condition code only
pipe_class ialu_cconly_reg_imm_zero(flagsReg cr, iRegI src1, immI0 zero) %{
    single_instruction;
    cr    : E(write);
    src1  : R(read);
    IALU  : R;
%}

// Integer ALU reg-reg operation with condition code, src1 modified
pipe_class ialu_cc_rwreg_reg(flagsReg cr, iRegI src1, iRegI src2) %{
    single_instruction;
    cr    : E(write);
    src1  : E(write);
    src1  : R(read);
    src2  : R(read);
    IALU  : R;
%}

// Long compare producing an int result (uses ALU and branch resources;
// result available 4 cycles after E).
pipe_class cmpL_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg cr ) %{
    multiple_bundles;
    dst   : E(write)+4;
    cr    : E(write);
    src1  : R(read);
    src2  : R(read);
    IALU  : R(3);
    BR    : R(2);
%}
3733 
// Integer ALU operation
pipe_class ialu_none(iRegI dst) %{
    single_instruction;
    dst   : E(write);
    IALU  : R;
%}

// Integer ALU reg operation
pipe_class ialu_reg(iRegI dst, iRegI src) %{
    single_instruction; may_have_no_code;
    dst   : E(write);
    src   : R(read);
    IALU  : R;
%}

// Integer ALU reg conditional operation
// This instruction has a 1 cycle stall, and cannot execute
// in the same cycle as the instruction setting the condition
// code. We kludge this by pretending to read the condition code
// 1 cycle earlier, and by marking the functional units as busy
// for 2 cycles with the result available 1 cycle later than
// is really the case.
pipe_class ialu_reg_flags( iRegI op2_out, iRegI op2_in, iRegI op1, flagsReg cr ) %{
    single_instruction;
    op2_out : C(write);
    op1     : R(read);
    cr      : R(read);       // This is really E, with a 1 cycle stall
    BR      : R(2);
    MS      : R(2);
%}

// Integer ALU reg operation (narrowing long -> int move)
pipe_class ialu_move_reg_L_to_I(iRegI dst, iRegL src) %{
    single_instruction; may_have_no_code;
    dst   : E(write);
    src   : R(read);
    IALU  : R;
%}
// Widening int -> long move
pipe_class ialu_move_reg_I_to_L(iRegL dst, iRegI src) %{
    single_instruction; may_have_no_code;
    dst   : E(write);
    src   : R(read);
    IALU  : R;
%}

// Two integer ALU reg operations
pipe_class ialu_reg_2(iRegL dst, iRegL src) %{
    instruction_count(2);
    dst   : E(write);
    src   : R(read);
    A0    : R;
    A1    : R;
%}

// Two integer ALU reg operations
pipe_class ialu_move_reg_L_to_L(iRegL dst, iRegL src) %{
    instruction_count(2); may_have_no_code;
    dst   : E(write);
    src   : R(read);
    A0    : R;
    A1    : R;
%}

// Integer ALU imm operation
pipe_class ialu_imm(iRegI dst) %{
    single_instruction;
    dst   : E(write);
    IALU  : R;
%}

// Integer ALU negated-imm operation (same timing as ialu_imm)
pipe_class ialu_imm_n(iRegI dst) %{
    single_instruction;
    dst   : E(write);
    IALU  : R;
%}

// Integer ALU reg-reg with carry operation
pipe_class ialu_reg_reg_cy(iRegI dst, iRegI src1, iRegI src2, iRegI cy) %{
    single_instruction;
    dst   : E(write);
    src1  : R(read);
    src2  : R(read);
    IALU  : R;
%}

// Integer ALU cc operation
pipe_class ialu_cc(iRegI dst, flagsReg cc) %{
    single_instruction;
    dst   : E(write);
    cc    : R(read);
    IALU  : R;
%}

// Integer ALU cc / second IALU operation
pipe_class ialu_reg_ialu( iRegI dst, iRegI src ) %{
    instruction_count(1); multiple_bundles;
    dst   : E(write)+1;
    src   : R(read);
    IALU  : R;
%}

// Integer ALU cc / second IALU operation
pipe_class ialu_reg_reg_ialu( iRegI dst, iRegI p, iRegI q ) %{
    instruction_count(1); multiple_bundles;
    dst   : E(write)+1;
    p     : R(read);
    q     : R(read);
    IALU  : R;
%}

// Integer ALU hi-lo-reg operation (constant built in two halves)
pipe_class ialu_hi_lo_reg(iRegI dst, immI src) %{
    instruction_count(1); multiple_bundles;
    dst   : E(write)+1;
    IALU  : R(2);
%}

// Long Constant
pipe_class loadConL( iRegL dst, immL src ) %{
    instruction_count(2); multiple_bundles;
    dst   : E(write)+1;
    IALU  : R(2);
    IALU  : R(2);
%}
3858 
// Pointer Constant
pipe_class loadConP( iRegP dst, immP src ) %{
    instruction_count(0); multiple_bundles;
    fixed_latency(6);
%}

// Polling Address
pipe_class loadConP_poll( iRegP dst, immP_poll src ) %{
    dst   : E(write);
    IALU  : R;
%}

// Long Constant small
pipe_class loadConLlo( iRegL dst, immL src ) %{
    instruction_count(2);
    dst   : E(write);
    IALU  : R;
    IALU  : R;
%}

// [PHH] This is wrong for 64-bit.  See LdImmF/D.
pipe_class loadConFD(regF dst, immF src, iRegP tmp) %{
    instruction_count(1); multiple_bundles;
    src   : R(read);
    dst   : M(write)+1;
    IALU  : R;
    MS    : E;
%}

// Integer ALU nop operation (any ALU)
pipe_class ialu_nop() %{
    single_instruction;
    IALU  : R;
%}

// Integer ALU nop operation (pinned to A0)
pipe_class ialu_nop_A0() %{
    single_instruction;
    A0    : R;
%}

// Integer ALU nop operation (pinned to A1)
pipe_class ialu_nop_A1() %{
    single_instruction;
    A1    : R;
%}
3905 
// Integer Multiply reg-reg operation
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
    single_instruction;
    dst   : E(write);
    src1  : R(read);
    src2  : R(read);
    MS    : R(5);
%}

// Long multiply: longer occupancy of the MS unit, result 4 cycles later
pipe_class mulL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
    single_instruction;
    dst   : E(write)+4;
    src1  : R(read);
    src2  : R(read);
    MS    : R(6);
%}

// Integer Divide reg-reg (hardware SDIV/UDIV available)
pipe_class sdiv_reg_reg_IDIV(iRegI dst, iRegI src1, iRegI src2, iRegI temp, flagsReg cr) %{
    single_instruction;
    dst   : E(write);
    temp  : E(write);
    src1  : R(read);
    src2  : R(read);
    temp  : R(read);
    MS    : R(10);
%}

// Integer divide in software (no divider hardware): much longer latency
pipe_class sdiv_reg_reg_SW(iRegI dst, iRegI src1, iRegI src2, iRegI temp1, iRegI temp2, flagsReg cr) %{
    instruction_count(1); multiple_bundles;
    dst   : E(write);
    temp1 : E(write);
    temp2 : E(write);
    src1  : R(read);
    src2  : R(read);
    temp1 : R(read);
    temp2 : R(read);
    MS    : R(38);
%}

// Long Divide
pipe_class divL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
    dst  : E(write)+71;
    src1 : R(read);
    src2 : R(read)+1;
    MS   : R(70);
%}
3953 
// Floating Point Add Float
pipe_class faddF_reg_reg(regF dst, regF src1, regF src2) %{
    single_instruction;
    dst   : X(write);
    src1  : E(read);
    src2  : E(read);
    FA    : R;
%}

// Floating Point Add Double
pipe_class faddD_reg_reg(regD dst, regD src1, regD src2) %{
    single_instruction;
    dst   : X(write);
    src1  : E(read);
    src2  : E(read);
    FA    : R;
%}

// Floating Point Conditional Move based on integer flags
pipe_class int_conditional_float_move (cmpOp cmp, flagsReg cr, regF dst, regF src) %{
    single_instruction;
    dst   : X(write);
    src   : E(read);
    cr    : R(read);
    FA    : R(2);
    BR    : R(2);
%}

// Floating Point Conditional Move based on integer flags
pipe_class int_conditional_double_move (cmpOp cmp, flagsReg cr, regD dst, regD src) %{
    single_instruction;
    dst   : X(write);
    src   : E(read);
    cr    : R(read);
    FA    : R(2);
    BR    : R(2);
%}

// Floating Point Multiply Float
pipe_class fmulF_reg_reg(regF dst, regF src1, regF src2) %{
    single_instruction;
    dst   : X(write);
    src1  : E(read);
    src2  : E(read);
    FM    : R;
%}

// Floating Point Multiply Double
pipe_class fmulD_reg_reg(regD dst, regD src1, regD src2) %{
    single_instruction;
    dst   : X(write);
    src1  : E(read);
    src2  : E(read);
    FM    : R;
%}

// Floating Point Divide Float (FDIV unit busy for 14 cycles)
pipe_class fdivF_reg_reg(regF dst, regF src1, regF src2) %{
    single_instruction;
    dst   : X(write);
    src1  : E(read);
    src2  : E(read);
    FM    : R;
    FDIV  : C(14);
%}

// Floating Point Divide Double (FDIV unit busy for 17 cycles)
pipe_class fdivD_reg_reg(regD dst, regD src1, regD src2) %{
    single_instruction;
    dst   : X(write);
    src1  : E(read);
    src2  : E(read);
    FM    : R;
    FDIV  : C(17);
%}
4029 
// Floating Point Move/Negate/Abs Float
pipe_class faddF_reg(regF dst, regF src) %{
    single_instruction;
    dst   : W(write);
    src   : E(read);
    FA    : R(1);
%}

// Floating Point Move/Negate/Abs Double
pipe_class faddD_reg(regD dst, regD src) %{
    single_instruction;
    dst   : W(write);
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert F->D
pipe_class fcvtF2D(regD dst, regF src) %{
    single_instruction;
    dst   : X(write);
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert I->D
pipe_class fcvtI2D(regD dst, regF src) %{
    single_instruction;
    dst   : X(write);
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert LHi->D
pipe_class fcvtLHi2D(regD dst, regD src) %{
    single_instruction;
    dst   : X(write);
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert L->D
pipe_class fcvtL2D(regD dst, iRegL src) %{
    single_instruction;
    dst   : X(write);
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert L->F
pipe_class fcvtL2F(regF dst, iRegL src) %{
    single_instruction;
    dst   : X(write);
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert D->F
pipe_class fcvtD2F(regD dst, regF src) %{
    single_instruction;
    dst   : X(write);
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert I->L
pipe_class fcvtI2L(regD dst, regF src) %{
    single_instruction;
    dst   : X(write);
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert D->I
pipe_class fcvtD2I(iRegI dst, regD src, flagsReg cr) %{
    instruction_count(1); multiple_bundles;
    dst   : X(write)+6;
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert D->L
pipe_class fcvtD2L(regD dst, regD src, flagsReg cr) %{
    instruction_count(1); multiple_bundles;
    dst   : X(write)+6;
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert F->I
pipe_class fcvtF2I(regF dst, regF src, flagsReg cr) %{
    instruction_count(1); multiple_bundles;
    dst   : X(write)+6;
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert F->L
pipe_class fcvtF2L(regD dst, regF src, flagsReg cr) %{
    instruction_count(1); multiple_bundles;
    dst   : X(write)+6;
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert I->F
pipe_class fcvtI2F(regF dst, regF src) %{
    single_instruction;
    dst   : X(write);
    src   : E(read);
    FA    : R;
%}

// Floating Point Compare (single)
pipe_class faddF_fcc_reg_reg_zero(flagsRegF cr, regF src1, regF src2, immI0 zero) %{
    single_instruction;
    cr    : X(write);
    src1  : E(read);
    src2  : E(read);
    FA    : R;
%}

// Floating Point Compare (double)
pipe_class faddD_fcc_reg_reg_zero(flagsRegF cr, regD src1, regD src2, immI0 zero) %{
    single_instruction;
    cr    : X(write);
    src1  : E(read);
    src2  : E(read);
    FA    : R;
%}

// Floating Add Nop
pipe_class fadd_nop() %{
    single_instruction;
    FA  : R;
%}
4165 
// Integer Store to Memory
pipe_class istore_mem_reg(memoryI mem, iRegI src) %{
    single_instruction;
    mem   : R(read);
    src   : C(read);
    MS    : R;
%}

// Integer Store to Memory (source may be SP or a pointer register)
pipe_class istore_mem_spORreg(memoryI mem, sp_ptr_RegP src) %{
    single_instruction;
    mem   : R(read);
    src   : C(read);
    MS    : R;
%}

// Float Store
pipe_class fstoreF_mem_reg(memoryF mem, RegF src) %{
    single_instruction;
    mem : R(read);
    src : C(read);
    MS  : R;
%}

// Float Store (zero constant, no source register to read)
pipe_class fstoreF_mem_zero(memoryF mem, immF0 src) %{
    single_instruction;
    mem : R(read);
    MS  : R;
%}

// Double Store
pipe_class fstoreD_mem_reg(memoryD mem, RegD src) %{
    instruction_count(1);
    mem : R(read);
    src : C(read);
    MS  : R;
%}

// Double Store (zero constant)
pipe_class fstoreD_mem_zero(memoryD mem, immD0 src) %{
    single_instruction;
    mem : R(read);
    MS  : R;
%}

// Integer Load (when sign bit propagation not needed)
pipe_class iload_mem(iRegI dst, memoryI mem) %{
    single_instruction;
    mem : R(read);
    dst : C(write);
    MS  : R;
%}

// Integer Load (when sign bit propagation or masking is needed);
// result lands one stage later (M) than the plain load.
pipe_class iload_mask_mem(iRegI dst, memoryI mem) %{
    single_instruction;
    mem : R(read);
    dst : M(write);
    MS  : R;
%}

// Float Load
pipe_class floadF_mem(regF dst, memoryF mem) %{
    single_instruction;
    mem : R(read);
    dst : M(write);
    MS  : R;
%}

// Double Load
pipe_class floadD_mem(regD dst, memoryD mem) %{
    instruction_count(1); multiple_bundles; // Again, unaligned argument is only multiple case
    mem : R(read);
    dst : M(write);
    MS  : R;
%}

// Memory Nop
pipe_class mem_nop() %{
    single_instruction;
    MS  : R;
%}

// Materialize an address/constant high part in a pointer register
pipe_class sethi(iRegP dst, immI src) %{
    single_instruction;
    dst  : E(write);
    IALU : R;
%}

// Read the safepoint polling page
pipe_class loadPollP(iRegP poll) %{
    single_instruction;
    poll : R(read);
    MS   : R;
%}
4261 
// Unconditional branch
pipe_class br(Universe br, label labl) %{
    single_instruction_with_delay_slot;
    BR  : R;
%}

// Conditional branch on integer flags
pipe_class br_cc(Universe br, cmpOp cmp, flagsReg cr, label labl) %{
    single_instruction_with_delay_slot;
    cr    : E(read);
    BR    : R;
%}

// Compare-register-and-branch form
pipe_class br_reg(Universe br, cmpOp cmp, iRegI op1, label labl) %{
    single_instruction_with_delay_slot;
    op1 : E(read);
    BR  : R;
    MS  : R;
%}

// Branch-unit nop
pipe_class br_nop() %{
    single_instruction;
    BR  : R;
%}

// Runtime/stub call: serializes the pipeline, large fixed latency
pipe_class simple_call(method meth) %{
    instruction_count(2); multiple_bundles; force_serialization;
    fixed_latency(100);
    BR  : R(1);
    MS  : R(1);
    A0  : R(1);
%}

// Call to compiled code
pipe_class compiled_call(method meth) %{
    instruction_count(1); multiple_bundles; force_serialization;
    fixed_latency(100);
    MS  : R(1);
%}

// Generic call
pipe_class call(method meth) %{
    instruction_count(0); multiple_bundles; force_serialization;
    fixed_latency(100);
%}

// Tail call (jump that never returns here)
pipe_class tail_call(Universe ignore, label labl) %{
    single_instruction; has_delay_slot;
    fixed_latency(100);
    BR  : R(1);
    MS  : R(1);
%}

// Method return
pipe_class ret(Universe ignore) %{
    single_instruction; has_delay_slot;
    BR  : R(1);
    MS  : R(1);
%}

// The real do-nothing guy
pipe_class empty( ) %{
    instruction_count(0);
%}

// Multi-instruction memory sequence (e.g. atomics): serializing, long latency
pipe_class long_memory_op() %{
    instruction_count(0); multiple_bundles; force_serialization;
    fixed_latency(25);
    MS  : R(1);
%}
4327 
// Check-cast
pipe_class partial_subtype_check_pipe(Universe ignore, iRegP array, iRegP match ) %{
    array : R(read);
    match  : R(read);
    IALU   : R(2);
    BR     : R(2);
    MS     : R;
%}

// Convert FPU flags into +1,0,-1
pipe_class floating_cmp( iRegI dst, regF src1, regF src2 ) %{
    src1  : E(read);
    src2  : E(read);
    dst   : E(write);
    FA    : R;
    MS    : R(2);
    BR    : R(2);
%}
4346 
// Compare for p < q, and conditionally add y.
// Occupies the integer ALU for 3 cycles.
pipe_class cadd_cmpltmask( iRegI p, iRegI q, iRegI y ) %{
    p     : E(read);
    q     : E(read);
    y     : E(read);
    IALU  : R(3);  // terminator added: every other pipe_class statement ends with ';'
%}
4354 
// Perform a compare, then a conditional move.
// NOTE(review): "branch delay slot" in the original comment is a leftover
// from the SPARC port this description derives from; ARM has no delay
// slots — the resource usage (IALU + BR) is what matters here.
pipe_class min_max( iRegI src2, iRegI srcdst ) %{
    src2   : E(read);
    srcdst : E(read);
    IALU   : R;
    BR     : R;
%}
4362 
// Define the class for the Nop node: scheduler-inserted nops use any ALU.
define %{
   MachNop = ialu_nop;
%}
4367 
4368 %}
4369 
4370 //----------INSTRUCTIONS-------------------------------------------------------
4371 
4372 //------------Special Nop instructions for bundling - no match rules-----------
// Nop using the A0 functional unit
// (these five match the nops(...) list in the pipeline attributes;
// they have no match rule and are emitted only for bundling)
instruct Nop_A0() %{
  ins_pipe(ialu_nop_A0);
%}

// Nop using the A1 functional unit
instruct Nop_A1( ) %{
  ins_pipe(ialu_nop_A1);
%}

// Nop using the memory functional unit
instruct Nop_MS( ) %{
  ins_pipe(mem_nop);
%}

// Nop using the floating add functional unit
instruct Nop_FA( ) %{
  ins_pipe(fadd_nop);
%}

// Nop using the branch functional unit
instruct Nop_BR( ) %{
  ins_pipe(br_nop);
%}
4397 
4398 //----------Load/Store/Move Instructions---------------------------------------
4399 //----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(iRegI dst, memoryB mem) %{
  match(Set dst (LoadB mem));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDRSB   $dst,$mem\t! byte -> int" %}
  ins_encode %{
    __ ldrsb($dst$$Register, $mem$$Address);
  %}
  ins_pipe(iload_mask_mem);
%}

// Load Byte (8bit signed) into a Long Register
// (low word loaded, high word = sign extension via ASR #31)
instruct loadB2L(iRegL dst, memoryB mem) %{
  match(Set dst (ConvI2L (LoadB mem)));
  ins_cost(MEMORY_REF_COST);

  size(8);
  format %{ "LDRSB $dst.lo,$mem\t! byte -> long\n\t"
            "ASR   $dst.hi,$dst.lo,31" %}
  ins_encode %{
    __ ldrsb($dst$$Register, $mem$$Address);
    __ mov($dst$$Register->successor(), $dst$$Register, asr(31));
  %}
  ins_pipe(iload_mask_mem);
%}

// Load Unsigned Byte (8bit UNsigned) into an int reg
instruct loadUB(iRegI dst, memoryB mem) %{
  match(Set dst (LoadUB mem));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDRB   $dst,$mem\t! ubyte -> int" %}
  ins_encode %{
    __ ldrb($dst$$Register, $mem$$Address);
  %}
  ins_pipe(iload_mem);
%}

// Load Unsigned Byte (8bit UNsigned) into a Long Register
// (high word is simply zeroed)
instruct loadUB2L(iRegL dst, memoryB mem) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  ins_cost(MEMORY_REF_COST);

  size(8);
  format %{ "LDRB  $dst.lo,$mem\t! ubyte -> long\n\t"
            "MOV   $dst.hi,0" %}
  ins_encode %{
    __ ldrb($dst$$Register, $mem$$Address);
    __ mov($dst$$Register->successor(), 0);
  %}
  ins_pipe(iload_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with immediate mask into Long Register
instruct loadUB2L_limmI(iRegL dst, memoryB mem, limmIlow8 mask) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));

  ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST);
  size(12);
  format %{ "LDRB  $dst.lo,$mem\t! ubyte -> long\n\t"
            "MOV   $dst.hi,0\n\t"
            "AND  $dst.lo,$dst.lo,$mask" %}
  ins_encode %{
    __ ldrb($dst$$Register, $mem$$Address);
    __ mov($dst$$Register->successor(), 0);
    // mask is restricted to the low 8 bits by the limmIlow8 operand
    __ andr($dst$$Register, $dst$$Register, limmI_low($mask$$constant, 8));
  %}
  ins_pipe(iload_mem);
%}
4472 
// Load Short (16bit signed)

instruct loadS(iRegI dst, memoryS mem) %{
  match(Set dst (LoadS mem));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDRSH   $dst,$mem\t! short" %}
  ins_encode %{
    __ ldrsh($dst$$Register, $mem$$Address);
  %}
  ins_pipe(iload_mask_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
// The (x << 24) >> 24 idiom folds to a single sign-extending byte load.
instruct loadS2B(iRegI dst, memoryS mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
  ins_cost(MEMORY_REF_COST);

  size(4);

  format %{ "LDRSB   $dst,$mem\t! short -> byte" %}
  ins_encode %{
    // NOTE(review): the original comment here referred to AArch64; on
    // AArch32 this is simply a sign-extending byte load.
    __ ldrsb($dst$$Register, $mem$$Address);
  %}
  ins_pipe(iload_mask_mem);
%}

// Load Short (16bit signed) into a Long Register
instruct loadS2L(iRegL dst, memoryS mem) %{
  match(Set dst (ConvI2L (LoadS mem)));
  ins_cost(MEMORY_REF_COST);

  size(8);
  format %{ "LDRSH $dst.lo,$mem\t! short -> long\n\t"
            "ASR   $dst.hi,$dst.lo,31" %}
  ins_encode %{
    __ ldrsh($dst$$Register, $mem$$Address);
    __ mov($dst$$Register->successor(), $dst$$Register, asr(31));
  %}
  ins_pipe(iload_mask_mem);
%}
4516 
// Load Unsigned Short/Char (16bit UNsigned)


instruct loadUS(iRegI dst, memoryS mem) %{
  match(Set dst (LoadUS mem));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDRH   $dst,$mem\t! ushort/char" %}
  ins_encode %{
    __ ldrh($dst$$Register, $mem$$Address);
  %}
  ins_pipe(iload_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
// The (x << 24) >> 24 idiom folds to a sign-extending byte load.
instruct loadUS2B(iRegI dst, memoryB mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDRSB   $dst,$mem\t! ushort -> byte" %}
  ins_encode %{
    __ ldrsb($dst$$Register, $mem$$Address);
  %}
  ins_pipe(iload_mask_mem);
%}

// Load Unsigned Short/Char (16bit UNsigned) into a Long Register
instruct loadUS2L(iRegL dst, memoryS mem) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  ins_cost(MEMORY_REF_COST);

  size(8);
  format %{ "LDRH  $dst.lo,$mem\t! short -> long\n\t"
            "MOV   $dst.hi, 0" %}
  ins_encode %{
    __ ldrh($dst$$Register, $mem$$Address);
    __ mov($dst$$Register->successor(), 0);
  %}
  ins_pipe(iload_mem);
%}

// Load Unsigned Short/Char (16bit UNsigned) with mask 0xFF into a Long Register
// Masking with 0xFF turns the halfword load into a plain byte load.
instruct loadUS2L_immI_255(iRegL dst, memoryB mem, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  ins_cost(MEMORY_REF_COST);

  size(8);
  format %{ "LDRB  $dst.lo,$mem\t! \n\t"
            "MOV   $dst.hi, 0" %}
  ins_encode %{
    __ ldrb($dst$$Register, $mem$$Address);
    __ mov($dst$$Register->successor(), 0);
  %}
  ins_pipe(iload_mem);
%}

// Load Unsigned Short/Char (16bit UNsigned) with a immediate mask into a Long Register
instruct loadUS2L_limmI(iRegL dst, memoryS mem, limmI mask) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST);

  size(12);
  format %{ "LDRH   $dst,$mem\t! ushort/char & mask -> long\n\t"
            "MOV    $dst.hi, 0\n\t"
            "AND    $dst,$dst,$mask" %}
  ins_encode %{
    __ ldrh($dst$$Register, $mem$$Address);
    __ mov($dst$$Register->successor(), 0);
    __ andr($dst$$Register, $dst$$Register, $mask$$constant);
  %}
  ins_pipe(iload_mem);
%}
4591 
4592 // Load Integer
4593 
4594 instruct loadI(iRegI dst, memoryI mem) %{
4595   match(Set dst (LoadI mem));
4596   ins_cost(MEMORY_REF_COST);
4597 
4598   size(4);
4599   format %{ "ldr $dst,$mem\t! int" %}
4600   ins_encode %{
4601     __ ldr($dst$$Register, $mem$$Address);
4602   %}
4603   ins_pipe(iload_mem);
4604 %}
4605 
// Load Integer to Byte (8 bit signed)
// Matches the canonical narrowing idiom (x << 24) >> 24 applied to a loaded
// int and folds it into a single sign-extending byte load.
instruct loadI2B(iRegI dst, memoryS mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
  ins_cost(MEMORY_REF_COST);

  size(4);

  format %{ "LDRSB   $dst,$mem\t! int -> byte" %}
  ins_encode %{
    __ ldrsb($dst$$Register, $mem$$Address);
  %}
  ins_pipe(iload_mask_mem);
%}

// Load Integer to Unsigned Byte (8 bit UNsigned)
// (LoadI mem) & 0xFF folds into a single zero-extending byte load.
instruct loadI2UB(iRegI dst, memoryB mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));
  ins_cost(MEMORY_REF_COST);

  size(4);

  format %{ "LDRB   $dst,$mem\t! int -> ubyte" %}
  ins_encode %{
    __ ldrb($dst$$Register, $mem$$Address);
  %}
  ins_pipe(iload_mask_mem);
%}

// Load Integer to Short (16 bit signed)
// Matches (x << 16) >> 16 on a loaded int; folds into LDRSH.
instruct loadI2S(iRegI dst, memoryS mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDRSH   $dst,$mem\t! int -> short" %}
  ins_encode %{
    __ ldrsh($dst$$Register, $mem$$Address);
  %}
  ins_pipe(iload_mask_mem);
%}

// Load Integer to Unsigned Short (16 bit UNsigned)
// (LoadI mem) & 0xFFFF folds into a single zero-extending halfword load.
instruct loadI2US(iRegI dst, memoryS mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDRH   $dst,$mem\t! int -> ushort/char" %}
  ins_encode %{
    __ ldrh($dst$$Register, $mem$$Address);
  %}
  ins_pipe(iload_mask_mem);
%}
4659 
// Load Integer into a Long Register
// Sign-extend: low word from memory, high word is an arithmetic shift of
// the low word by 31 (all sign bits).
instruct loadI2L(iRegL dst, memoryI mem) %{
  match(Set dst (ConvI2L (LoadI mem)));
  ins_cost(MEMORY_REF_COST);

  size(8);
  format %{ "LDR   $dst.lo,$mem\t! int -> long\n\t"
            "ASR   $dst.hi,$dst.lo,31\t! int->long" %}
  ins_encode %{
    __ ldr($dst$$Register, $mem$$Address);
    __ mov($dst$$Register->successor(), $dst$$Register, asr(31));
  %}
  // NOTE(review): sibling loadI2L_* instructs use iload_mem; confirm
  // iload_mask_mem is intended here.
  ins_pipe(iload_mask_mem);
%}

// Load Integer with mask 0xFF into a Long Register
// The 0xFF mask is implemented by the zero-extending byte load itself.
instruct loadI2L_immI_255(iRegL dst, memoryB mem, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  ins_cost(MEMORY_REF_COST);

  size(8);
  format %{ "LDRB   $dst.lo,$mem\t! int & 0xFF -> long\n\t"
            "MOV    $dst.hi, 0" %}
  ins_encode %{
    __ ldrb($dst$$Register, $mem$$Address);
    __ mov($dst$$Register->successor(), 0);
  %}
  ins_pipe(iload_mem);
%}

// Load Integer with mask 0xFFFF into a Long Register
// The 0xFFFF mask is implemented by the zero-extending halfword load itself.
instruct loadI2L_immI_65535(iRegL dst, memoryS mem, immI_65535 mask) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  ins_cost(MEMORY_REF_COST);

  size(8);
  format %{ "LDRH   $dst,$mem\t! int & 0xFFFF -> long\n\t"
            "MOV    $dst.hi, 0" %}
  ins_encode %{
    __ ldrh($dst$$Register, $mem$$Address);
    __ mov($dst$$Register->successor(), 0);
  %}
  ins_pipe(iload_mask_mem);
%}

// Load Integer with a 31-bit immediate mask into a Long Register
// limmU31: the mask is non-negative and encodable as an ARM operand2
// immediate, so it is applied with a single AND after the load.
instruct loadI2L_limmU31(iRegL dst, memoryI mem, limmU31 mask) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST);

  size(12);
  format %{ "LDR   $dst.lo,$mem\t! int -> long\n\t"
            "MOV    $dst.hi, 0\n\t"
            "AND   $dst,$dst,$mask" %}

  ins_encode %{
    __ ldr($dst$$Register, $mem$$Address);
    __ mov($dst$$Register->successor(), 0);
    __ andr($dst$$Register, $dst$$Register, $mask$$constant);
  %}
  ins_pipe(iload_mem);
%}
4722 
// Load Integer with a 31-bit mask into a Long Register
// The mask is non-negative but not operand2-encodable, so it is first
// materialized into a temp register with MOV_SLOW and then ANDed in.
// FIXME: use iRegI mask, remove tmp?
instruct loadI2L_immU31(iRegL dst, memoryI mem, immU31 mask, iRegI tmp) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(TEMP dst, TEMP tmp);

  ins_cost(MEMORY_REF_COST + 4*DEFAULT_COST);
  size(20);
  // Fixed the debug format string: operands were reversed ($mem,$dst);
  // the encoding loads into $dst from $mem.
  format %{ "LDR      $dst,$mem\t! int & 31-bit mask -> long\n\t"
            "MOV      $dst.hi, 0\n\t"
            "MOV_SLOW $tmp,$mask\n\t"
            "AND      $dst,$tmp,$dst" %}
  ins_encode %{
    __ ldr($dst$$Register, $mem$$Address);
    __ mov($dst$$Register->successor(), 0);
    __ mov($tmp$$Register, $mask$$constant);
    __ andr($dst$$Register, $dst$$Register, $tmp$$Register);
  %}
  ins_pipe(iload_mem);
%}
4743 
// Load Unsigned Integer into a Long Register
// Matches (ConvI2L(LoadI)) & 0xFFFFFFFF: zero-extension, so the high word
// is simply cleared.
instruct loadUI2L(iRegL dst, memoryI mem, immL_32bits mask) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  ins_cost(MEMORY_REF_COST);

  size(8);
  format %{ "LDR   $dst.lo,$mem\t! uint -> long\n\t"
            "MOV   $dst.hi,0" %}
  ins_encode %{
    __ ldr($dst$$Register, $mem$$Address);
    __ mov($dst$$Register->successor(), 0);
  %}
  ins_pipe(iload_mem);
%}
4758 
// Load Long

// Non-atomic long load into an even/odd register pair (iRegLd) with a
// single LDRD.
instruct loadL(iRegLd dst, memoryL mem ) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP dst);
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "ldrd  $dst,$mem\t! long" %}
  ins_encode %{
    __ ldrd($dst$$Register, $mem$$Address);
  %}
  ins_pipe(iload_mem);
%}

// Non-atomic long load as two word loads, for register pairs/addressing
// modes where LDRD cannot be used.
instruct loadL_2instr(iRegL dst, memorylong mem ) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  ins_cost(MEMORY_REF_COST + DEFAULT_COST);

  size(8);
  format %{ "LDR    $dst.lo,$mem \t! long order of instrs reversed if $dst.lo == base($mem)\n\t"
            "LDR    $dst.hi,$mem+4 or $mem" %}
  ins_encode %{
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);

    // If the low destination register is also the address base, load the
    // high word first so the base is not clobbered before the second load.
    if ($dst$$Register == reg_to_register_object($mem$$base)) {
      __ ldr($dst$$Register->successor(), Amemhi);
      __ ldr($dst$$Register, Amemlo);
    } else {
      __ ldr($dst$$Register, Amemlo);
      __ ldr($dst$$Register->successor(), Amemhi);
    }
  %}
  ins_pipe(iload_mem);
%}

// Atomic (volatile) long load: atomic_ldrd provides 64-bit single-copy
// atomicity; only an indirect (register base, no offset) address is allowed.
instruct loadL_volatile(iRegL dst, indirect mem ) %{
  predicate(((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDREXD    $dst,$mem\t! long" %}
  ins_encode %{
    __ atomic_ldrd($dst$$Register, reg_to_register_object($dst$$reg + 1), reg_to_register_object($mem$$base));
  %}
  ins_pipe(iload_mem);
%}
4810 
// Atomic (volatile) long load via the FPU: a 64-bit FP load is used as the
// single-copy-atomic access, then the value is moved to the core register
// pair with FMRRD.
instruct loadL_volatile_fp(iRegL dst, memoryD mem ) %{
  predicate(((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  ins_cost(MEMORY_REF_COST);

  size(8);
  // Fixed the debug format string: the two lines were missing their "\n\t"
  // separator and the trailing escape was mistyped as "\n't".
  format %{ "FLDD     S14, $mem\n\t"
            "FMRRD    $dst, S14\t! long" %}
  ins_encode %{
    __ vldr_f64(f14, $mem$$Address);
    __ vmov_f64($dst$$Register, $dst$$Register->successor(), f14);
  %}
  ins_pipe(iload_mem);
%}
4825 
// Unaligned long load: always two word loads (LDRD requires alignment).
instruct loadL_unaligned(iRegL dst, memorylong mem ) %{
  match(Set dst (LoadL_unaligned mem));
  ins_cost(MEMORY_REF_COST);

  size(8);
  format %{ "LDR    $dst.lo,$mem\t! long order of instrs reversed if $dst.lo == base($mem)\n\t"
            "LDR    $dst.hi,$mem+4" %}
  ins_encode %{
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);

    // Load high word first when the low destination aliases the address
    // base, so the base survives for the second load.
    if ($dst$$Register == reg_to_register_object($mem$$base)) {
      __ ldr($dst$$Register->successor(), Amemhi);
      __ ldr($dst$$Register, Amemlo);
    } else {
      __ ldr($dst$$Register, Amemlo);
      __ ldr($dst$$Register->successor(), Amemhi);
    }
  %}
  ins_pipe(iload_mem);
%}

// Load Range
// Loads an array length (LoadRange) as a plain word load.
instruct loadRange(iRegI dst, memoryI mem) %{
  match(Set dst (LoadRange mem));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDR_u32 $dst,$mem\t! range" %}
  ins_encode %{
    __ ldr($dst$$Register, $mem$$Address);
  %}
  ins_pipe(iload_mem);
%}
4860 
// Load Pointer

// Pointer load: a single word load (32-bit pointers on AArch32).
instruct loadP(iRegP dst, memoryP mem) %{
  match(Set dst (LoadP mem));
  ins_cost(MEMORY_REF_COST);
  size(4);

  format %{ "LDR   $dst,$mem\t! ptr" %}
  ins_encode %{
    __ ldr($dst$$Register, $mem$$Address);
  %}
  ins_pipe(iload_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegP dst, memoryI mem) %{
  match(Set dst (LoadKlass mem));
  ins_cost(MEMORY_REF_COST);
  size(4);

  format %{ "LDR   $dst,$mem\t! klass ptr" %}
  ins_encode %{
    __ ldr($dst$$Register, $mem$$Address);
  %}
  ins_pipe(iload_mem);
%}

// Double load as one 64-bit FP load.
instruct loadD(regD dst, memoryD mem) %{
  match(Set dst (LoadD mem));
  ins_cost(MEMORY_REF_COST);

  size(4);
  // FIXME: needs to be atomic, but  ARMv7 A.R.M. guarantees
  // only LDREXD and STREXD are 64-bit single-copy atomic
  format %{ "FLDD   $dst,$mem" %}
  ins_encode %{
    __ vldr_f64($dst$$FloatRegister, $mem$$Address);
  %}
  ins_pipe(floadD_mem);
%}

// Load Double - UNaligned
// Two single-precision loads into consecutive S registers of a low D
// register (FLDD would require alignment).
instruct loadD_unaligned(regD_low dst, memoryF2 mem ) %{
  match(Set dst (LoadD_unaligned mem));
  ins_cost(MEMORY_REF_COST*2+DEFAULT_COST);
  size(8);
  format %{ "FLDS    $dst.lo,$mem\t! misaligned double\n"
          "\tFLDS    $dst.hi,$mem+4\t!" %}
  ins_encode %{
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
      __ vldr_f32($dst$$FloatRegister, Amemlo);
      __ vldr_f32($dst$$FloatRegister->successor(FloatRegisterImpl::SINGLE), Amemhi);
  %}
  ins_pipe(iload_mem);
%}

// Float load as one 32-bit FP load.
instruct loadF(regF dst, memoryF mem) %{
  match(Set dst (LoadF mem));

  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "FLDS    $dst,$mem" %}
  ins_encode %{
    __ vldr_f32($dst$$FloatRegister, $mem$$Address);
  %}
  ins_pipe(floadF_mem);
%}
4929 
// // Load Constant

// Arbitrary int constant: MOV_SLOW may expand to multiple instructions,
// hence no fixed size() and the 1.5x cost.
instruct loadConI( iRegI dst, immI src ) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST * 3/2);
  format %{ "MOV_SLOW    $dst, $src" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_hi_lo_reg);
%}

// Int constant encodable as an ARM operand2 immediate: single MOV.
instruct loadConIMov( iRegI dst, immIMov src ) %{
  match(Set dst src);
  size(4);
  format %{ "MOV    $dst, $src" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_imm);
%}

// Int constant whose bitwise complement is operand2-encodable: single MVN.
instruct loadConIMovn( iRegI dst, immIRotn src ) %{
  match(Set dst src);
  size(4);
  format %{ "MVN    $dst, ~$src" %}
  ins_encode %{
    __ mvn_i($dst$$Register, ~$src$$constant);
  %}
  ins_pipe(ialu_imm_n);
%}

// 16-bit constant: single MOVW.
instruct loadConI16( iRegI dst, immI16 src ) %{
  match(Set dst src);
  size(4);
  format %{ "MOVW    $dst, $src" %}
  ins_encode %{
    __ movw_i($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_imm_n);
%}

// Pointer constant: oops and metadata need relocation entries so the GC /
// class unloading can patch them; other pointers are plain moves.
instruct loadConP(iRegP dst, immP src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST * 3/2);
  format %{ "MOV_SLOW    $dst,$src\t!ptr" %}
  ins_encode %{
    relocInfo::relocType constant_reloc = _opnds[1]->constant_reloc();
    intptr_t val = $src$$constant;
    if (constant_reloc == relocInfo::oop_type) {
      __ movoop($dst$$Register, (jobject)val, true);
    } else if (constant_reloc == relocInfo::metadata_type) {
      __ mov_metadata($dst$$Register, (Metadata*)val);
    } else {
      __ mov($dst$$Register, val);
    }
  %}
  ins_pipe(loadConP);
%}


// Safepoint-polling page address constant.
instruct loadConP_poll(iRegP dst, immP_poll src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST);
  format %{ "MOV_SLOW    $dst,$src\t!ptr" %}
  ins_encode %{
      __ mov($dst$$Register, $src$$constant);
  %}
  ins_pipe(loadConP_poll);
%}

// Arbitrary long constant: materialize the two 32-bit halves separately.
instruct loadConL(iRegL dst, immL src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST * 4);
  format %{ "MOV_SLOW   $dst.lo, $src & 0x0FFFFFFFFL \t! long\n\t"
            "MOV_SLOW   $dst.hi, $src >> 32" %}
  ins_encode %{
    __ mov(reg_to_register_object($dst$$reg), $src$$constant & 0x0FFFFFFFFL);
    __ mov(reg_to_register_object($dst$$reg + 1), ((julong)($src$$constant)) >> 32);
  %}
  ins_pipe(loadConL);
%}

// 16-bit long constant: MOVW for the low half, zero for the high half.
instruct loadConL16( iRegL dst, immL16 src ) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST * 2);

  size(8);
  format %{ "MOVW    $dst.lo, $src \n\t"
            "MOVW    $dst.hi, 0 \n\t" %}
  ins_encode %{
    __ movw_i($dst$$Register, $src$$constant);
    __ movw_i(reg_to_register_object($dst$$reg + 1), 0);
  %}
  ins_pipe(ialu_imm);
%}
5025 
// Float constant encodable as a VFP 8-bit immediate: single FCONSTS.
instruct loadConF_imm8(regF dst, imm8F src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST);
  size(4);

  format %{ "FCONSTS      $dst, $src"%}

  ins_encode %{
    __ vmov_f32($dst$$FloatRegister, $src$$constant);
  %}
  ins_pipe(loadConFD); // FIXME
%}

// Arbitrary float constant: materialize the bit pattern in a core register,
// then move it into the FP register.
instruct loadConF(regF dst, immF src, iRegI tmp) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST * 2);
  effect(TEMP tmp);
  size(3*4);

  format %{ "MOV_SLOW  $tmp, $src\n\t"
            "FMSR      $dst, $tmp"%}

  ins_encode %{
    // FIXME revisit once 6961697 is in
    // Reinterpret the float's bits as an int via a union (avoids a
    // value-converting cast).
    union {
      jfloat f;
      int i;
    } v;
    v.f = $src$$constant;
    __ mov($tmp$$Register, v.i);
    __ vmov_f32($dst$$FloatRegister, $tmp$$Register);
  %}
  ins_pipe(loadConFD); // FIXME
%}

// Double constant encodable as a VFP 8-bit immediate: single FCONSTD.
instruct loadConD_imm8(regD dst, imm8D src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST);
  size(4);

  format %{ "FCONSTD      $dst, $src"%}

  ins_encode %{
    __ vmov_f64($dst$$FloatRegister, $src$$constant);
  %}
  ins_pipe(loadConFD); // FIXME
%}

// Arbitrary double constant: loaded from the constant table.
instruct loadConD(regD dst, immD src, iRegP tmp) %{
  match(Set dst src);
  effect(TEMP tmp);
  ins_cost(MEMORY_REF_COST);
  format %{ "FLDD  $dst, [$constanttablebase + $constantoffset]\t! load from constant table: double=$src" %}

  ins_encode %{
    Register r = $constanttablebase;
    int offset  = $constantoffset($src);
    // If the offset does not fit an FLDD addressing mode, form the address
    // in tmp first.
    if (!is_memoryD(offset)) {                // can't use a predicate
                                              // in load constant instructs
      __ add($tmp$$Register, r, offset);
      r = $tmp$$Register;
      offset = 0;
    }
    __ vldr_f64($dst$$FloatRegister, Address(r, offset));
  %}
  ins_pipe(loadConFD);
%}
5093 
// Prefetch instructions.
// Must be safe to execute with invalid address (cannot fault).

// Prefetch-for-write (PLDW) when the multiprocessing extensions are present.
instruct prefetchAlloc_mp( memoryP mem ) %{
  predicate(VM_Version::features() & FT_MP_EXT);
  match( PrefetchAllocation mem );
  ins_cost(MEMORY_REF_COST);
  size(4);

  format %{ "PLDW $mem\t! Prefetch allocation" %}
  ins_encode %{
    __ pldw($mem$$Address);
  %}
  ins_pipe(iload_mem);
%}

// Fallback plain prefetch (PLD) without the MP extensions.
instruct prefetchAlloc_sp( memoryP mem ) %{
  predicate(!(VM_Version::features() & FT_MP_EXT));
  match( PrefetchAllocation mem );
  ins_cost(MEMORY_REF_COST);
  size(4);

  format %{ "PLD $mem\t! Prefetch allocation" %}
  ins_encode %{
    __ pld($mem$$Address);
  %}
  ins_pipe(iload_mem);
%}
5122 
//----------Store Instructions-------------------------------------------------
// Store Byte
instruct storeB(memoryB mem, store_RegI src) %{
  match(Set mem (StoreB mem src));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STRB    $src,$mem\t! byte" %}
  ins_encode %{
    __ strb($src$$Register, $mem$$Address);
  %}
  ins_pipe(istore_mem_reg);
%}

// Card-mark byte store (same encoding as storeB, separate node so the
// matcher can handle StoreCM).
instruct storeCM(memoryB mem, store_RegI src) %{
  match(Set mem (StoreCM mem src));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STRB    $src,$mem\t! CMS card-mark byte" %}
  ins_encode %{
    __ strb($src$$Register, $mem$$Address);
  %}
  ins_pipe(istore_mem_reg);
%}

// Store Char/Short

instruct storeC(memoryS mem, store_RegI src) %{
  match(Set mem (StoreC mem src));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STRH    $src,$mem\t! short" %}
  ins_encode %{
    __ strh($src$$Register, $mem$$Address);
  %}
  ins_pipe(istore_mem_reg);
%}

// Store Integer

instruct storeI(memoryI mem, store_RegI src) %{
  match(Set mem (StoreI mem src));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "str $src,$mem" %}
  ins_encode %{
    __ str($src$$Register, $mem$$Address);
  %}
  ins_pipe(istore_mem_reg);
%}
5176 
// Store Long

// Non-atomic long store from an even/odd register pair with a single STRD.
instruct storeL(memoryL mem, store_RegLd src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  ins_cost(MEMORY_REF_COST);

  size(4);
  // Fixed the debug format string: it ended with a dangling "\n\t"
  // continuation although this is a single-instruction format.
  format %{ "strd  $src,$mem\t! long" %}

  ins_encode %{
    __ strd($src$$Register, $mem$$Address);
  %}
  ins_pipe(istore_mem_reg);
%}
5192 
// Non-atomic long store as two word stores (for cases STRD cannot handle).
// No ordering hazard here: stores cannot clobber the address base.
instruct storeL_2instr(memorylong mem, iRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  ins_cost(MEMORY_REF_COST + DEFAULT_COST);

  size(8);
  format %{ "STR    $src.lo,$mem\t! long\n\t"
            "STR    $src.hi,$mem+4" %}

  ins_encode %{
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ str($src$$Register, Amemlo);
    __ str($src$$Register->successor(), Amemhi);
  %}
  ins_pipe(istore_mem_reg);
%}
5210 
// Atomic (volatile) long store via STMIA of the register pair to the
// indirect base address.
instruct storeL_volatile(indirect mem, iRegL src) %{
  predicate(((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  ins_cost(MEMORY_REF_COST);
  size(4);
  // Fixed the debug format string: operands were reversed; STMIA takes the
  // base address ($mem) first, then the register list ($src pair).
  format %{ "STMIA    $mem,$src\t! long" %}
  ins_encode %{
    // FIXME: why is stmia considered atomic?  Should be strexd
    // TODO: need 3 temp registers to use atomic_strd
    __ stmia(reg_to_register_object($mem$$base), RegSet::of($src$$Register, reg_to_register_object($src$$reg + 1)).bits(), /*wb*/false);
  %}
  ins_pipe(istore_mem_reg);
%}
5224 
// Atomic (volatile) long store via the FPU: move the core register pair
// into D14 with FMDRR, then perform one 64-bit FP store.
instruct storeL_volatile_fp(memoryD mem, iRegL src) %{
  predicate(((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  ins_cost(MEMORY_REF_COST);
  size(8);
  format %{ "FMDRR    S14, $src\t! long \n\t"
            "FSTD     S14, $mem" %}
  ins_encode %{
    __ vmov_f64(f14, $src$$Register, $src$$Register->successor());
    __ vstr_f64(f14, $mem$$Address);
  %}
  ins_pipe(istore_mem_reg);
%}

// Store Pointer

instruct storeP(memoryP mem, store_ptr_RegP src) %{
  match(Set mem (StoreP mem src));
  ins_cost(MEMORY_REF_COST);
  size(4);

  format %{ "STR    $src,$mem\t! ptr" %}
  ins_encode %{
    __ str($src$$Register, $mem$$Address);
  %}
  ins_pipe(istore_mem_spORreg);
%}

// Store Double

instruct storeD(memoryD mem, regD src) %{
  match(Set mem (StoreD mem src));
  ins_cost(MEMORY_REF_COST);

  size(4);
  // FIXME: needs to be atomic, but  ARMv7 A.R.M. guarantees
  // only LDREXD and STREXD are 64-bit single-copy atomic
  format %{ "FSTD   $src,$mem" %}
  ins_encode %{
    __ vstr_f64($src$$FloatRegister, $mem$$Address);
  %}
  ins_pipe(fstoreD_mem_reg);
%}

// Store Float

instruct storeF( memoryF mem, regF src) %{
  match(Set mem (StoreF mem src));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "FSTS    $src,$mem" %}
  ins_encode %{
    __ vstr_f32($src$$FloatRegister, $mem$$Address);
  %}
  ins_pipe(fstoreF_mem_reg);
%}
5282 
5283 //----------MemBar Instructions-----------------------------------------------
5284 // Memory barrier flavors
5285 
// TODO: take advantage of ARMv8 AArch32 load-acquire/store-release (LDA/STL)
// and pattern-match out unnecessary membars
// Store-store barrier only.
instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(4*MEMORY_REF_COST);

  size(4);
  format %{ "MEMBAR-storestore" %}
  ins_encode %{
    __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore));
  %}
  ins_pipe(long_memory_op);
%}

// Acquire barrier: orders the preceding load against later loads and stores
// (LoadLoad | LoadStore).
instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(4*MEMORY_REF_COST);

  size(4);
  format %{ "MEMBAR-acquire" %}
  ins_encode %{
    __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadLoad | MacroAssembler::LoadStore));
  %}
  ins_pipe(long_memory_op);
%}

// No code needed: the CAS in the preceding FastLock already provides the
// required ordering.
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "!MEMBAR-acquire (CAS in prior FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Release barrier: orders earlier loads and stores against the following
// store (StoreStore | LoadStore).
instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(4*MEMORY_REF_COST);

  size(4);
  format %{ "MEMBAR-release" %}
  ins_encode %{
    __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore | MacroAssembler::LoadStore));
  %}
  ins_pipe(long_memory_op);
%}

// No code needed: the CAS in the succeeding FastUnlock already provides the
// required ordering.
instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "!MEMBAR-release (CAS in succeeding FastUnlock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full volatile barrier: StoreLoad is the expensive ordering that the
// weaker barriers above do not provide.
instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(4*MEMORY_REF_COST);

  size(4);
  format %{ "MEMBAR-volatile" %}
  ins_encode %{
    __ membar(MacroAssembler::StoreLoad);
  %}
  ins_pipe(long_memory_op);
%}

// Elide the volatile barrier when a following barrier already covers the
// store-load ordering (see Matcher::post_store_load_barrier).
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "!MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
5368 
5369 //----------Register Move Instructions-----------------------------------------
5370 // instruct roundDouble_nop(regD dst) %{
5371 //   match(Set dst (RoundDouble dst));
5372 //   ins_pipe(empty);
5373 // %}
5374 
5375 
5376 // instruct roundFloat_nop(regF dst) %{
5377 //   match(Set dst (RoundFloat dst));
5378 //   ins_pipe(empty);
5379 // %}
5380 
5381 
// Cast Index to Pointer for unsafe natives
// Pure register move; elided entirely when allocator assigns dst == src.
instruct castX2P(iRegX src, iRegP dst) %{
  match(Set dst (CastX2P src));

  format %{ "MOV    $dst,$src\t! IntX->Ptr if $dst != $src" %}
  ins_encode %{
    if ($dst$$Register !=  $src$$Register) {
      __ mov($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg);
%}

// Cast Pointer to Index for unsafe natives
// Pure register move; elided entirely when allocator assigns dst == src.
instruct castP2X(iRegP src, iRegX dst) %{
  match(Set dst (CastP2X src));

  format %{ "MOV    $dst,$src\t! Ptr->IntX if $dst != $src" %}
  ins_encode %{
    if ($dst$$Register !=  $src$$Register) {
      __ mov($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg);
%}
5407 
5408 //----------Conditional Move---------------------------------------------------
5409 // Conditional move
// Integer conditional move on pointer-compare flags; emits a single
// predicated MOV, executed only when $cmp holds.
instruct cmovIP_reg(cmpOpP cmp, flagsRegP pcc, iRegI dst, iRegI src) %{
  match(Set dst (CMoveI (Binary cmp pcc) (Binary dst src)));
  ins_cost(150);
  size(4);
  format %{ "MOV$cmp  $dst,$src\t! int" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_reg);
%}

// As above with an operand2-encodable immediate source.
instruct cmovIP_immMov(cmpOpP cmp, flagsRegP pcc, iRegI dst, immIMov src) %{
  match(Set dst (CMoveI (Binary cmp pcc) (Binary dst src)));
  ins_cost(140);
  size(4);
  format %{ "MOV$cmp  $dst,$src" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}

// As above with a 16-bit immediate source (predicated MOVW).
instruct cmovIP_imm16(cmpOpP cmp, flagsRegP pcc, iRegI dst, immI16 src) %{
  match(Set dst (CMoveI (Binary cmp pcc) (Binary dst src)));
  ins_cost(140);
  size(4);
  format %{ "MOVw$cmp  $dst,$src" %}
  ins_encode %{
    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}

// Integer conditional move on signed integer flags.
instruct cmovI_reg(cmpOp cmp, flagsReg icc, iRegI dst, iRegI src) %{
  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
  ins_cost(150);
  size(4);
  format %{ "MOV$cmp  $dst,$src" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovI_immMov(cmpOp cmp, flagsReg icc, iRegI dst, immIMov src) %{
  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
  ins_cost(140);
  size(4);
  format %{ "MOV$cmp  $dst,$src" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}

instruct cmovII_imm16(cmpOp cmp, flagsReg icc, iRegI dst, immI16 src) %{
  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
  ins_cost(140);
  size(4);
  format %{ "MOVw$cmp  $dst,$src" %}
  ins_encode %{
    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}

// Conditional move on the restricted flags register: only EQ/NE/LT/GE tests
// are valid there, enforced by the predicate on the Bool node.
instruct cmovII_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, iRegI src) %{
  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  ins_cost(150);
  size(4);
  format %{ "MOV$cmp  $dst,$src" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovII_immMov_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, immIMov src) %{
  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  ins_cost(140);
  size(4);
  format %{ "MOV$cmp  $dst,$src" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}

instruct cmovII_imm16_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, immI16 src) %{
  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  ins_cost(140);
  size(4);
  format %{ "MOVW$cmp  $dst,$src" %}
  ins_encode %{
    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}

// Integer conditional move on unsigned integer flags.
instruct cmovIIu_reg(cmpOpU cmp, flagsRegU icc, iRegI dst, iRegI src) %{
  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
  ins_cost(150);
  size(4);
  format %{ "MOV$cmp  $dst,$src" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovIIu_immMov(cmpOpU cmp, flagsRegU icc, iRegI dst, immIMov src) %{
  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
  ins_cost(140);
  size(4);
  format %{ "MOV$cmp  $dst,$src" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}

instruct cmovIIu_imm16(cmpOpU cmp, flagsRegU icc, iRegI dst, immI16 src) %{
  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
  ins_cost(140);
  size(4);
  format %{ "MOVW$cmp  $dst,$src" %}
  ins_encode %{
    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}
5553 
// Conditional move
// Pointer conditional move on pointer-compare flags (predicated MOV).
instruct cmovPP_reg(cmpOpP cmp, flagsRegP pcc, iRegP dst, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp pcc) (Binary dst src)));
  ins_cost(150);
  size(4);
  format %{ "MOV$cmp  $dst,$src" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_reg);
%}

// Pointer conditional move of the null constant (immP0).
instruct cmovPP_imm(cmpOpP cmp, flagsRegP pcc, iRegP dst, immP0 src) %{
  match(Set dst (CMoveP (Binary cmp pcc) (Binary dst src)));
  ins_cost(140);
  size(4);
  format %{ "MOV$cmp  $dst,$src" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}

// This instruction also works with CmpN so we don't need cmovPN_reg.
instruct cmovPI_reg(cmpOp cmp, flagsReg icc, iRegP dst, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));
  ins_cost(150);

  size(4);
  format %{ "MOV$cmp  $dst,$src\t! ptr" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_reg);
%}

// Restricted-flags variant: only EQ/NE/LT/GE tests are valid, enforced by
// the predicate on the Bool node.
instruct cmovPI_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegP dst, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  ins_cost(150);

  size(4);
  format %{ "MOV$cmp  $dst,$src\t! ptr" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_reg);
%}

// Pointer conditional move on unsigned integer flags.
instruct cmovPIu_reg(cmpOpU cmp, flagsRegU icc, iRegP dst, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));
  ins_cost(150);

  size(4);
  format %{ "MOV$cmp  $dst,$src\t! ptr" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_reg);
%}

// Pointer conditional move of the null constant on integer flags.
instruct cmovPI_imm(cmpOp cmp, flagsReg icc, iRegP dst, immP0 src) %{
  match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));
  ins_cost(140);

  size(4);
  format %{ "MOV$cmp  $dst,$src\t! ptr" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}
5629 
5630 instruct cmovPI_imm_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegP dst, immP0 src) %{
5631   match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));
5632   predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
5633             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
5634             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
5635             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
5636   ins_cost(140);
5637 
5638   size(4);
5639   format %{ "MOV$cmp  $dst,$src\t! ptr" %}
5640   ins_encode %{
5641     __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
5642   %}
5643   ins_pipe(ialu_imm);
5644 %}
5645 
5646 instruct cmovPIu_imm(cmpOpU cmp, flagsRegU icc, iRegP dst, immP0 src) %{
5647   match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));
5648   ins_cost(140);
5649 
5650   size(4);
5651   format %{ "MOV$cmp  $dst,$src\t! ptr" %}
5652   ins_encode %{
5653     __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
5654   %}
5655   ins_pipe(ialu_imm);
5656 %}
5657 
// Conditional move of a single-precision float register, emitted as a
// predicated VFP copy (FCPYS / vmov.f32).  Variants differ only in which
// flags register / compare operand class they consume.
instruct cmovFP_reg(cmpOpP cmp, flagsRegP pcc, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cmp pcc) (Binary dst src)));
  ins_cost(150);
  size(4);
  format %{ "FCPYS$cmp $dst,$src" %}
  ins_encode %{
    __ vmov_f32($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(int_conditional_float_move);
%}

instruct cmovFI_reg(cmpOp cmp, flagsReg icc, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cmp icc) (Binary dst src)));
  ins_cost(150);

  size(4);
  format %{ "FCPYS$cmp $dst,$src" %}
  ins_encode %{
    __ vmov_f32($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(int_conditional_float_move);
%}

// EQNELTGE: only EQ/NE/LT/GE are valid conditions on this flags register.
instruct cmovFI_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cmp icc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  ins_cost(150);

  size(4);
  format %{ "FCPYS$cmp $dst,$src" %}
  ins_encode %{
    __ vmov_f32($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(int_conditional_float_move);
%}

instruct cmovFIu_reg(cmpOpU cmp, flagsRegU icc, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cmp icc) (Binary dst src)));
  ins_cost(150);

  size(4);
  format %{ "FCPYS$cmp $dst,$src" %}
  ins_encode %{
    __ vmov_f32($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(int_conditional_float_move);
%}
5709 
// Conditional move of a double-precision float register, emitted as a
// predicated VFP copy (FCPYD / vmov.f64).  Mirrors the cmovF* family above.
instruct cmovDP_reg(cmpOpP cmp, flagsRegP pcc, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cmp pcc) (Binary dst src)));
  ins_cost(150);
  size(4);
  format %{ "FCPYD$cmp $dst,$src" %}
  ins_encode %{
    __ vmov_f64($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(int_conditional_double_move);
%}

instruct cmovDI_reg(cmpOp cmp, flagsReg icc, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cmp icc) (Binary dst src)));
  ins_cost(150);

  size(4);
  format %{ "FCPYD$cmp $dst,$src" %}
  ins_encode %{
    __ vmov_f64($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(int_conditional_double_move);
%}

// EQNELTGE: only EQ/NE/LT/GE are valid conditions on this flags register.
instruct cmovDI_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cmp icc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  ins_cost(150);

  size(4);
  format %{ "FCPYD$cmp $dst,$src" %}
  ins_encode %{
    __ vmov_f64($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(int_conditional_double_move);
%}

instruct cmovDIu_reg(cmpOpU cmp, flagsRegU icc, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cmp icc) (Binary dst src)));
  ins_cost(150);

  size(4);
  format %{ "FCPYD$cmp $dst,$src" %}
  ins_encode %{
    __ vmov_f64($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(int_conditional_double_move);
%}
5758 
// Conditional move of a long (64-bit) value on pointer-compare flags.
// Longs live in adjacent register pairs: $dst$$Register is the low word and
// ->successor() is the high word, so two predicated MOVs are emitted.
instruct cmovLP_reg(cmpOpP cmp, flagsRegP pcc, iRegL dst, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp pcc) (Binary dst src)));
  ins_cost(150);

  size(8);
  format %{ "MOV$cmp  $dst.lo,$src.lo\t! long\n\t"
            "MOV$cmp  $dst.hi,$src.hi" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
    __ mov($dst$$Register->successor(), $src$$Register->successor(), (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_reg);
%}

// TODO: try immLRot2 instead, so the constant pair (0, $con$$constant)
// becomes (hi($con$$constant), lo($con$$constant)).
instruct cmovLP_immRot(cmpOpP cmp, flagsRegP pcc, iRegL dst, immLlowRot src) %{
  match(Set dst (CMoveL (Binary cmp pcc) (Binary dst src)));
  ins_cost(140);

  size(8);
  format %{ "MOV$cmp  $dst.lo,$src\t! long\n\t"
            "MOV$cmp  $dst.hi,0" %}
  ins_encode %{
    __ mov($dst$$Register, (long)$src$$constant, (Assembler::Condition)($cmp$$cmpcode));
    __ mov($dst$$Register->successor(), 0, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}

// 16-bit immediate form: MOVW for the low word, MOV #0 for the high word.
instruct cmovLP_imm16(cmpOpP cmp, flagsRegP pcc, iRegL dst, immL16 src) %{
  match(Set dst (CMoveL (Binary cmp pcc) (Binary dst src)));
  ins_cost(140);

  size(8);
  format %{ "MOV$cmp  $dst.lo,$src\t! long\n\t"
            "MOV$cmp  $dst.hi,0" %}
  ins_encode %{
    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
    __ mov($dst$$Register->successor(), 0, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}
5803 
// Conditional moves of long (64-bit) values on integer-compare flags.
// Each long occupies a register pair; ->successor() is the high word.
instruct cmovLI_reg(cmpOp cmp, flagsReg icc, iRegL dst, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));
  ins_cost(150);

  size(8);
  format %{ "MOV$cmp  $dst.lo,$src.lo\t! long\n\t"
            "MOV$cmp  $dst.hi,$src.hi" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
    __ mov($dst$$Register->successor(), $src$$Register->successor(), (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_reg);
%}

// EQNELTGE: only EQ/NE/LT/GE are valid conditions on this flags register.
instruct cmovLI_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegL dst, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  ins_cost(150);

  size(8);
  format %{ "MOV$cmp  $dst.lo,$src.lo\t! long\n\t"
            "MOV$cmp  $dst.hi,$src.hi" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
    __ mov($dst$$Register->successor(), $src$$Register->successor(), (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_reg);
%}

// TODO: try immLRot2 instead, so the constant pair (0, $con$$constant)
// becomes (hi($con$$constant), lo($con$$constant)).
// NOTE(review): cmovLP_immRot casts the constant with (long) here but this
// variant does not — confirm both resolve to the same mov() overload.
instruct cmovLI_immRot(cmpOp cmp, flagsReg icc, iRegL dst, immLlowRot src) %{
  match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));
  ins_cost(140);

  size(8);
  format %{ "MOV$cmp  $dst.lo,$src\t! long\n\t"
            "MOV$cmp  $dst.hi,0" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
    __ mov($dst$$Register->successor(), 0, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}

// TODO: try immLRot2 instead, so the constant pair (0, $con$$constant)
// becomes (hi($con$$constant), lo($con$$constant)).
instruct cmovLI_immRot_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegL dst, immLlowRot src) %{
  match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  ins_cost(140);

  size(8);
  format %{ "MOV$cmp  $dst.lo,$src\t! long\n\t"
            "MOV$cmp  $dst.hi,0" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
    __ mov($dst$$Register->successor(), 0, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}

// NOTE(review): these imm16 variants clear the high word with MOVW #0 where
// cmovLP_imm16 uses MOV #0 — both write zero; confirm the MOVW encoding is
// intentional.
instruct cmovLI_imm16(cmpOp cmp, flagsReg icc, iRegL dst, immL16 src) %{
  match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));
  ins_cost(140);

  size(8);
  format %{ "MOV$cmp  $dst.lo,$src\t! long\n\t"
            "MOV$cmp  $dst.hi,0" %}
  ins_encode %{
    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
    __ movw_i($dst$$Register->successor(), 0, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}

instruct cmovLI_imm16_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegL dst, immL16 src) %{
  match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  ins_cost(140);

  size(8);
  format %{ "MOV$cmp  $dst.lo,$src\t! long\n\t"
            "MOV$cmp  $dst.hi,0" %}
  ins_encode %{
    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
    __ movw_i($dst$$Register->successor(), 0, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}

// Unsigned-flags register/register variant.
instruct cmovLIu_reg(cmpOpU cmp, flagsRegU icc, iRegL dst, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));
  ins_cost(150);

  size(8);
  format %{ "MOV$cmp  $dst.lo,$src.lo\t! long\n\t"
            "MOV$cmp  $dst.hi,$src.hi" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
    __ mov($dst$$Register->successor(), $src$$Register->successor(), (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_reg);
%}
5917 
5918 
//----------OS and Locking Instructions----------------------------------------

// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
// Zero-size: the RthreadRegP operand class pins $dst to the dedicated
// thread register, so no code is needed to "load" TLS.
instruct tlsLoadP(RthreadRegP dst) %{
  match(Set dst (ThreadLocal));

  size(0);
  ins_cost(0);
  format %{ "! TLS is in $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe(ialu_none);
%}

// CheckCastPP is a compile-time type assertion only; no code is emitted.
instruct checkCastPP( iRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "! checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe(empty);
%}


// CastPP narrows the compiler's view of the pointer type; no code is emitted.
instruct castPP( iRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "! castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe(empty);
%}

// CastII adjusts the compiler's view of the int's range; no code is emitted.
instruct castII( iRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "! castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}
5958 
//----------Arithmetic Instructions--------------------------------------------
// Addition Instructions
// Register Addition
instruct addI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
  match(Set dst (AddI src1 src2));

  size(4);
  format %{ "add_32 $dst,$src1,$src2\t! int" %}
  ins_encode %{
    __ add($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Fused shift-and-add patterns: ARM's flexible second operand lets a
// (shift src1 src2) feeding an AddI fold into a single ADD with a shifted
// register.  Register-shift and 5-bit-immediate-shift (immU5) variants are
// provided for each of LShiftI (LSL), RShiftI (ASR) and URShiftI (LSR).
instruct addshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
  match(Set dst (AddI (LShiftI src1 src2) src3));

  size(4);
  format %{ "add_32 $dst,$src3,$src1<<$src2\t! int" %}
  ins_encode %{
    __ add($dst$$Register, $src3$$Register, $src1$$Register, lsl($src2$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct addshlI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
  match(Set dst (AddI (LShiftI src1 src2) src3));

  size(4);
  format %{ "add_32 $dst,$src3,$src1<<$src2\t! int" %}
  ins_encode %{
    __ add($dst$$Register, $src3$$Register, $src1$$Register, lsl($src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct addsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
  match(Set dst (AddI (RShiftI src1 src2) src3));

  size(4);
  format %{ "add_32 $dst,$src3,$src1>>$src2\t! int" %}
  ins_encode %{
    __ add($dst$$Register, $src3$$Register, $src1$$Register, asr($src2$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct addsarI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
  match(Set dst (AddI (RShiftI src1 src2) src3));

  size(4);
  format %{ "add_32 $dst,$src3,$src1>>$src2\t! int" %}
  ins_encode %{
    __ add($dst$$Register, $src3$$Register, $src1$$Register, asr($src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct addshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
  match(Set dst (AddI (URShiftI src1 src2) src3));

  size(4);
  format %{ "add_32 $dst,$src3,$src1>>>$src2\t! int" %}
  ins_encode %{
    __ add($dst$$Register, $src3$$Register, $src1$$Register, lsr($src2$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct addshrI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
  match(Set dst (AddI (URShiftI src1 src2) src3));

  size(4);
  format %{ "add_32 $dst,$src3,$src1>>>$src2\t! int" %}
  ins_encode %{
    __ add($dst$$Register, $src3$$Register, $src1$$Register, lsr($src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

// Immediate Addition (aimmI: an immediate expressible as an ARM
// rotated 8-bit operand).
instruct addI_reg_aimmI(iRegI dst, iRegI src1, aimmI src2) %{
  match(Set dst (AddI src1 src2));

  size(4);
  format %{ "add_32 $dst,$src1,$src2\t! int" %}
  ins_encode %{
    __ add($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(ialu_reg_imm);
%}
6050 
// Pointer Register Addition
instruct addP_reg_reg(iRegP dst, iRegP src1, iRegX src2) %{
  match(Set dst (AddP src1 src2));

  size(4);
  format %{ "ADD    $dst,$src1,$src2\t! ptr" %}
  ins_encode %{
    __ add($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// shifted iRegX operand: matches (LShiftX src2 src3) so a scaled index can
// fold into the addressing of the ADD below.  The MEMORY_INTER view exposes
// the shifted register as base+scale; index(0xff) is the ADL encoding for
// "no index register".
operand shiftedX(iRegX src2, shimmX src3) %{
//constraint(ALLOC_IN_RC(sp_ptr_reg));
  match(LShiftX src2 src3);

  op_cost(1);
  format %{ "$src2 << $src3" %}
  interface(MEMORY_INTER) %{
    base($src2);
    index(0xff);
    scale($src3);
    disp(0x0);
  %}
%}

// Pointer plus shifted index, folded into one ADD with an LSL-shifted operand.
instruct addshlP_reg_reg_imm(iRegP dst, iRegP src1, shiftedX src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(DEFAULT_COST * 3/2);
  size(4);
  format %{ "ADD    $dst,$src1,$src2\t! ptr" %}
  ins_encode %{
    // $src2 is the shiftedX operand: $base is the register, $scale the shift.
    Register base = reg_to_register_object($src2$$base);
    __ add($dst$$Register, $src1$$Register, base, lsl($src2$$scale));
  %}
  ins_pipe(ialu_reg_reg);
%}

// Pointer Immediate Addition (aimmX: rotated-8-bit encodable immediate).
instruct addP_reg_aimmX(iRegP dst, iRegP src1, aimmX src2) %{
  match(Set dst (AddP src1 src2));

  size(4);
  format %{ "ADD    $dst,$src1,$src2\t! ptr" %}
  ins_encode %{
    __ add($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(ialu_reg_imm);
%}
6102 
// Long Addition: ADDS sets the carry from the low words, ADC folds it into
// the high words.  The flags register is clobbered, hence KILL ccr.
instruct addL_reg_reg(iRegL dst, iRegL src1, iRegL src2, flagsReg ccr) %{
  match(Set dst (AddL src1 src2));
  effect(KILL ccr);
  ins_cost(DEFAULT_COST*2);
  size(8);
  format %{ "ADDS    $dst.lo,$src1.lo,$src2.lo\t! long\n\t"
            "ADC     $dst.hi,$src1.hi,$src2.hi" %}
  ins_encode %{
    __ adds($dst$$Register, $src1$$Register, $src2$$Register);
    __ adc($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor());
  %}
  ins_pipe(ialu_reg_reg);
%}

// TODO: try immLRot2 instead, so the constant pair (0, $con$$constant)
// becomes (hi($con$$constant), lo($con$$constant)).
instruct addL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con, flagsReg ccr) %{
  match(Set dst (AddL src1 con));
  effect(KILL ccr);
  size(8);
  format %{ "ADDS    $dst.lo,$src1.lo,$con\t! long\n\t"
            "ADC     $dst.hi,$src1.hi,0" %}
  ins_encode %{
    __ adds($dst$$Register, $src1$$Register, (long)$con$$constant);
    __ adc($dst$$Register->successor(), $src1$$Register->successor(), 0);
  %}
  ins_pipe(ialu_reg_imm);
%}
6132 
//----------Conditional_store--------------------------------------------------
// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success.

// TODO: optimize out barriers with AArch64 load-acquire/store-release
// LoadP-locked.
// Load-exclusive: opens the exclusive monitor for the paired STREX in
// storePConditional below.
instruct loadPLocked(iRegP dst, memoryex mem) %{
  match(Set dst (LoadPLocked mem));
  size(4);
  format %{ "LDREX  $dst,$mem" %}
  ins_encode %{
    __ ldrex($dst$$Register,$mem$$Address);
  %}
  ins_pipe(iload_mem);
%}

// STREX writes 0 into $tmp on success, 1 on failure; the CMP against 0 then
// sets EQ exactly when the store succeeded, which is the contract above.
instruct storePConditional( memoryex heap_top_ptr, iRegP oldval, iRegP newval, iRegI tmp, flagsRegP pcc ) %{
  predicate(_kids[1]->_kids[0]->_leaf->Opcode() == Op_LoadPLocked); // only works in conjunction with a LoadPLocked node
  match(Set pcc (StorePConditional heap_top_ptr (Binary oldval newval)));
  effect( TEMP tmp );
  size(8);
  format %{ "STREX  $tmp,$newval,$heap_top_ptr\n\t"
            "CMP    $tmp, 0" %}
  ins_encode %{
    __ strex($tmp$$Register, $newval$$Register, $heap_top_ptr$$Address);
    __ cmp($tmp$$Register, 0);
  %}
  ins_pipe( long_memory_op );
%}
6163 
// Conditional-store of an intx value.
// LDREX/STREX retry loop: load-exclusive the current value, EORS it against
// $oldval (EQ iff they match), and only then attempt the store-exclusive.
// STREX leaves 1 in $tmp on a lost reservation, in which case we retry;
// the final TEQ re-derives EQ/NE for the consumer of $icc.
// Note: the format previously printed "XORS", which is not an ARM mnemonic;
// the encoding emits EORS, so the listing now says EORS.
instruct storeXConditional( memoryex mem, iRegX oldval, iRegX newval, iRegX tmp, flagsReg icc ) %{
  match(Set icc (StoreIConditional mem (Binary oldval newval)));
  effect( TEMP tmp );
  size(28);
  format %{ "loop: \n\t"
            "LDREX    $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem], DOESN'T set $newval=[$mem] in any case\n\t"
            "EORS     $tmp,$tmp, $oldval\n\t"
            "STREX.eq $tmp, $newval, $mem\n\t"
            "CMP.eq   $tmp, 1 \n\t"
            "B.eq     loop \n\t"
            "TEQ      $tmp, 0\n\t"
            "membar   LoadStore|LoadLoad" %}
  ins_encode %{
    Label loop;
    __ bind(loop);
    __ ldrex($tmp$$Register, $mem$$Address);
    // EQ iff the loaded value equals $oldval.
    __ eors($tmp$$Register, $tmp$$Register, $oldval$$Register);
    __ strex($tmp$$Register, $newval$$Register, $mem$$Address, Assembler::EQ);
    // On a matched compare, $tmp==1 means the exclusive store failed: retry.
    __ cmp($tmp$$Register, 1, Assembler::EQ);
    __ b(loop, Assembler::EQ);
    __ teq($tmp$$Register, 0);
    // used by biased locking only. Requires a membar.
    __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadStore | MacroAssembler::LoadLoad));
  %}
  ins_pipe( long_memory_op );
%}
6191 
// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS returning a boolean in $res.
// LDREXD/STREXD require the even/odd register pair of $tmp ($tmp.lo/$tmp.hi).
// Flow: compare both halves against $oldval (EQ only if both match), attempt
// STREXD.eq; MOV.ne clears $tmp when the compare failed, EORS.eq flips the
// STREXD status (0 -> 1 success / Z clear, 1 -> 0 lost reservation / Z set,
// so B.eq retries).  $tmp finally holds the 0/1 result copied into $res.
// Note: the format previously printed "XORS.eq", which is not an ARM
// mnemonic; the encoding emits EORS, so the listing now says EORS.eq
// (consistent with compareAndSwapP_bool).
instruct compareAndSwapL_bool(memoryex mem, iRegL oldval, iRegLd newval, iRegI res, iRegLd tmp, flagsReg ccr ) %{
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  effect( KILL ccr, TEMP tmp);
  size(32);
  format %{ "loop: \n\t"
            "LDREXD   $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t"
            "CMP      $tmp.lo, $oldval.lo\n\t"
            "CMP.eq   $tmp.hi, $oldval.hi\n\t"
            "STREXD.eq $tmp, $newval, $mem\n\t"
            "MOV.ne   $tmp, 0 \n\t"
            "EORS.eq  $tmp,$tmp, 1 \n\t"
            "B.eq     loop \n\t"
            "MOV      $res, $tmp" %}
  ins_encode %{
    Label loop;
    __ bind(loop);
    __ ldrexd($tmp$$Register, $mem$$Address);
    __ cmp($tmp$$Register, $oldval$$Register);
    __ cmp($tmp$$Register->successor(), $oldval$$Register->successor(), Assembler::EQ);
    __ strexd($tmp$$Register, $newval$$Register, $mem$$Address, Assembler::EQ);
    __ mov($tmp$$Register, 0, Assembler::NE);
    __ eors($tmp$$Register, $tmp$$Register, 1, Assembler::EQ);
    __ b(loop, Assembler::EQ);
    __ mov($res$$Register, $tmp$$Register);
  %}
  ins_pipe( long_memory_op );
%}
6221 
6222 
// 32-bit CAS returning a boolean in $res.  Same retry scheme as
// compareAndSwapL_bool: MOV.ne clears $tmp on compare failure, EORS.eq flips
// the STREX status so success yields 1 (Z clear) and a lost reservation
// yields 0 with Z set, which B.eq turns into a retry.
// Note: the format previously printed "XORS.eq", which is not an ARM
// mnemonic; the encoding emits EORS, so the listing now says EORS.eq
// (consistent with compareAndSwapP_bool).
instruct compareAndSwapI_bool(memoryex mem, iRegI oldval, iRegI newval, iRegI res, iRegI tmp, flagsReg ccr ) %{
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  effect( KILL ccr, TEMP tmp);
  size(28);
  format %{ "loop: \n\t"
            "LDREX    $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t"
            "CMP      $tmp, $oldval\n\t"
            "STREX.eq $tmp, $newval, $mem\n\t"
            "MOV.ne   $tmp, 0 \n\t"
            "EORS.eq  $tmp,$tmp, 1 \n\t"
            "B.eq     loop \n\t"
            "MOV      $res, $tmp" %}

  ins_encode %{
    Label loop;
    __ bind(loop);
    __ ldrex($tmp$$Register,$mem$$Address);
    __ cmp($tmp$$Register, $oldval$$Register);
    __ strex($tmp$$Register, $newval$$Register, $mem$$Address, Assembler::EQ);
    __ mov($tmp$$Register, 0, Assembler::NE);
    __ eors($tmp$$Register, $tmp$$Register, 1, Assembler::EQ);
    __ b(loop, Assembler::EQ);
    __ mov($res$$Register, $tmp$$Register);
  %}
  ins_pipe( long_memory_op );
%}
6249 
// Pointer CAS returning a boolean in $res.  Same retry scheme as the I/L
// variants: MOV.ne clears $tmp on compare failure, EORS.eq flips the STREX
// status so success yields 1 (Z clear) and a lost reservation yields 0 with
// Z set, which B.eq turns into a retry.
instruct compareAndSwapP_bool(memoryex mem, iRegP oldval, iRegP newval, iRegI res, iRegI tmp, flagsReg ccr ) %{
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  effect( KILL ccr, TEMP tmp);
  size(28);
  format %{ "loop: \n\t"
            "LDREX    $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t"
            "CMP      $tmp, $oldval\n\t"
            "STREX.eq $tmp, $newval, $mem\n\t"
            "MOV.ne   $tmp, 0 \n\t"
            "EORS.eq  $tmp,$tmp, 1 \n\t"
            "B.eq     loop \n\t"
            "MOV      $res, $tmp" %}

  ins_encode %{
    Label loop;
    __ bind(loop);
    __ ldrex($tmp$$Register,$mem$$Address);
    __ cmp($tmp$$Register, $oldval$$Register);
    __ strex($tmp$$Register, $newval$$Register, $mem$$Address, Assembler::EQ);
    __ mov($tmp$$Register, 0, Assembler::NE);
    __ eors($tmp$$Register, $tmp$$Register, 1, Assembler::EQ);
    __ b(loop, Assembler::EQ);
    __ mov($res$$Register, $tmp$$Register);
  %}
  ins_pipe( long_memory_op );
%}
6276 
// Atomic add (GetAndAddI) via an LDREX/ADD/STREX retry loop.  The _no_res
// variants match only when the fetched value is unused
// (result_not_used()), so the old value is discarded; the others return it
// in $res.  $tmp2 receives the STREX status (0 = success) and the loop
// retries until the store-exclusive succeeds.
instruct xaddI_aimmI_no_res(memoryex mem, aimmI add, Universe dummy, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL ccr, TEMP tmp1, TEMP tmp2);
  size(20);
  format %{ "loop: \n\t"
            "LDREX    $tmp1, $mem\n\t"
            "ADD      $tmp1, $tmp1, $add\n\t"
            "STREX    $tmp2, $tmp1, $mem\n\t"
            "CMP      $tmp2, 0 \n\t"
            "B.ne     loop \n\t" %}

  ins_encode %{
    Label loop;
    __ bind(loop);
    __ ldrex($tmp1$$Register,$mem$$Address);
    __ add($tmp1$$Register, $tmp1$$Register, $add$$constant);
    __ strex($tmp2$$Register, $tmp1$$Register, $mem$$Address);
    __ cmp($tmp2$$Register, 0);
    __ b(loop, Assembler::NE);
  %}
  ins_pipe( long_memory_op );
%}

instruct xaddI_reg_no_res(memoryex mem, iRegI add, Universe dummy, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL ccr, TEMP tmp1, TEMP tmp2);
  size(20);
  format %{ "loop: \n\t"
            "LDREX    $tmp1, $mem\n\t"
            "ADD      $tmp1, $tmp1, $add\n\t"
            "STREX    $tmp2, $tmp1, $mem\n\t"
            "CMP      $tmp2, 0 \n\t"
            "B.ne     loop \n\t" %}

  ins_encode %{
    Label loop;
    __ bind(loop);
    __ ldrex($tmp1$$Register,$mem$$Address);
    __ add($tmp1$$Register, $tmp1$$Register, $add$$Register);
    __ strex($tmp2$$Register, $tmp1$$Register, $mem$$Address);
    __ cmp($tmp2$$Register, 0);
    __ b(loop, Assembler::NE);
  %}
  ins_pipe( long_memory_op );
%}

// Result-returning variants: the pre-add value is loaded into $res and the
// sum goes through $tmp1, so $res holds the old value on exit.
instruct xaddI_aimmI(memoryex mem, aimmI add, iRegI res, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{
  match(Set res (GetAndAddI mem add));
  effect(KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res);
  size(20);
  format %{ "loop: \n\t"
            "LDREX    $res, $mem\n\t"
            "ADD      $tmp1, $res, $add\n\t"
            "STREX    $tmp2, $tmp1, $mem\n\t"
            "CMP      $tmp2, 0 \n\t"
            "B.ne     loop \n\t" %}

  ins_encode %{
    Label loop;
    __ bind(loop);
    __ ldrex($res$$Register,$mem$$Address);
    __ add($tmp1$$Register, $res$$Register, $add$$constant);
    __ strex($tmp2$$Register, $tmp1$$Register, $mem$$Address);
    __ cmp($tmp2$$Register, 0);
    __ b(loop, Assembler::NE);
  %}
  ins_pipe( long_memory_op );
%}

instruct xaddI_reg(memoryex mem, iRegI add, iRegI res, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{
  match(Set res (GetAndAddI mem add));
  effect(KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res);
  size(20);
  format %{ "loop: \n\t"
            "LDREX    $res, $mem\n\t"
            "ADD      $tmp1, $res, $add\n\t"
            "STREX    $tmp2, $tmp1, $mem\n\t"
            "CMP      $tmp2, 0 \n\t"
            "B.ne     loop \n\t" %}

  ins_encode %{
    Label loop;
    __ bind(loop);
    __ ldrex($res$$Register,$mem$$Address);
    __ add($tmp1$$Register, $res$$Register, $add$$Register);
    __ strex($tmp2$$Register, $tmp1$$Register, $mem$$Address);
    __ cmp($tmp2$$Register, 0);
    __ b(loop, Assembler::NE);
  %}
  ins_pipe( long_memory_op );
%}
6370 
6371 instruct xaddL_reg_no_res(memoryex mem, iRegL add, Universe dummy, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{
6372   predicate(n->as_LoadStore()->result_not_used());
6373   match(Set dummy (GetAndAddL mem add));
6374   effect( KILL ccr, TEMP tmp1, TEMP tmp2);
6375   size(24);
6376   format %{ "loop: \n\t"
6377             "LDREXD   $tmp1, $mem\n\t"
6378             "ADDS     $tmp1.lo, $tmp1.lo, $add.lo\n\t"
6379             "ADC      $tmp1.hi, $tmp1.hi, $add.hi\n\t"
6380             "STREXD   $tmp2, $tmp1, $mem\n\t"
6381             "CMP      $tmp2, 0 \n\t"
6382             "B.ne     loop \n\t" %}
6383 
6384   ins_encode %{
6385     Label loop;
6386     __ bind(loop);
6387     __ ldrexd($tmp1$$Register, $mem$$Address);
6388     __ adds($tmp1$$Register, $tmp1$$Register, $add$$Register);
6389     __ adc($tmp1$$Register->successor(), $tmp1$$Register->successor(), $add$$Register->successor());
6390     __ strexd($tmp2$$Register, $tmp1$$Register, $mem$$Address);
6391     __ cmp($tmp2$$Register, 0);
6392     __ b(loop, Assembler::NE);
6393   %}
6394   ins_pipe( long_memory_op );
6395 %}
6396 
6397 // TODO: try immLRot2 instead, (0, $con$$constant) becomes
6398 // (hi($con$$constant), lo($con$$constant)) becomes
// GetAndAddL (result unused) with a rotated-immediate addend whose high
// word is zero: the high word therefore only absorbs the carry (ADC #0).
instruct xaddL_immRot_no_res(memoryex mem, immLlowRot add, Universe dummy, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem add));
  effect( KILL ccr, TEMP tmp1, TEMP tmp2);
  size(24);
  format %{ "loop: \n\t"
            "LDREXD   $tmp1, $mem\n\t"
            "ADDS     $tmp1.lo, $tmp1.lo, $add\n\t"
            "ADC      $tmp1.hi, $tmp1.hi, 0\n\t"
            "STREXD   $tmp2, $tmp1, $mem\n\t"
            "CMP      $tmp2, 0 \n\t"
            "B.ne     loop \n\t" %}

  ins_encode %{
    Label loop;
    __ bind(loop);
    __ ldrexd($tmp1$$Register, $mem$$Address);
    __ adds($tmp1$$Register, $tmp1$$Register, (long)$add$$constant);
    __ adc($tmp1$$Register->successor(), $tmp1$$Register->successor(), 0);
    __ strexd($tmp2$$Register, $tmp1$$Register, $mem$$Address);
    __ cmp($tmp2$$Register, 0);
    __ b(loop, Assembler::NE);
  %}
  ins_pipe( long_memory_op );
%}
6424 
// GetAndAddL returning the old value: res holds the 64-bit value fetched by
// LDREXD; the sum is built in tmp1 so res survives a retry of the loop.
instruct xaddL_reg(memoryex mem, iRegL add, iRegLd res, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{
  match(Set res (GetAndAddL mem add));
  effect( KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res);
  size(24);
  format %{ "loop: \n\t"
            "LDREXD   $res, $mem\n\t"
            "ADDS     $tmp1.lo, $res.lo, $add.lo\n\t"
            "ADC      $tmp1.hi, $res.hi, $add.hi\n\t"
            "STREXD   $tmp2, $tmp1, $mem\n\t"
            "CMP      $tmp2, 0 \n\t"
            "B.ne     loop \n\t" %}

  ins_encode %{
    Label loop;
    __ bind(loop);
    __ ldrexd($res$$Register, $mem$$Address);
    __ adds($tmp1$$Register, $res$$Register, $add$$Register);
    __ adc($tmp1$$Register->successor(), $res$$Register->successor(), $add$$Register->successor());
    __ strexd($tmp2$$Register, $tmp1$$Register, $mem$$Address);
    __ cmp($tmp2$$Register, 0);
    __ b(loop, Assembler::NE);
  %}
  ins_pipe( long_memory_op );
%}
6449 
6450 // TODO: try immLRot2 instead, (0, $con$$constant) becomes
6451 // (hi($con$$constant), lo($con$$constant)) becomes
// GetAndAddL returning the old value, rotated-immediate addend (high word
// zero, so the high word only takes the carry via ADC #0).
instruct xaddL_immRot(memoryex mem, immLlowRot add, iRegLd res, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{
  match(Set res (GetAndAddL mem add));
  effect( KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res);
  size(24);
  format %{ "loop: \n\t"
            "LDREXD   $res, $mem\n\t"
            "ADDS     $tmp1.lo, $res.lo, $add\n\t"
            "ADC      $tmp1.hi, $res.hi, 0\n\t"
            "STREXD   $tmp2, $tmp1, $mem\n\t"
            "CMP      $tmp2, 0 \n\t"
            "B.ne     loop \n\t" %}

  ins_encode %{
    Label loop;
    __ bind(loop);
    __ ldrexd($res$$Register, $mem$$Address);
    __ adds($tmp1$$Register, $res$$Register, (long)$add$$constant);
    __ adc($tmp1$$Register->successor(), $res$$Register->successor(), 0);
    __ strexd($tmp2$$Register, $tmp1$$Register, $mem$$Address);
    __ cmp($tmp2$$Register, 0);
    __ b(loop, Assembler::NE);
  %}
  ins_pipe( long_memory_op );
%}
6476 
// GetAndSetI: atomic 32-bit swap via an LDREX/STREX retry loop; res returns
// the previous memory value, tmp holds the store-exclusive status.
instruct xchgI(memoryex mem, iRegI newval, iRegI res, iRegI tmp, flagsReg ccr) %{
  match(Set res (GetAndSetI mem newval));
  effect(KILL ccr, TEMP tmp, TEMP res);
  size(16);
  format %{ "loop: \n\t"
            "LDREX    $res, $mem\n\t"
            "STREX    $tmp, $newval, $mem\n\t"
            "CMP      $tmp, 0 \n\t"
            "B.ne     loop \n\t" %}

  ins_encode %{
    Label loop;
    __ bind(loop);
    __ ldrex($res$$Register,$mem$$Address);
    __ strex($tmp$$Register, $newval$$Register, $mem$$Address);
    __ cmp($tmp$$Register, 0);
    __ b(loop, Assembler::NE);
  %}
  ins_pipe( long_memory_op );
%}
6497 
// GetAndSetL: atomic 64-bit swap using the doubleword exclusives
// (LDREXD/STREXD) on an even/odd register pair (iRegLd).
instruct xchgL(memoryex mem, iRegLd newval, iRegLd res, iRegI tmp, flagsReg ccr) %{
  match(Set res (GetAndSetL mem newval));
  effect( KILL ccr, TEMP tmp, TEMP res);
  size(16);
  format %{ "loop: \n\t"
            "LDREXD   $res, $mem\n\t"
            "STREXD   $tmp, $newval, $mem\n\t"
            "CMP      $tmp, 0 \n\t"
            "B.ne     loop \n\t" %}

  ins_encode %{
    Label loop;
    __ bind(loop);
    __ ldrexd($res$$Register, $mem$$Address);
    __ strexd($tmp$$Register, $newval$$Register, $mem$$Address);
    __ cmp($tmp$$Register, 0);
    __ b(loop, Assembler::NE);
  %}
  ins_pipe( long_memory_op );
%}
6518 
// GetAndSetP: atomic pointer swap; identical code shape to xchgI since
// pointers are 32-bit on AArch32.
instruct xchgP(memoryex mem, iRegP newval, iRegP res, iRegI tmp, flagsReg ccr) %{
  match(Set res (GetAndSetP mem newval));
  effect(KILL ccr, TEMP tmp, TEMP res);
  size(16);
  format %{ "loop: \n\t"
            "LDREX    $res, $mem\n\t"
            "STREX    $tmp, $newval, $mem\n\t"
            "CMP      $tmp, 0 \n\t"
            "B.ne     loop \n\t" %}

  ins_encode %{
    Label loop;
    __ bind(loop);
    __ ldrex($res$$Register,$mem$$Address);
    __ strex($tmp$$Register, $newval$$Register, $mem$$Address);
    __ cmp($tmp$$Register, 0);
    __ b(loop, Assembler::NE);
  %}
  ins_pipe( long_memory_op );
%}
6539 
6540 //---------------------
6541 // Subtraction Instructions
6542 // Register Subtraction
// 32-bit integer subtraction, register - register.
instruct subI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
  match(Set dst (SubI src1 src2));

  size(4);
  format %{ "sub_32 $dst,$src1,$src2\t! int" %}
  ins_encode %{
    __ sub($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
6553 
// Integer subtraction with a shifted second operand, folded into a single
// SUB using the ARM flexible (shifted-register) operand. Variants cover
// left shift (<<), arithmetic right (>>) and logical right (>>>), each with
// a register or a 5-bit immediate shift amount.
instruct subshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  size(4);
  format %{ "SUB    $dst,$src1,$src2<<$src3" %}
  ins_encode %{
    __ sub($dst$$Register, $src1$$Register, $src2$$Register, lsl($src3$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct subshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  size(4);
  format %{ "sub_32 $dst,$src1,$src2<<$src3\t! int" %}
  ins_encode %{
    __ sub($dst$$Register, $src1$$Register, $src2$$Register, lsl($src3$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct subsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  size(4);
  format %{ "SUB    $dst,$src1,$src2>>$src3" %}
  ins_encode %{
    __ sub($dst$$Register, $src1$$Register, $src2$$Register, asr($src3$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct subsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  size(4);
  format %{ "sub_32 $dst,$src1,$src2>>$src3\t! int" %}
  ins_encode %{
    __ sub($dst$$Register, $src1$$Register, $src2$$Register, asr($src3$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct subshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  size(4);
  format %{ "SUB    $dst,$src1,$src2>>>$src3" %}
  ins_encode %{
    __ sub($dst$$Register, $src1$$Register, $src2$$Register, lsr($src3$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct subshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  size(4);
  format %{ "sub_32 $dst,$src1,$src2>>>$src3\t! int" %}
  ins_encode %{
    __ sub($dst$$Register, $src1$$Register, $src2$$Register, lsr($src3$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
6619 
// (src1 shift src2) - src3 folded into a single RSB: the shifted value must
// be the flexible operand, so the operands are reversed relative to SUB.
// Same six variants as the SUB family above (<<, >>, >>> x reg/imm shift).
instruct rsbshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
  match(Set dst (SubI (LShiftI src1 src2) src3));

  size(4);
  format %{ "RSB    $dst,$src3,$src1<<$src2" %}
  ins_encode %{
    __ rsb($dst$$Register, $src3$$Register, $src1$$Register, lsl($src2$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct rsbshlI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
  match(Set dst (SubI (LShiftI src1 src2) src3));

  size(4);
  format %{ "RSB    $dst,$src3,$src1<<$src2" %}
  ins_encode %{
    __ rsb($dst$$Register, $src3$$Register, $src1$$Register, lsl($src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct rsbsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
  match(Set dst (SubI (RShiftI src1 src2) src3));

  size(4);
  format %{ "RSB    $dst,$src3,$src1>>$src2" %}
  ins_encode %{
    __ rsb($dst$$Register, $src3$$Register, $src1$$Register, asr($src2$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct rsbsarI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
  match(Set dst (SubI (RShiftI src1 src2) src3));

  size(4);
  format %{ "RSB    $dst,$src3,$src1>>$src2" %}
  ins_encode %{
    __ rsb($dst$$Register, $src3$$Register, $src1$$Register, asr($src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct rsbshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
  match(Set dst (SubI (URShiftI src1 src2) src3));

  size(4);
  format %{ "RSB    $dst,$src3,$src1>>>$src2" %}
  ins_encode %{
    __ rsb($dst$$Register, $src3$$Register, $src1$$Register, lsr($src2$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct rsbshrI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
  match(Set dst (SubI (URShiftI src1 src2) src3));

  size(4);
  format %{ "RSB    $dst,$src3,$src1>>>$src2" %}
  ins_encode %{
    __ rsb($dst$$Register, $src3$$Register, $src1$$Register, lsr($src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
6685 
6686 // Immediate Subtraction
// Subtract an ARM addressing-mode-1 (rotated 8-bit) immediate.
instruct subI_reg_aimmI(iRegI dst, iRegI src1, aimmI src2) %{
  match(Set dst (SubI src1 src2));

  size(4);
  format %{ "sub_32 $dst,$src1,$src2\t! int" %}
  ins_encode %{
    __ sub($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(ialu_reg_imm);
%}
6697 
// AddI of a constant whose negation is a valid rotated immediate:
// emitted as SUB of the negated constant.
instruct subI_reg_immRotneg(iRegI dst, iRegI src1, aimmIneg src2) %{
  match(Set dst (AddI src1 src2));

  size(4);
  format %{ "sub_32 $dst,$src1,-($src2)\t! int" %}
  ins_encode %{
    __ sub($dst$$Register, $src1$$Register, -$src2$$constant);
  %}
  ins_pipe(ialu_reg_imm);
%}
6708 
// Constant - register: RSB computes $src1 - $src2 with the rotated-immediate
// constant as the second (flexible) operand.
instruct subI_immRot_reg(iRegI dst, immIRot src1, iRegI src2) %{
  match(Set dst (SubI src1 src2));

  size(4);
  // Fixed: the original format said "src1" without '$', so the disassembly
  // comment printed the literal text "src1" instead of the constant value.
  format %{ "RSB    $dst,$src2,$src1" %}
  ins_encode %{
    __ rsb($dst$$Register, $src2$$Register, $src1$$constant);
  %}
  ins_pipe(ialu_zero_reg);
%}
6719 
6720 // Register Subtraction
// 64-bit subtraction: SUBS sets the carry (borrow) consumed by SBC for the
// high word, hence the KILL of the flags register.
instruct subL_reg_reg(iRegL dst, iRegL src1, iRegL src2, flagsReg icc ) %{
  match(Set dst (SubL src1 src2));
  effect (KILL icc);

  size(8);
  format %{ "SUBS   $dst.lo,$src1.lo,$src2.lo\t! long\n\t"
            "SBC    $dst.hi,$src1.hi,$src2.hi" %}
  ins_encode %{
    __ subs($dst$$Register, $src1$$Register, $src2$$Register);
    __ sbc($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor());
  %}
  ins_pipe(ialu_reg_reg);
%}
6734 
6735 // Immediate Subtraction
6736 // TODO: try immLRot2 instead, (0, $con$$constant) becomes
6737 // (hi($con$$constant), lo($con$$constant)) becomes
// 64-bit subtraction of a rotated immediate whose high word is zero:
// SUBS sets the borrow for the following SBC of the high word.
instruct subL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con, flagsReg icc) %{
  match(Set dst (SubL src1 con));
  effect (KILL icc);

  size(8);
  // Fixed: format said "SUB" but the encoding emits the flag-setting SUBS
  // (required so SBC sees the borrow); debug output now matches the code.
  format %{ "SUBS   $dst.lo,$src1.lo,$con\t! long\n\t"
            "SBC    $dst.hi,$src1.hi,0" %}
  ins_encode %{
    __ subs($dst$$Register, $src1$$Register, (long)$con$$constant);
    __ sbc($dst$$Register->successor(), $src1$$Register->successor(), 0);
  %}
  ins_pipe(ialu_reg_imm);
%}
6751 
6752 // Long negation
// Long negation (0 - src2): RSBS/RSC reverse-subtract pair carries the
// borrow from the low word into the high word.
instruct negL_reg_reg(iRegL dst, immL0 zero, iRegL src2, flagsReg icc) %{
  match(Set dst (SubL zero src2));
  effect (KILL icc);

  size(8);
  format %{ "RSBS   $dst.lo,$src2.lo,0\t! long\n\t"
            "RSC    $dst.hi,$src2.hi,0" %}
  ins_encode %{
    __ rsbs($dst$$Register, $src2$$Register, 0);
    __ rsc($dst$$Register->successor(), $src2$$Register->successor(), 0);
  %}
  ins_pipe(ialu_zero_reg);
%}
6766 
6767 // Multiplication Instructions
6768 // Integer Multiplication
6769 // Register Multiplication
// 32-bit integer multiply, register-register.
instruct mulI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(DEFAULT_COST);
  size(4);
  format %{ "mul_32 $dst,$src1,$src2" %}
  ins_encode %{
    __ mul($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(imul_reg_reg);
%}
6781 
// 64x64 -> 64-bit multiply, built by mulL_reg_reg's expand from three
// partial products (the three instructs below are match-less helpers used
// only by that expand, in this order):
//   1. dst.hi  = src1.lo * src2.hi                 (mulL_lo1_hi2)
//   2. dst.hi += src1.hi * src2.lo; dst.lo = 0     (mulL_hi1_lo2)
//   3. dst    += src1.lo * src2.lo (64-bit UMLAL)  (mulL_lo1_lo2)
instruct mulL_lo1_hi2(iRegL dst, iRegL src1, iRegL src2) %{
  effect(DEF dst, USE src1, USE src2);
  ins_cost(DEFAULT_COST);
  size(4);
  format %{ "MUL  $dst.hi,$src1.lo,$src2.hi\t! long" %}
  ins_encode %{
    __ mul($dst$$Register->successor(), $src1$$Register, $src2$$Register->successor());
  %}
  ins_pipe(imul_reg_reg);
%}

instruct mulL_hi1_lo2(iRegL dst, iRegL src1, iRegL src2) %{
  effect(USE_DEF dst, USE src1, USE src2);
  ins_cost(DEFAULT_COST*3/2);
  size(8);
  format %{ "MLA  $dst.hi,$src1.hi,$src2.lo,$dst.hi\t! long\n\t"
            "MOV  $dst.lo, 0"%}
  ins_encode %{
    __ mla($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register, $dst$$Register->successor());
    // Clear dst.lo so the final UMLAL accumulates into (0, hi-partials).
    __ mov($dst$$Register, 0);
  %}
  ins_pipe(imul_reg_reg);
%}

instruct mulL_lo1_lo2(iRegL dst, iRegL src1, iRegL src2) %{
  effect(USE_DEF dst, USE src1, USE src2);
  ins_cost(DEFAULT_COST*3/2);
  size(4);
  format %{ "UMLAL  $dst.lo,$dst.hi,$src1,$src2\t! long" %}
  ins_encode %{
    __ umlal($dst$$Register, $dst$$Register->successor(), $src1$$Register, $src2$$Register);
  %}
  ins_pipe(imul_reg_reg);
%}

instruct mulL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));
  ins_cost(DEFAULT_COST*8/2);

  expand %{
    mulL_lo1_hi2(dst, src1, src2);
    mulL_hi1_lo2(dst, src1, src2);
    mulL_lo1_lo2(dst, src1, src2);
  %}
%}
6827 
// Fused multiply-accumulate: dst = src1 * src2 + srcA.
instruct mla_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI srcA) %{
  match(Set dst (AddI (MulI src1 src2) srcA));

  ins_cost(DEFAULT_COST*3/2);
  size(4);
  format %{ "MLA $dst,$src1,$src2,$srcA" %}
  ins_encode %{
    __ mla($dst$$Register, $src1$$Register, $src2$$Register, $srcA$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
6839 
// Fused multiply-subtract: dst = srcA - src1 * src2.
instruct mls_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI srcA) %{
  match(Set dst (SubI srcA (MulI src1 src2)));

  ins_cost(DEFAULT_COST*3/2);
  size(4);
  format %{ "MLS $dst,$src1,$src2,$srcA" %}
  ins_encode %{
    __ mls($dst$$Register, $src1$$Register, $src2$$Register, $srcA$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
6851 
// Signed 32x32 -> 64 multiply-accumulate into dst (dst += src1 * src2);
// note dst appears as both an input and the accumulator in the match tree.
instruct smlal_reg_reg_reg(iRegL dst, iRegI src1, iRegI src2) %{
  match(Set dst (AddL (MulL (ConvI2L src1) (ConvI2L src2)) dst));

  ins_cost(DEFAULT_COST*3/2);
  size(4);
  format %{ "SMLAL $dst.lo,$dst.hi,$src1,$src2" %}
  ins_encode %{
    __ smlal($dst$$Register, $dst$$Register->successor(), $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
6863 
// Signed 32x32 -> 64 multiply (avoids widening both operands first).
instruct smull_reg_reg_reg(iRegL dst, iRegI src1, iRegI src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(DEFAULT_COST*3/2);
  size(4);
  format %{ "SMULL $dst.lo,$dst.hi,$src1,$src2" %}
  ins_encode %{
    __ smull($dst$$Register, $dst$$Register->successor(), $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
6875 
6876 // Integer Division
6877 // Register Division
// Integer division using the hardware SDIV instruction; only selected when
// the CPU advertises the hardware-divide feature (FT_HW_DIVIDE).
instruct divI_reg_reg_IDIV(iRegI dst, iRegI src1, iRegI src2) %{
  match(Set dst (DivI src1 src2));
  predicate(VM_Version::features() & FT_HW_DIVIDE);
  ins_cost(2*DEFAULT_COST);

  format %{ "SDIV   $dst,$src1,$src2"%}
  ins_encode %{
    __ sdiv($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(sdiv_reg_reg_IDIV);
%}
6889 
// Software integer division fallback (no hardware divide): calls the
// idiv_entry stub, which imposes the fixed register assignments seen in the
// operand list (args in R1/R2, result in R0; R9/R12/LR clobbered).
instruct divI_reg_reg_SW(R0RegI dst, R1RegI src1, R2RegI src2, R9RegI temp1, R12RegI temp2, LRRegP lr, flagsReg ccr) %{
  match(Set dst (DivI src1 src2));
  predicate(!(VM_Version::features() & FT_HW_DIVIDE));
  effect( KILL ccr, TEMP temp1, TEMP temp2, USE_KILL src1,USE_KILL src2, KILL lr);
  ins_cost((2+71)*DEFAULT_COST);

  format %{ "DIV   $dst,$src1,$src2 ! call to StubRoutines::aarch32::idiv_entry()" %}
  ins_encode %{
    __ call(StubRoutines::aarch32::idiv_entry(), relocInfo::runtime_call_type);
  %}
  ins_pipe(sdiv_reg_reg_SW);
%}
6902 
6903 // Register Long Division
// 64-bit division via runtime call to SharedRuntime::ldiv; register pairs
// are fixed by the calling convention (args in R2:R3/R0:R1, result R0:R1).
instruct divL_reg_reg(R0R1RegL dst, R2R3RegL src1, R0R1RegL src2) %{
  match(Set dst (DivL src1 src2));
  effect(CALL);
  ins_cost(DEFAULT_COST*71);
  format %{ "DIVL  $src1,$src2,$dst\t! long ! call to SharedRuntime::ldiv" %}
  ins_encode %{
    address target = CAST_FROM_FN_PTR(address, SharedRuntime::ldiv);
    __ call(target, relocInfo::runtime_call_type);
  %}
  ins_pipe(divL_reg_reg);
%}
6915 
6916 // Integer Remainder
6917 // Register Remainder
// Integer remainder with hardware divide: quotient via SDIV, then
// remainder = src1 - quotient * src2 via MLS.
instruct modI_reg_reg_IDIV(iRegI dst, iRegI src1, iRegI src2, iRegI temp) %{
  match(Set dst (ModI src1 src2));
  predicate(VM_Version::features() & FT_HW_DIVIDE);
  effect( TEMP temp);

  format %{ "SDIV   $temp,$src1,$src2\n\t"
            "MLS    $dst, $temp, $src2, $src1"%}
  ins_encode %{
    __ sdiv($temp$$Register, $src1$$Register, $src2$$Register);
    __ mls($dst$$Register, $temp$$Register, $src2$$Register, $src1$$Register);
  %}
  ins_pipe(sdiv_reg_reg_IDIV);
%}
6931 
// Software integer remainder fallback: calls the irem_entry stub with the
// same fixed-register ABI as the software divide above.
instruct modI_reg_reg_SW(R0RegI dst, R1RegI src1, R2RegI src2, R9RegI temp1, R12RegI temp2, LRRegP lr, flagsReg ccr ) %{
  match(Set dst (ModI src1 src2));
  predicate(!(VM_Version::features() & FT_HW_DIVIDE));
  effect( KILL ccr, TEMP temp1, TEMP temp2, KILL lr, USE_KILL src1, USE_KILL src2);

  format %{ "MODI   $dst,$src1,$src2\t ! call to StubRoutines::aarch32::irem_entry" %}
  ins_encode %{
    __ call(StubRoutines::aarch32::irem_entry(), relocInfo::runtime_call_type);
  %}
  ins_pipe(sdiv_reg_reg_SW);
%}
6943 
6944 // Register Long Remainder
// 64-bit remainder via runtime call to SharedRuntime::lrem.
instruct modL_reg_reg(R0R1RegL dst, R2R3RegL src1, R0R1RegL src2) %{
  match(Set dst (ModL src1 src2));
  effect(CALL);
  ins_cost(MEMORY_REF_COST); // FIXME
  format %{ "modL    $dst,$src1,$src2\t ! call to SharedRuntime::lrem" %}
  ins_encode %{
    address target = CAST_FROM_FN_PTR(address, SharedRuntime::lrem);
    __ call(target, relocInfo::runtime_call_type);
  %}
  ins_pipe(divL_reg_reg);
%}
6956 
6957 // Integer Shift Instructions
6958 
6959 // Register Shift Left
// 32-bit left shift by a register or a 5-bit immediate amount.
instruct shlI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
  match(Set dst (LShiftI src1 src2));

  size(4);
  format %{ "LSL  $dst,$src1,$src2 \n\t" %}
  ins_encode %{
    __ mov($dst$$Register, $src1$$Register, lsl($src2$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}

// Register Shift Left Immediate
instruct shlI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{
  match(Set dst (LShiftI src1 src2));

  size(4);
  format %{ "LSL    $dst,$src1,$src2\t! int" %}
  ins_encode %{
    __ lsl($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(ialu_reg_imm);
%}
6982 
// 64-bit left shift by a variable amount, built by shlL_reg_reg's expand
// from three steps (the two merge helpers and the overlap helper below):
//   overlap:  handle the shift-count >= 32 case into dst.hi (conditional
//             LSL/LSR selected on the sign of src2-32)
//   merge_hi: OR in src1.hi << src2 for counts < 32
//   merge_lo: dst.lo = src1.lo << src2
instruct shlL_reg_reg_merge_hi(iRegL dst, iRegL src1, iRegI src2) %{
  effect(USE_DEF dst, USE src1, USE src2);
  size(4);
  format %{"OR  $dst.hi,$dst.hi,($src1.hi << $src2)"  %}
  ins_encode %{
    __ orr($dst$$Register->successor(), $dst$$Register->successor(), $src1$$Register->successor(), lsl($src2$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct shlL_reg_reg_merge_lo(iRegL dst, iRegL src1, iRegI src2) %{
  effect(USE_DEF dst, USE src1, USE src2);
  size(4);
  format %{ "LSL  $dst.lo,$src1.lo,$src2 \n\t" %}
  ins_encode %{
    __ mov($dst$$Register, $src1$$Register, lsl($src2$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct shlL_reg_reg_overlap(iRegL dst, iRegL src1, iRegI src2, flagsReg ccr) %{
  effect(DEF dst, USE src1, USE src2, KILL ccr);
  size(16);
  format %{ "SUBS  $dst.hi,$src2,32 \n\t"
            "LSLpl $dst.hi,$src1.lo,$dst.hi \n\t"
            "RSBmi $dst.hi,$dst.hi,0 \n\t"
            "LSRmi $dst.hi,$src1.lo,$dst.hi" %}

  ins_encode %{
    // $src1$$Register and $dst$$Register->successor() can't be the same
    __ subs($dst$$Register->successor(), $src2$$Register, 32);
    __ mov($dst$$Register->successor(), $src1$$Register, lsl($dst$$Register->successor()), Assembler::PL);
    __ rsb($dst$$Register->successor(), $dst$$Register->successor(), 0, Assembler::MI);
    __ mov($dst$$Register->successor(), $src1$$Register, lsr($dst$$Register->successor()), Assembler::MI);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct shlL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{
  match(Set dst (LShiftL src1 src2));

  expand %{
    flagsReg ccr;
    shlL_reg_reg_overlap(dst, src1, src2, ccr);
    shlL_reg_reg_merge_hi(dst, src1, src2);
    shlL_reg_reg_merge_lo(dst, src1, src2);
  %}
%}
7031 
7032 // Register Shift Left Immediate
// 64-bit left shift by an immediate >= 32 (immU6Big): the low word becomes
// zero and the high word is src1.lo shifted by (count - 32); a shift of
// exactly 32 is a plain move (LSL #0 would be wrong).
instruct shlL_reg_imm6(iRegL dst, iRegL src1, immU6Big src2) %{
  match(Set dst (LShiftL src1 src2));

  size(8);
  format %{ "LSL   $dst.hi,$src1.lo,$src2-32\t! or mov if $src2==32\n\t"
            "MOV   $dst.lo, 0" %}
  ins_encode %{
    if ($src2$$constant == 32) {
      __ mov($dst$$Register->successor(), $src1$$Register);
    } else {
      __ mov($dst$$Register->successor(), $src1$$Register, lsl($src2$$constant-32));
    }
    __ mov($dst$$Register, 0);
  %}
  ins_pipe(ialu_reg_imm);
%}
7049 
// 64-bit left shift by an immediate < 32: high word combines src1.hi << n
// with the bits carried over from src1.lo (src1.lo >> (32-n)).
instruct shlL_reg_imm5(iRegL dst, iRegL src1, immU5 src2) %{
  match(Set dst (LShiftL src1 src2));

  size(12);
  format %{ "LSL   $dst.hi,$src1.lo,$src2\n\t"
            "OR    $dst.hi, $dst.hi, $src1.lo >> 32-$src2\n\t"
            "LSL   $dst.lo,$src1.lo,$src2" %}
  ins_encode %{
    // The order of the following 3 instructions matters: src1.lo and
    // dst.hi can't overlap but src.hi and dst.hi can.
    __ mov($dst$$Register->successor(), $src1$$Register->successor(), lsl($src2$$constant));
    __ orr($dst$$Register->successor(), $dst$$Register->successor(), $src1$$Register, lsr(32-$src2$$constant));
    __ mov($dst$$Register, $src1$$Register, lsl($src2$$constant));
  %}
  ins_pipe(ialu_reg_imm);
%}
7066 
7067 // Register Arithmetic Shift Right
// 32-bit arithmetic right shift by a register or a 5-bit immediate.
instruct sarI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
  match(Set dst (RShiftI src1 src2));
  size(4);
  format %{ "ASR    $dst,$src1,$src2\t! int" %}
  ins_encode %{
    __ mov($dst$$Register, $src1$$Register, asr($src2$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}

// Register Arithmetic Shift Right Immediate
instruct sarI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{
  match(Set dst (RShiftI src1 src2));

  size(4);
  format %{ "ASR    $dst,$src1,$src2" %}
  ins_encode %{
    __ mov($dst$$Register, $src1$$Register, asr($src2$$constant));
  %}
  ins_pipe(ialu_reg_imm);
%}
7089 
7090 // Register Shift Right Arithmetic Long
// 64-bit arithmetic right shift by a variable amount, composed by
// sarL_reg_reg's expand (mirror image of the left-shift expand above):
//   overlap:  handle counts >= 32 into dst.lo (ASR for PL, LSL for MI)
//   merge_lo: OR in src1.lo >>> src2 for counts < 32
//   merge_hi: dst.hi = src1.hi >> src2 (arithmetic, preserves sign)
instruct sarL_reg_reg_merge_lo(iRegL dst, iRegL src1, iRegI src2) %{
  effect(USE_DEF dst, USE src1, USE src2);
  size(4);
  format %{ "OR  $dst.lo,$dst.lo,($src1.lo >> $src2)"  %}
  ins_encode %{
    __ orr($dst$$Register, $dst$$Register, $src1$$Register, lsr($src2$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct sarL_reg_reg_merge_hi(iRegL dst, iRegL src1, iRegI src2) %{
  effect(USE_DEF dst, USE src1, USE src2);
  size(4);
  format %{ "ASR  $dst.hi,$src1.hi,$src2 \n\t" %}
  ins_encode %{
    __ mov($dst$$Register->successor(), $src1$$Register->successor(), asr($src2$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct sarL_reg_reg_overlap(iRegL dst, iRegL src1, iRegI src2, flagsReg ccr) %{
  effect(DEF dst, USE src1, USE src2, KILL ccr);
  size(16);
  format %{ "SUBS  $dst.lo,$src2,32 \n\t"
            "ASRpl $dst.lo,$src1.hi,$dst.lo \n\t"
            "RSBmi $dst.lo,$dst.lo,0 \n\t"
            "LSLmi $dst.lo,$src1.hi,$dst.lo" %}

  ins_encode %{
    // $src1$$Register->successor() and $dst$$Register can't be the same
    __ subs($dst$$Register, $src2$$Register, 32);
    __ mov($dst$$Register, $src1$$Register->successor(), asr($dst$$Register), Assembler::PL);
    __ rsb($dst$$Register, $dst$$Register, 0, Assembler::MI);
    __ mov($dst$$Register, $src1$$Register->successor(), lsl($dst$$Register), Assembler::MI);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct sarL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{
  match(Set dst (RShiftL src1 src2));

  expand %{
    flagsReg ccr;
    sarL_reg_reg_overlap(dst, src1, src2, ccr);
    sarL_reg_reg_merge_lo(dst, src1, src2);
    sarL_reg_reg_merge_hi(dst, src1, src2);
  %}
%}
7139 
7140 // Register Shift Left Immediate
// 64-bit arithmetic right shift by an immediate >= 32: dst.lo is src1.hi
// shifted by (count - 32) and dst.hi is sign-filled with asr(32).
// NOTE(review): the second format line prints "$src2" but the encoding
// always emits asr(32) for dst.hi — debug text only, verify intent.
instruct sarL_reg_imm6(iRegL dst, iRegL src1, immU6Big src2) %{
  match(Set dst (RShiftL src1 src2));

  size(8);
  format %{ "ASR   $dst.lo,$src1.hi,$src2-32\t! or mov if $src2==32\n\t"
            "ASR   $dst.hi,$src1.hi, $src2" %}
  ins_encode %{
    if ($src2$$constant == 32) {
      __ mov($dst$$Register, $src1$$Register->successor());
    } else{
      __ mov($dst$$Register, $src1$$Register->successor(), asr($src2$$constant-32));
    }
    __ mov($dst$$Register->successor(), $src1$$Register->successor(), asr(32));
  %}

  ins_pipe(ialu_reg_imm);
%}
7158 
// 64-bit arithmetic right shift by an immediate < 32: low word combines
// src1.lo >>> n with bits carried down from src1.hi (src1.hi << (32-n)).
instruct sarL_reg_imm5(iRegL dst, iRegL src1, immU5 src2) %{
  match(Set dst (RShiftL src1 src2));
  size(12);
  format %{ "LSR   $dst.lo,$src1.lo,$src2\n\t"
            "OR    $dst.lo, $dst.lo, $src1.hi << 32-$src2\n\t"
            "ASR   $dst.hi,$src1.hi,$src2" %}
  ins_encode %{
    // The order of the following 3 instructions matters: dst.lo is written
    // before src1.hi is read, so they must not overlap (dst.hi and src1.hi
    // may). NOTE(review): original comment looked copied from the
    // left-shift variant; verify the allocator guarantees this constraint.
    __ mov($dst$$Register, $src1$$Register, lsr($src2$$constant));
    __ orr($dst$$Register, $dst$$Register, $src1$$Register->successor(), lsl(32-$src2$$constant));
    __ mov($dst$$Register->successor(), $src1$$Register->successor(), asr($src2$$constant));
  %}
  ins_pipe(ialu_reg_imm);
%}
7174 
7175 // Register Shift Right
// 32-bit logical (unsigned) right shift by a register or 5-bit immediate.
instruct shrI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
  match(Set dst (URShiftI src1 src2));
  size(4);
  format %{ "LSR    $dst,$src1,$src2\t! int" %}
  ins_encode %{
    __ mov($dst$$Register, $src1$$Register, lsr($src2$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}

// Register Shift Right Immediate
instruct shrI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{
  match(Set dst (URShiftI src1 src2));

  size(4);
  format %{ "LSR    $dst,$src1,$src2" %}
  ins_encode %{
    __ mov($dst$$Register, $src1$$Register, lsr($src2$$constant));
  %}
  ins_pipe(ialu_reg_imm);
%}
7197 
7198 // Register Shift Right
// 64-bit logical right shift by a variable amount, composed by
// shrL_reg_reg's expand from the three helpers below (same scheme as the
// arithmetic-shift expand, but with LSR for the high word):
//   overlap:  counts >= 32 handled into dst.lo (LSR for PL, LSL for MI)
//   merge_lo: OR in src1.lo >>> src2 for counts < 32
//   merge_hi: dst.hi = src1.hi >>> src2 (zero-fill)
instruct shrL_reg_reg_merge_lo(iRegL dst, iRegL src1, iRegI src2) %{
  effect(USE_DEF dst, USE src1, USE src2);
  size(4);
  format %{ "OR   $dst.lo,$dst,($src1.lo >>> $src2)"  %}
  ins_encode %{
    __ orr($dst$$Register, $dst$$Register, $src1$$Register, lsr($src2$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct shrL_reg_reg_merge_hi(iRegL dst, iRegL src1, iRegI src2) %{
  effect(USE_DEF dst, USE src1, USE src2);
  size(4);
  format %{ "LSR  $dst.hi,$src1.hi,$src2 \n\t" %}
  ins_encode %{
    __ mov($dst$$Register->successor(), $src1$$Register->successor(), lsr($src2$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct shrL_reg_reg_overlap(iRegL dst, iRegL src1, iRegI src2, flagsReg ccr) %{
  effect(DEF dst, USE src1, USE src2, KILL ccr);
  size(16);
  format %{ "SUBS  $dst,$src2,32 \n\t"
            "LSRpl $dst,$src1.hi,$dst \n\t"
            "RSBmi $dst,$dst,0 \n\t"
            "LSLmi $dst,$src1.hi,$dst" %}

  ins_encode %{
    // $src1$$Register->successor() and $dst$$Register can't be the same
    __ subs($dst$$Register, $src2$$Register, 32);
    __ mov($dst$$Register, $src1$$Register->successor(), lsr($dst$$Register), Assembler::PL);
    __ rsb($dst$$Register, $dst$$Register, 0, Assembler::MI);
    __ mov($dst$$Register, $src1$$Register->successor(), lsl($dst$$Register), Assembler::MI);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct shrL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{
  match(Set dst (URShiftL src1 src2));

  expand %{
    flagsReg ccr;
    shrL_reg_reg_overlap(dst, src1, src2, ccr);
    shrL_reg_reg_merge_lo(dst, src1, src2);
    shrL_reg_reg_merge_hi(dst, src1, src2);
  %}
%}
7247 
7248 // Register Shift Right Immediate
// 64-bit logical right shift by an immediate >= 32: high word becomes zero,
// low word is src1.hi shifted by (count - 32); exactly 32 is a plain move.
instruct shrL_reg_imm6(iRegL dst, iRegL src1, immU6Big src2) %{
  match(Set dst (URShiftL src1 src2));

  size(8);
  format %{ "LSR   $dst.lo,$src1.hi,$src2-32\t! or mov if $src2==32\n\t"
            "MOV   $dst.hi, 0" %}
  ins_encode %{
    if ($src2$$constant == 32) {
      __ mov($dst$$Register, $src1$$Register->successor());
    } else {
      __ mov($dst$$Register, $src1$$Register->successor(), lsr($src2$$constant-32));
    }
    __ mov($dst$$Register->successor(), 0);
  %}

  ins_pipe(ialu_reg_imm);
%}
7266 
// 64-bit logical right shift by an immediate < 32: low word combines
// src1.lo >>> n with bits carried down from src1.hi (src1.hi << (32-n)).
instruct shrL_reg_imm5(iRegL dst, iRegL src1, immU5 src2) %{
  match(Set dst (URShiftL src1 src2));

  size(12);
  format %{ "LSR   $dst.lo,$src1.lo,$src2\n\t"
            "OR    $dst.lo, $dst.lo, $src1.hi << 32-$src2\n\t"
            "LSR   $dst.hi,$src1.hi,$src2" %}
  ins_encode %{
    // The order of the following 3 instructions matters: dst.lo is written
    // before src1.hi is read, so they must not overlap (dst.hi and src1.hi
    // may). NOTE(review): original comment looked copied from the
    // left-shift variant; verify the allocator guarantees this constraint.
    __ mov($dst$$Register, $src1$$Register, lsr($src2$$constant));
    __ orr($dst$$Register, $dst$$Register, $src1$$Register->successor(), lsl(32-$src2$$constant));
    __ mov($dst$$Register->successor(), $src1$$Register->successor(), lsr($src2$$constant));
  %}
  ins_pipe(ialu_reg_imm);
%}
7283 
7284 
// Pointer reinterpreted as an int (CastP2X) then unsigned-shifted right.
instruct shrP_reg_imm5(iRegX dst, iRegP src1, immU5 src2) %{
  match(Set dst (URShiftI (CastP2X src1) src2));
  size(4);
  format %{ "LSR    $dst,$src1,$src2\t! Cast ptr $src1 to int and shift" %}
  ins_encode %{
    __ lsr($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(ialu_reg_imm);
%}
7294 
7295 // Overcomplicated unsigned math
// 64-bit multiply of two longs whose high 32 bits are provably zero
// (both inputs are AndL with a mask whose upper word is 0), so a single
// UMULL of the low halves yields the full product.  find_long_con(-1)
// returns -1 when the mask is not a constant, which fails the >>32 test.
instruct umull_lreg32_lreg32(iRegL dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));
  predicate(n->in(1)->Opcode() == Op_AndL && (((unsigned long long)n->in(1)->in(2)->find_long_con(-1))>>32)==0 &&
            n->in(2)->Opcode() == Op_AndL && (((unsigned long long)n->in(2)->in(2)->find_long_con(-1))>>32)==0);

  ins_cost(DEFAULT_COST*3/2);
  size(4);
  format %{ "UMULL $dst.lo,$dst.hi,$src1.lo,$src2.lo" %}
  ins_encode %{
    __ umull($dst$$Register, $dst$$Register->successor(), $src1$$Register, $src2$$Register);
  %}
  ins_pipe(imul_reg_reg);
%}
7309 
// Multiply-accumulate form of the rule above: dst += src1 * src2 where
// both multiplicands are zero-extended 32-bit values.  The predicate
// checks whichever AddL input is the MulL (AddL is commutative).
instruct umlal_reg32_reg32(iRegL dst, iRegL src1, iRegL src2) %{
  match(Set dst (AddL dst (MulL src1 src2)));
  predicate(
    n->in(2)->Opcode() == Op_MulL ?
    n->in(2)->in(1)->Opcode() == Op_AndL && (((unsigned long long)n->in(2)->in(1)->in(2)->find_long_con(-1))>>32)==0 &&
    n->in(2)->in(2)->Opcode() == Op_AndL && (((unsigned long long)n->in(2)->in(2)->in(2)->find_long_con(-1))>>32)==0 :
    n->in(1)->in(1)->Opcode() == Op_AndL && (((unsigned long long)n->in(1)->in(1)->in(2)->find_long_con(-1))>>32)==0 &&
    n->in(1)->in(2)->Opcode() == Op_AndL && (((unsigned long long)n->in(1)->in(2)->in(2)->find_long_con(-1))>>32)==0
    );

  ins_cost(DEFAULT_COST*3/2);
  size(4);
  format %{ "UMLAL $dst.lo,$dst.hi,$src1.lo,$src2.lo" %}
  ins_encode %{
    __ umlal($dst$$Register, $dst$$Register->successor(), $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
7328 
7329 //----------Floating Point Arithmetic Instructions-----------------------------
7330 
7331 //  Add float single precision
// Single-precision add via one VADD.F32.
instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (AddF src1 src2));

  size(4);
  format %{ "FADDS  $dst,$src1,$src2" %}
  ins_encode %{
    __ vadd_f32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(faddF_reg_reg);
%}
7343 
7344 //  Add float double precision
// Double-precision add via one VADD.F64.
instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (AddD src1 src2));

  size(4);
  format %{ "FADDD  $dst,$src1,$src2" %}
  ins_encode %{
    __ vadd_f64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(faddD_reg_reg);
%}
7356 
7357 //  Sub float single precision
// Single-precision subtract via one VSUB.F32.
instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (SubF src1 src2));

  size(4);
  format %{ "FSUBS  $dst,$src1,$src2" %}
  ins_encode %{
    __ vsub_f32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(faddF_reg_reg);
%}
7368 
7369 //  Sub float double precision
// Double-precision subtract via one VSUB.F64.
instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (SubD src1 src2));

  size(4);
  format %{ "FSUBD  $dst,$src1,$src2" %}
  ins_encode %{
    __ vsub_f64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(faddD_reg_reg);
%}
7380 
7381 //  Mul float single precision
// Single-precision multiply via one VMUL.F32.
instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (MulF src1 src2));

  size(4);
  format %{ "FMULS  $dst,$src1,$src2" %}
  ins_encode %{
    __ vmul_f32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(fmulF_reg_reg);
%}
7393 
7394 //  Mul float double precision
// Double-precision multiply via one VMUL.F64.
instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (MulD src1 src2));

  size(4);
  format %{ "FMULD  $dst,$src1,$src2" %}
  ins_encode %{
    __ vmul_f64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(fmulD_reg_reg);
%}
7406 
7407 //  Div float single precision
// Single-precision divide via one VDIV.F32.
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (DivF src1 src2));

  size(4);
  format %{ "FDIVS  $dst,$src1,$src2" %}
  ins_encode %{
    __ vdiv_f32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(fdivF_reg_reg);
%}
7419 
7420 //  Div float double precision
// Double-precision divide via one VDIV.F64.
instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (DivD src1 src2));

  size(4);
  format %{ "FDIVD  $dst,$src1,$src2" %}
  ins_encode %{
    __ vdiv_f64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}

  ins_pipe(fdivD_reg_reg);
%}
7432 
7433 //  Absolute float double precision
// Double-precision absolute value via one VABS.F64 (clears the sign bit).
instruct absD_reg(regD dst, regD src) %{
  match(Set dst (AbsD src));

  size(4);
  format %{ "FABSd  $dst,$src" %}
  ins_encode %{
    __ vabs_f64($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(faddD_reg);
%}
7444 
7445 //  Absolute float single precision
// Single-precision absolute value via one VABS.F32 (clears the sign bit).
instruct absF_reg(regF dst, regF src) %{
  match(Set dst (AbsF src));
  // VABS.F32 is a single 4-byte VFP instruction; declare size(4)
  // explicitly, consistent with absD_reg and negF_reg.
  size(4);
  format %{ "FABSs  $dst,$src" %}
  ins_encode %{
    __ vabs_f32($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(faddF_reg);
%}
7454 
// Single-precision negate via one VNEG.F32 (flips the sign bit).
instruct negF_reg(regF dst, regF src) %{
  match(Set dst (NegF src));

  size(4);
  format %{ "FNEGs  $dst,$src" %}
  ins_encode %{
    __ vneg_f32($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(faddF_reg);
%}
7465 
// Double-precision negate via one VNEG.F64 (flips the sign bit).
instruct negD_reg(regD dst, regD src) %{
  match(Set dst (NegD src));
  // VNEG.F64 is a single 4-byte VFP instruction; declare size(4)
  // explicitly, consistent with negF_reg and absD_reg.
  size(4);
  format %{ "FNEGd  $dst,$src" %}
  ins_encode %{
    __ vneg_f64($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(faddD_reg);
%}
7475 
//  Sqrt float single precision: the double round-trip idiom
//  ConvD2F(SqrtD(ConvF2D src)) is matched to a single VSQRT.F32.
instruct sqrtF_reg_reg(regF dst, regF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  size(4);
  format %{ "FSQRTS $dst,$src" %}
  ins_encode %{
    __ vsqrt_f32($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(fdivF_reg_reg);
%}
7487 
7488 //  Sqrt float double precision
// Double-precision square root via one VSQRT.F64.
instruct sqrtD_reg_reg(regD dst, regD src) %{
  match(Set dst (SqrtD src));

  size(4);
  format %{ "FSQRTD $dst,$src" %}
  ins_encode %{
    __ vsqrt_f64($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(fdivD_reg_reg);
%}
7499 
7500 //----------Logical Instructions-----------------------------------------------
7501 // And Instructions
7502 // Register And
// Int AND of two registers.
instruct andI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
  match(Set dst (AndI src1 src2));

  size(4);
  format %{ "and_32 $dst,$src1,$src2" %}
  ins_encode %{
    __ andr($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
7513 
// AndI folded with an in-register left shift of the second operand.
instruct andshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  size(4);
  format %{ "AND    $dst,$src1,$src2<<$src3" %}
  ins_encode %{
    __ andr($dst$$Register, $src1$$Register, $src2$$Register, lsl($src3$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}
7524 
// AndI folded with an immediate left shift of the second operand.
instruct andshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  size(4);
  format %{ "and_32 $dst,$src1,$src2<<$src3" %}
  ins_encode %{
    __ andr($dst$$Register, $src1$$Register, $src2$$Register, lsl($src3$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
7535 
// AndI folded with an in-register arithmetic right shift.
instruct andsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  size(4);
  format %{ "AND    $dst,$src1,$src2>>$src3" %}
  ins_encode %{
    __ andr($dst$$Register, $src1$$Register, $src2$$Register, asr($src3$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}
7546 
// AndI folded with an immediate arithmetic right shift.
instruct andsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  size(4);
  format %{ "and_32 $dst,$src1,$src2>>$src3" %}
  ins_encode %{
    __ andr($dst$$Register, $src1$$Register, $src2$$Register, asr($src3$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
7557 
// AndI folded with an in-register logical right shift.
instruct andshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  size(4);
  format %{ "AND    $dst,$src1,$src2>>>$src3" %}
  ins_encode %{
    __ andr($dst$$Register, $src1$$Register, $src2$$Register, lsr($src3$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}
7568 
// AndI folded with an immediate logical right shift.
instruct andshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  size(4);
  format %{ "and_32 $dst,$src1,$src2>>>$src3" %}
  ins_encode %{
    __ andr($dst$$Register, $src1$$Register, $src2$$Register, lsr($src3$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
7579 
7580 // Immediate And
// Int AND with an operand-2 encodable (rotated 8-bit) immediate.
instruct andI_reg_limm(iRegI dst, iRegI src1, limmI src2) %{
  match(Set dst (AndI src1 src2));

  size(4);
  format %{ "and_32 $dst,$src1,$src2\t! int" %}
  ins_encode %{
    __ andr($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(ialu_reg_imm);
%}
7591 
// Int AND where the complement of the mask is encodable: use BIC with ~mask.
instruct andI_reg_limmn(iRegI dst, iRegI src1, limmIn src2) %{
  match(Set dst (AndI src1 src2));

  size(4);
  format %{ "bic    $dst,$src1,~$src2\t! int" %}
  ins_encode %{
    __ bic($dst$$Register, $src1$$Register, ~$src2$$constant);
  %}
  ins_pipe(ialu_reg_imm);
%}
7602 
7603 // Register And Long
// Long AND: two 32-bit ANDs, low halves then high halves (successor()).
instruct andL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
  match(Set dst (AndL src1 src2));

  ins_cost(DEFAULT_COST);
  size(8);
  format %{ "AND    $dst,$src1,$src2\t! long" %}
  ins_encode %{
    __ andr($dst$$Register, $src1$$Register, $src2$$Register);
    __ andr($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor());
  %}
  ins_pipe(ialu_reg_reg);
%}
7616 
7617 // TODO: try immLRot2 instead, (0, $con$$constant) becomes
7618 // (hi($con$$constant), lo($con$$constant)) becomes
// Long AND with a constant whose significant bits fit in the low word
// (immLlowRot), so the high half is ANDed with 0, i.e. cleared.
instruct andL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{
  match(Set dst (AndL src1 con));
  ins_cost(DEFAULT_COST);
  size(8);
  format %{ "AND    $dst,$src1,$con\t! long" %}
  ins_encode %{
    __ andr($dst$$Register, $src1$$Register, $con$$constant);
    __ andr($dst$$Register->successor(), $src1$$Register->successor(), 0u);
  %}
  ins_pipe(ialu_reg_imm);
%}
7630 
7631 // Or Instructions
7632 // Register Or
// Int OR of two registers.
instruct orI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
  match(Set dst (OrI src1 src2));

  size(4);
  format %{ "orr_32 $dst,$src1,$src2\t! int" %}
  ins_encode %{
    __ orr($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
7643 
// OrI folded with an in-register left shift of the second operand.
instruct orshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  size(4);
  format %{ "OR    $dst,$src1,$src2<<$src3" %}
  ins_encode %{
    __ orr($dst$$Register, $src1$$Register, $src2$$Register, lsl($src3$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}
7654 
// OrI folded with an immediate left shift of the second operand.
instruct orshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  size(4);
  format %{ "orr_32 $dst,$src1,$src2<<$src3" %}
  ins_encode %{
    __ orr($dst$$Register, $src1$$Register, $src2$$Register, lsl($src3$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
7665 
// OrI folded with an in-register arithmetic right shift.
instruct orsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  size(4);
  format %{ "OR    $dst,$src1,$src2>>$src3" %}
  ins_encode %{
    __ orr($dst$$Register, $src1$$Register, $src2$$Register, asr($src3$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}
7676 
// OrI folded with an immediate arithmetic right shift.
instruct orsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  size(4);
  format %{ "orr_32 $dst,$src1,$src2>>$src3" %}
  ins_encode %{
    __ orr($dst$$Register, $src1$$Register, $src2$$Register, asr($src3$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
7687 
// OrI folded with an in-register logical right shift.
instruct orshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  size(4);
  format %{ "OR    $dst,$src1,$src2>>>$src3" %}
  ins_encode %{
    __ orr($dst$$Register, $src1$$Register, $src2$$Register, lsr($src3$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}
7698 
// OrI folded with an immediate logical right shift.
instruct orshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  size(4);
  format %{ "orr_32 $dst,$src1,$src2>>>$src3" %}
  ins_encode %{
    __ orr($dst$$Register, $src1$$Register, $src2$$Register, lsr($src3$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
7709 
7710 // Immediate Or
// Int OR with an operand-2 encodable immediate.
instruct orI_reg_limm(iRegI dst, iRegI src1, limmI src2) %{
  match(Set dst (OrI src1 src2));

  size(4);
  format %{ "orr_32  $dst,$src1,$src2" %}
  ins_encode %{
    __ orr($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(ialu_reg_imm);
%}
7721 // TODO: orn_32 with limmIn
7722 
7723 // Register Or Long
// Long OR: two 32-bit ORs, low halves then high halves (successor()).
instruct orL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  ins_cost(DEFAULT_COST);
  size(8);
  format %{ "OR     $dst.lo,$src1.lo,$src2.lo\t! long\n\t"
            "OR     $dst.hi,$src1.hi,$src2.hi" %}
  ins_encode %{
    __ orr($dst$$Register, $src1$$Register, $src2$$Register);
    __ orr($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor());
  %}
  ins_pipe(ialu_reg_reg);
%}
7737 
7738 // TODO: try immLRot2 instead, (0, $con$$constant) becomes
7739 // (hi($con$$constant), lo($con$$constant)) becomes
// Long OR with a constant whose significant bits fit in the low word
// (immLlowRot): the low half is OR-ed with $con, the high half with 0
// (a plain copy).  The format previously displayed "$con" for the high
// half, disagreeing with the emitted code; fixed to show 0.
instruct orL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{
  match(Set dst (OrL src1 con));
  ins_cost(DEFAULT_COST);
  size(8);
  format %{ "OR     $dst.lo,$src1.lo,$con\t! long\n\t"
            "OR     $dst.hi,$src1.hi,0" %}
  ins_encode %{
    __ orr($dst$$Register, $src1$$Register, $con$$constant);
    __ orr($dst$$Register->successor(), $src1$$Register->successor(), 0u);
  %}
  ins_pipe(ialu_reg_imm);
%}
7752 
7753 #ifdef TODO
7754 // Use SPRegP to match Rthread (TLS register) without spilling.
7755 // Use store_ptr_RegP to match Rthread (TLS register) without spilling.
7756 // Use sp_ptr_RegP to match Rthread (TLS register) without spilling.
// OR of an int with a pointer reinterpreted as bits (currently disabled
// by the surrounding #ifdef TODO).
instruct orI_reg_castP2X(iRegI dst, iRegI src1, sp_ptr_RegP src2) %{
  match(Set dst (OrI src1 (CastP2X src2)));
  size(4);
  format %{ "OR     $dst,$src1,$src2" %}
  ins_encode %{
    __ orr($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
7766 #endif
7767 
7768 // Xor Instructions
7769 // Register Xor
// Int XOR of two registers.
instruct xorI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
  match(Set dst (XorI src1 src2));

  size(4);
  format %{ "eor_32 $dst,$src1,$src2" %}
  ins_encode %{
    __ eor($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
7780 
// XorI folded with an in-register left shift of the second operand.
instruct xorshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  size(4);
  format %{ "XOR    $dst,$src1,$src2<<$src3" %}
  ins_encode %{
    __ eor($dst$$Register, $src1$$Register, $src2$$Register, lsl($src3$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}
7791 
// XorI folded with an immediate left shift of the second operand.
instruct xorshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  size(4);
  format %{ "eor_32 $dst,$src1,$src2<<$src3" %}
  ins_encode %{
    __ eor($dst$$Register, $src1$$Register, $src2$$Register, lsl($src3$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
7802 
// XorI folded with an in-register arithmetic right shift.
instruct xorsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  size(4);
  format %{ "XOR    $dst,$src1,$src2>>$src3" %}
  ins_encode %{
    __ eor($dst$$Register, $src1$$Register, $src2$$Register, asr($src3$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}
7813 
// XorI folded with an immediate arithmetic right shift.
instruct xorsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  size(4);
  format %{ "eor_32 $dst,$src1,$src2>>$src3" %}
  ins_encode %{
    __ eor($dst$$Register, $src1$$Register, $src2$$Register, asr($src3$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
7824 
// XorI folded with an in-register logical right shift.
instruct xorshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  size(4);
  format %{ "XOR    $dst,$src1,$src2>>>$src3" %}
  ins_encode %{
    __ eor($dst$$Register, $src1$$Register, $src2$$Register, lsr($src3$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}
7835 
// XorI folded with an immediate logical right shift.
instruct xorshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  size(4);
  format %{ "eor_32 $dst,$src1,$src2>>>$src3" %}
  ins_encode %{
    __ eor($dst$$Register, $src1$$Register, $src2$$Register, lsr($src3$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
7846 
7847 // Immediate Xor
// Int XOR with an operand-2 encodable immediate.
instruct xorI_reg_imm(iRegI dst, iRegI src1, limmI src2) %{
  match(Set dst (XorI src1 src2));

  size(4);
  format %{ "eor_32 $dst,$src1,$src2" %}
  ins_encode %{
    __ eor($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(ialu_reg_imm);
%}
7858 
7859 // Register Xor Long
// Long XOR: two 32-bit EORs.
// NOTE(review): the format lists the hi half first but the encoding
// emits lo then hi; the per-line operands are labeled correctly.
instruct xorL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));
  ins_cost(DEFAULT_COST);
  size(8);
  format %{ "XOR     $dst.hi,$src1.hi,$src2.hi\t! long\n\t"
            "XOR     $dst.lo,$src1.lo,$src2.lo\t! long" %}
  ins_encode %{
    __ eor($dst$$Register, $src1$$Register, $src2$$Register);
    __ eor($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor());
  %}
  ins_pipe(ialu_reg_reg);
%}
7872 
7873 // TODO: try immLRot2 instead, (0, $con$$constant) becomes
7874 // (hi($con$$constant), lo($con$$constant)) becomes
// Long XOR with a constant whose significant bits fit in the low word
// (immLlowRot): the low half is XOR-ed with $con, the high half with 0
// (a plain copy).  The format previously had the two halves swapped
// (hi^$con, lo^0), disagreeing with the emitted code; fixed to match.
instruct xorL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{
  match(Set dst (XorL src1 con));
  ins_cost(DEFAULT_COST);
  size(8);
  format %{ "XOR     $dst.lo,$src1.lo,$con\t! long\n\t"
            "XOR     $dst.hi,$src1.hi,0\t! long" %}
  ins_encode %{
    __ eor($dst$$Register, $src1$$Register, $con$$constant);
    __ eor($dst$$Register->successor(), $src1$$Register->successor(), 0u);
  %}
  ins_pipe(ialu_reg_imm);
%}
7887 
7888 //----------Convert to Boolean-------------------------------------------------
// Int -> boolean: dst = (src != 0) ? 1 : 0, using TST plus two
// conditional moves; clobbers the flags.
instruct convI2B( iRegI dst, iRegI src, flagsReg ccr ) %{
  match(Set dst (Conv2B src));
  effect(KILL ccr);
  size(12);
  ins_cost(DEFAULT_COST*2);
  format %{ "TST    $src,$src \n\t"
            "MOV    $dst, 0   \n\t"
            "MOV.ne $dst, 1" %}
  ins_encode %{ // FIXME: can do better?
    __ tst($src$$Register, $src$$Register);
    __ mov($dst$$Register, 0);
    __ mov($dst$$Register, 1, Assembler::NE);
  %}
  ins_pipe(ialu_reg_ialu);
%}
7904 
// Pointer -> boolean: dst = (src != NULL) ? 1 : 0; same code shape as
// convI2B; clobbers the flags.
instruct convP2B( iRegI dst, iRegP src, flagsReg ccr ) %{
  match(Set dst (Conv2B src));
  effect(KILL ccr);
  size(12);
  ins_cost(DEFAULT_COST*2);
  format %{ "TST    $src,$src \n\t"
            "MOV    $dst, 0   \n\t"
            "MOV.ne $dst, 1" %}
  ins_encode %{
    __ tst($src$$Register, $src$$Register);
    __ mov($dst$$Register, 0);
    __ mov($dst$$Register, 1, Assembler::NE);
  %}
  ins_pipe(ialu_reg_ialu);
%}
7920 
// dst = (p < q) ? -1 : 0.  mvn_i(dst, 0, LT) writes ~0 == -1 when LT.
instruct cmpLTMask_reg_reg( iRegI dst, iRegI p, iRegI q, flagsReg ccr ) %{
  match(Set dst (CmpLTMask p q));
  effect( KILL ccr );
  ins_cost(DEFAULT_COST*3);
  format %{ "CMP    $p,$q\n\t"
            "MOV    $dst, #0\n\t"
            "MOV.lt $dst, #-1" %}
  ins_encode %{
    __ cmp($p$$Register, $q$$Register);
    __ mov_i($dst$$Register, 0);
    __ mvn_i($dst$$Register, 0, Assembler::LT);
  %}
  ins_pipe(ialu_reg_reg_ialu);
%}
7935 
// dst = (p < q) ? -1 : 0, with q an arithmetic immediate.
instruct cmpLTMask_reg_imm( iRegI dst, iRegI p, aimmI q, flagsReg ccr ) %{
  match(Set dst (CmpLTMask p q));
  effect( KILL ccr );
  ins_cost(DEFAULT_COST*3);
  format %{ "CMP    $p,$q\n\t"
            "MOV    $dst, #0\n\t"
            "MOV.lt $dst, #-1" %}
  ins_encode %{
    __ cmp($p$$Register, $q$$constant);
    __ mov_i($dst$$Register, 0);
    __ mvn_i($dst$$Register, 0, Assembler::LT);
  %}
  ins_pipe(ialu_reg_reg_ialu);
%}
7950 
// z += (p < q) ? y : 0 — the CmpLTMask/AndI/AddI idiom collapsed into a
// compare plus one conditional add.
instruct cadd_cmpLTMask3( iRegI p, iRegI q, iRegI y, iRegI z, flagsReg ccr ) %{
  match(Set z (AddI (AndI (CmpLTMask p q) y) z));
  effect( KILL ccr );
  ins_cost(DEFAULT_COST*2);
  format %{ "CMP    $p,$q\n\t"
            "ADD.lt $z,$y,$z" %}
  ins_encode %{
    __ cmp($p$$Register, $q$$Register);
    __ add($z$$Register, $y$$Register, $z$$Register, Assembler::LT);
  %}
  ins_pipe( cadd_cmpltmask );
%}
7963 
7964 // FIXME: remove unused "dst"
// z += (p < q) ? y : 0, with q an arithmetic immediate.
instruct cadd_cmpLTMask4( iRegI dst, iRegI p, aimmI q, iRegI y, iRegI z, flagsReg ccr ) %{
  match(Set z (AddI (AndI (CmpLTMask p q) y) z));
  effect( KILL ccr );
  ins_cost(DEFAULT_COST*2);
  format %{ "CMP    $p,$q\n\t"
            "ADD.lt $z,$y,$z" %}
  ins_encode %{
    __ cmp($p$$Register, $q$$constant);
    __ add($z$$Register, $y$$Register, $z$$Register, Assembler::LT);
  %}
  ins_pipe( cadd_cmpltmask );
%}
7977 
// p = (p - q) + ((p < q) ? y : 0): SUBS sets flags from p - q, then the
// conditional ADD applies the correction when the result went negative.
instruct cadd_cmpLTMask( iRegI p, iRegI q, iRegI y, flagsReg ccr ) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect( KILL ccr );
  ins_cost(DEFAULT_COST*2);
  format %{ "SUBS   $p,$p,$q\n\t"
            "ADD.lt $p,$y,$p" %}
  ins_encode %{
    __ subs($p$$Register, $p$$Register, $q$$Register);
    __ add($p$$Register, $y$$Register, $p$$Register, Assembler::LT);
  %}
  ins_pipe( cadd_cmpltmask );
%}
7990 
7991 //----------Arithmetic Conversion Instructions---------------------------------
7992 // The conversions operations are all Alpha sorted.  Please keep it that way!
7993 
// Double -> float narrowing via one VCVT.F32.F64.
instruct convD2F_reg(regF dst, regD src) %{
  match(Set dst (ConvD2F src));
  size(4);
  format %{ "FCVTSD  $dst,$src" %}
  ins_encode %{
    __ vcvt_f32_f64($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(fcvtD2F);
%}
8003 
8004 // Convert a double to an int in a float register.
8005 // If the double is a NAN, stuff a zero in instead.
8006 
// Double -> int: VCVT (round-toward-zero, NaN -> 0 per VFP semantics)
// into a scratch FP register, then move the bits to the core register.
instruct convD2I_reg_reg(iRegI dst, regD src, regF tmp) %{
  match(Set dst (ConvD2I src));
  effect( TEMP tmp );
  ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME
  format %{ "FTOSIZD  $tmp,$src\n\t"
            "FMRS     $dst, $tmp" %}
  ins_encode %{
    __ vcvt_s32_f64($tmp$$FloatRegister, $src$$FloatRegister);
    __ vmov_f32($dst$$Register, $tmp$$FloatRegister);
  %}
  ins_pipe(fcvtD2I);
%}
8019 
8020 // Convert a double to a long in a double register.
8021 // If the double is a NAN, stuff a zero in instead.
8022 
8023 // Double to Long conversion
// Double -> long via a runtime call to SharedRuntime::d2l.  The argument
// is passed per ABI: in r0:r1 for softfp, in d0 for the hard-float ABI.
// Result lands in r0:r1 (R0R1RegL dst).
instruct convD2L_reg(R0R1RegL dst, regD src) %{
  match(Set dst (ConvD2L src));
  effect(CALL);
  ins_cost(MEMORY_REF_COST); // FIXME
  format %{ "convD2L    $dst,$src\t ! call to SharedRuntime::d2l" %}
  ins_encode %{
#ifndef HARD_FLOAT_CC
    __ vmov_f64($dst$$Register, $dst$$Register->successor(), $src$$FloatRegister);
#else
    if ($src$$FloatRegister != d0) {
      __ vmov_f64(d0, $src$$FloatRegister);
    }
#endif
    address target = CAST_FROM_FN_PTR(address, SharedRuntime::d2l);
    __ call(target, relocInfo::runtime_call_type);
  %}
  ins_pipe(fcvtD2L);
%}
8042 
// Float -> double widening via one VCVT.F64.F32.
instruct convF2D_reg(regD dst, regF src) %{
  match(Set dst (ConvF2D src));
  size(4);
  format %{ "FCVTDS  $dst,$src" %}
  ins_encode %{
    __ vcvt_f64_f32($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(fcvtF2D);
%}
8052 
// Float -> int: VCVT (round-toward-zero) into a scratch FP register,
// then move the bits to the core register.
instruct convF2I_reg_reg(iRegI dst, regF src, regF tmp) %{
  match(Set dst (ConvF2I src));
  effect( TEMP tmp );
  ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME
  size(8);
  format %{ "FTOSIZS  $tmp,$src\n\t"
            "FMRS     $dst, $tmp" %}
  ins_encode %{
    __ vcvt_s32_f32($tmp$$FloatRegister, $src$$FloatRegister);
    __ vmov_f32($dst$$Register, $tmp$$FloatRegister);
  %}
  ins_pipe(fcvtF2I);
%}
8066 
8067 // Float to Long conversion
// Float -> long via a runtime call to SharedRuntime::f2l.  Argument is
// passed in r0 for softfp or s0 (f0) for the hard-float ABI; the result
// lands in r0:r1 (R0R1RegL dst).
instruct convF2L_reg(R0R1RegL dst, regF src, R0RegI arg1) %{
  match(Set dst (ConvF2L src));
  ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME
  effect(CALL);
  format %{ "convF2L  $dst,$src\t! call to SharedRuntime::f2l" %}
  ins_encode %{
#ifndef HARD_FLOAT_CC
    __ vmov_f32($arg1$$Register, $src$$FloatRegister);
#else
    if($src$$FloatRegister != f0) {
      __ vmov_f32(f0, $src$$FloatRegister);
    }
#endif
    address target = CAST_FROM_FN_PTR(address, SharedRuntime::f2l);
    __ call(target, relocInfo::runtime_call_type);
  %}
  ins_pipe(fcvtF2L);
%}
8086 
// Int -> double: move the int into the low half of a low-bank double
// register, then convert in place with VCVT.F64.S32.
instruct convI2D_reg_reg(iRegI src, regD_low dst) %{
  match(Set dst (ConvI2D src));
  ins_cost(DEFAULT_COST + MEMORY_REF_COST); // FIXME
  size(8);
  format %{ "FMSR     $dst,$src \n\t"
            "FSITOD   $dst $dst"%}
  ins_encode %{
      __ vmov_f32($dst$$FloatRegister, $src$$Register);
      __ vcvt_f64_s32($dst$$FloatRegister, $dst$$FloatRegister);
  %}
  ins_pipe(fcvtI2D);
%}
8099 
// Int -> float: move the int bits into an FP register, then convert in
// place with VCVT.F32.S32.
instruct convI2F_reg_reg( regF dst, iRegI src ) %{
  match(Set dst (ConvI2F src));
  ins_cost(DEFAULT_COST + MEMORY_REF_COST); // FIXME
  size(8);
  format %{ "FMSR     $dst,$src \n\t"
            "FSITOS   $dst, $dst"%}
  ins_encode %{
      __ vmov_f32($dst$$FloatRegister, $src$$Register);
      __ vcvt_f32_s32($dst$$FloatRegister, $dst$$FloatRegister);
  %}
  ins_pipe(fcvtI2F);
%}
8112 
// Sign-extend int to long: copy the low word, fill the high word with
// the sign via ASR #31.
instruct convI2L_reg(iRegL dst, iRegI src) %{
  match(Set dst (ConvI2L src));
  size(8);
  format %{ "MOV    $dst.lo, $src \n\t"
            "ASR    $dst.hi,$src,31\t! int->long" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register);
    __ mov($dst$$Register->successor(), $src$$Register, asr(31));
  %}
  ins_pipe(ialu_reg_reg);
%}
8124 
8125 // Zero-extend convert int to long
// Zero-extend int to long (matched as ConvI2L masked with 0xFFFFFFFF).
instruct convI2L_reg_zex(iRegL dst, iRegI src, immL_32bits mask ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  size(8);
  format %{ "MOV    $dst.lo,$src.lo\t! zero-extend int to long\n\t"
            "MOV    $dst.hi, 0"%}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register);
    __ mov($dst$$Register->successor(), 0);
  %}
  ins_pipe(ialu_reg_reg);
%}
8137 
8138 // Zero-extend long
// Long masked with 0xFFFFFFFF: keep the low word, clear the high word.
instruct zerox_long(iRegL dst, iRegL src, immL_32bits mask ) %{
  match(Set dst (AndL src mask) );
  size(8);
  format %{ "MOV    $dst.lo,$src.lo\t! zero-extend long\n\t"
            "MOV    $dst.hi, 0"%}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register);
    __ mov($dst$$Register->successor(), 0);
  %}
  ins_pipe(ialu_reg_reg);
%}
8150 
// Raw bit move float -> int register (no conversion).
instruct MoveF2I_reg_reg(iRegI dst, regF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(MEMORY_REF_COST); // FIXME

  size(4);
  format %{ "FMRS   $dst,$src\t! MoveF2I" %}
  ins_encode %{
    __ vmov_f32($dst$$Register, $src$$FloatRegister);
  %}
  ins_pipe(iload_mem); // FIXME
%}
8163 
// Raw bit move int -> float register (no conversion).
instruct MoveI2F_reg_reg(regF dst, iRegI src) %{
  match(Set dst (MoveI2F src));
  ins_cost(MEMORY_REF_COST); // FIXME

  size(4);
  format %{ "FMSR   $dst,$src\t! MoveI2F" %}
  ins_encode %{
    __ vmov_f32($dst$$FloatRegister, $src$$Register);
  %}
  ins_pipe(iload_mem); // FIXME
%}
8175 
// Raw bit move double -> long register pair (no conversion).
instruct MoveD2L_reg_reg(iRegL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(MEMORY_REF_COST); // FIXME

  size(4);
  format %{ "FMRRD    $dst,$src\t! MoveD2L" %}
  ins_encode %{
    __ vmov_f64($dst$$Register, $dst$$Register->successor(), $src$$FloatRegister);
  %}
  ins_pipe(iload_mem); // FIXME
%}
8188 
// Raw bit move long register pair -> double (no conversion).
instruct MoveL2D_reg_reg(regD dst, iRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(MEMORY_REF_COST); // FIXME

  size(4);
  format %{ "FMDRR   $dst,$src\t! MoveL2D" %}
  ins_encode %{
    __ vmov_f64($dst$$FloatRegister, $src$$Register, $src$$Register->successor());
  %}
  ins_pipe(ialu_reg_reg); // FIXME
%}
8201 
8202 //-----------
8203 // Long to Double conversion
8204 
8205 // Magic constant, 0x43300000
// Expand-only helper (no match rule): load 0x43300000, the high word of
// the double 2^52, used by the long->double conversion sequence.
// size(8) assumes the constant is materialized in two instructions.
instruct loadConI_x43300000(iRegI dst) %{
  effect(DEF dst);
  size(8);
  format %{ "MOV_SLOW  $dst,0x43300000\t! 2^52" %}
  ins_encode %{
    __ mov($dst$$Register, 0x43300000);
  %}
  ins_pipe(ialu_none);
%}
8215 
8216 // Magic constant, 0x41f00000
// Expand-only helper (no match rule): load 0x41f00000, the high word of
// the double 2^32, used by the long->double conversion sequence.
instruct loadConI_x41f00000(iRegI dst) %{
  effect(DEF dst);
  size(8);
  format %{ "MOV_SLOW  $dst, 0x41f00000\t! 2^32" %}
  ins_encode %{
    __ mov($dst$$Register, 0x41f00000);
  %}
  ins_pipe(ialu_none);
%}
8226 
// Expand-only helper (no match rule): load constant 0.
instruct loadConI_x0(iRegI dst) %{
  effect(DEF dst);
  size(4);
  format %{ "MOV  $dst, 0x0\t! 0" %}
  ins_encode %{
    __ mov($dst$$Register, 0);
  %}
  ins_pipe(ialu_none);
%}
8236 
8237 // Construct a double from two float halves
// Assemble a double from two halves: copy the high single-precision half of
// src1 and the low half of src2 into dst. Uses single-precision VMOV (FCPYS)
// on the S-register aliases; successor(SINGLE) names the upper S register of
// a D register in the lower half of the FP register file.
instruct regDHi_regDLo_to_regD(regD_low dst, regD_low src1, regD_low src2) %{
  effect(DEF dst, USE src1, USE src2);
  size(8);
  format %{ "FCPYS  $dst.hi,$src1.hi\n\t"
            "FCPYS  $dst.lo,$src2.lo" %}
  ins_encode %{
    __ vmov_f32($dst$$FloatRegister->successor(FloatRegisterImpl::SINGLE), $src1$$FloatRegister->successor(FloatRegisterImpl::SINGLE));
    __ vmov_f32($dst$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(faddD_reg_reg);
%}
8249 
8250 // Convert integer in high half of a double register (in the lower half of
8251 // the double register file) to double
// Convert the signed int held in the high S-register half of src (a D register
// in the lower half of the FP file) to a double in dst (VCVT/FSITOD).
instruct convI2D_regDHi_regD(regD dst, regD_low src) %{
  effect(DEF dst, USE src);
  size(4);
  format %{ "FSITOD  $dst,$src" %}
  ins_encode %{
    __ vcvt_f64_s32($dst$$FloatRegister, $src$$FloatRegister->successor(FloatRegisterImpl::SINGLE));// TODO verify the semantics is the same as before
  %}
  ins_pipe(fcvtLHi2D);
%}
8261 
8262 // Add float double precision
// Double-precision add (VADD/FADDD): dst = src1 + src2.
// Expand-only helper (no match rule) used by convL2D_reg_slow_fxtof.
instruct addD_regD_regD(regD dst, regD src1, regD src2) %{
  effect(DEF dst, USE src1, USE src2);
  size(4);
  format %{ "FADDD  $dst,$src1,$src2" %}
  ins_encode %{
    __ vadd_f64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(faddD_reg_reg);
%}
8272 
8273 // Sub float double precision
// Double-precision subtract (VSUB/FSUBD): dst = src1 - src2.
// Expand-only helper (no match rule) used by convL2D_reg_slow_fxtof.
instruct subD_regD_regD(regD dst, regD src1, regD src2) %{
  effect(DEF dst, USE src1, USE src2);
  size(4);
  format %{ "FSUBD  $dst,$src1,$src2" %}
  ins_encode %{
    __ vsub_f64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(faddD_reg_reg);
%}
8283 
8284 // Mul float double precision
// Double-precision multiply (VMUL/FMULD): dst = src1 * src2.
// Expand-only helper (no match rule) used by convL2D_reg_slow_fxtof.
instruct mulD_regD_regD(regD dst, regD src1, regD src2) %{
  effect(DEF dst, USE src1, USE src2);
  size(4);
  format %{ "FMULD  $dst,$src1,$src2" %}
  ins_encode %{
    __ vmul_f64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(fmulD_reg_reg);
%}
8294 
// Copy a long register pair (lo, hi) into a D register as raw bits (FMDRR).
instruct regL_to_regD(regD dst, iRegL src) %{
  // No match rule to avoid chain rule match.
  effect(DEF dst, USE src);
  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "FMDRR   $dst,$src\t! regL to regD" %}
  ins_encode %{
    __ vmov_f64($dst$$FloatRegister, $src$$Register, $src$$Register->successor());
  %}
  ins_pipe(ialu_reg_reg); // FIXME
%}
8306 
// Pack two int GPRs into a D register as raw bits (FMDRR):
// src1 -> low word, src2 -> high word.
instruct regI_regI_to_regD(regD dst, iRegI src1, iRegI src2) %{
  // No match rule to avoid chain rule match.
  effect(DEF dst, USE src1, USE src2);
  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "FMDRR   $dst,$src1,$src2\t! regI,regI to regD" %}
  ins_encode %{
    __ vmov_f64($dst$$FloatRegister, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg_reg); // FIXME
%}
8318 
// ConvL2D via the classic "magic constant" trick, since VFPv2/v3 has no
// direct 64-bit-int-to-double conversion. The long is split into halves:
// the high half is converted with FSITOD and scaled by 2^32 (0x41f00000),
// the low half is reinterpreted as the mantissa of a double biased by 2^52
// (0x43300000) and the bias is subtracted; the two parts are then added.
instruct convL2D_reg_slow_fxtof(regD dst, iRegL src) %{
  match(Set dst (ConvL2D src));
  ins_cost(DEFAULT_COST*8 + MEMORY_REF_COST*6); // FIXME

  expand %{
    regD_low   tmpsrc;
    iRegI      ix43300000;
    iRegI      ix41f00000;
    iRegI      ix0;
    regD_low   dx43300000;
    regD       dx41f00000;
    regD       tmp1;
    regD_low   tmp2;
    regD       tmp3;
    regD       tmp4;

    regL_to_regD(tmpsrc, src);

    loadConI_x43300000(ix43300000);
    loadConI_x41f00000(ix41f00000);
    loadConI_x0(ix0);

    regI_regI_to_regD(dx43300000, ix0, ix43300000);
    regI_regI_to_regD(dx41f00000, ix0, ix41f00000);

    convI2D_regDHi_regD(tmp1, tmpsrc);
    regDHi_regDLo_to_regD(tmp2, dx43300000, tmpsrc);
    subD_regD_regD(tmp3, tmp2, dx43300000);
    mulD_regD_regD(tmp4, tmp1, dx41f00000);
    addD_regD_regD(dst, tmp3, tmp4);
  %}
%}
8351 
// Long-to-int truncation: simply copy the low word of the pair.
instruct convL2I_reg(iRegI dst, iRegL src) %{
  match(Set dst (ConvL2I src));
  size(4);
  format %{ "MOV    $dst,$src.lo\t! long->int" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_move_reg_I_to_L);
%}
8361 
8362 // Register Shift Right Immediate
// Fused (long >> cnt) truncated to int, for shift counts 32..63: only the
// high word matters, so emit a single MOV (cnt==32) or ASR of the high word.
instruct shrL_reg_imm6_L2I(iRegI dst, iRegL src, immI_32_63 cnt) %{
  match(Set dst (ConvL2I (RShiftL src cnt)));
  size(4);
  format %{ "ASR    $dst,$src.hi,($cnt - 32)\t! long->int or mov if $cnt==32" %}
  ins_encode %{
    if ($cnt$$constant == 32) {
      // Shift by exactly 32: result is the high word unchanged.
      __ mov($dst$$Register, $src$$Register->successor());
    } else {
      __ mov($dst$$Register, $src$$Register->successor(), asr($cnt$$constant - 32));
    }
  %}
  ins_pipe(ialu_reg_imm);
%}
8376 
8377 
8378 //----------Control Flow Instructions------------------------------------------
8379 // Compare Instructions
8380 // Compare Integers
// Signed int compare, register-register: sets the integer condition codes.
instruct compI_iReg(flagsReg icc, iRegI op1, iRegI op2) %{
  match(Set icc (CmpI op1 op2));
  effect( DEF icc, USE op1, USE op2 );

  size(4);
  format %{ "cmp_32 $op1,$op2\t! int" %}
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cconly_reg_reg);
%}
8392 
// Unsigned int compare, register-register. Same CMP encoding as the signed
// form; signedness is carried by the flagsRegU operand / branch condition.
instruct compU_iReg(flagsRegU icc, iRegI op1, iRegI op2) %{
  match(Set icc (CmpU op1 op2));

  size(4);
  format %{ "cmp_32 $op1,$op2\t! unsigned int" %}
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cconly_reg_reg);
%}
8403 
// Signed int compare against a negative immediate: use CMN with the negated
// (positive) value, which fits the ARM rotated-immediate encoding.
instruct compI_iReg_immneg(flagsReg icc, iRegI op1, aimmIneg op2) %{
  match(Set icc (CmpI op1 op2));
  effect( DEF icc, USE op1 );

  size(4);
  format %{ "cmn_32 $op1,-$op2\t! int" %}
  ins_encode %{
    __ cmn($op1$$Register, -$op2$$constant);
  %}
  ins_pipe(ialu_cconly_reg_imm);
%}
8415 
// Signed int compare against an encodable immediate (aimmI).
instruct compI_iReg_imm(flagsReg icc, iRegI op1, aimmI op2) %{
  match(Set icc (CmpI op1 op2));
  effect( DEF icc, USE op1 );

  size(4);
  format %{ "cmp_32 $op1,$op2\t! int" %}
  ins_encode %{
    __ cmp($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cconly_reg_imm);
%}
8427 
// Test (op1 & op2) against zero without a result register (TST).
// Only EQ/NE/LT/GE are valid afterwards, hence flagsReg_EQNELTGE.
instruct testI_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immI0 zero ) %{
  match(Set icc (CmpI (AndI op1 op2) zero));
  size(4);
  format %{ "tst $op2,$op1" %}

  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cconly_reg_reg_zero);
%}
8438 
// Test (op1 & (op2 << op3)) against zero (TST with register-shifted operand).
// Only EQ/NE/LT/GE are valid afterwards, hence flagsReg_EQNELTGE.
instruct testshlI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iRegI op3, immI0 zero ) %{
  match(Set icc (CmpI (AndI op1 (LShiftI op2 op3)) zero));
  size(4);
  // The shifted operand is op2 (second source), matching the encoding below.
  format %{ "TST   $op1,$op2<<$op3" %}

  ins_encode %{
    __ tst($op1$$Register, $op2$$Register, lsl($op3$$Register));
  %}
  ins_pipe(ialu_cconly_reg_reg_zero);
%}
8449 
// Test (op1 & (op2 << imm)) against zero (TST with immediate-shifted operand).
instruct testshlI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU5 op3, immI0 zero ) %{
  match(Set icc (CmpI (AndI op1 (LShiftI op2 op3)) zero));
  size(4);
  // The shifted operand is op2 (second source), matching the encoding below.
  format %{ "tst $op1,$op2<<$op3" %}

  ins_encode %{
    __ tst($op1$$Register, $op2$$Register, lsl($op3$$constant));
  %}
  ins_pipe(ialu_cconly_reg_reg_zero);
%}
8460 
// Test (op1 & (op2 >> op3)) against zero, arithmetic shift right (TST+ASR).
// Format fixed: the encoding applies ASR (">>") to op2, not "<<" to op1.
instruct testsarI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iRegI op3, immI0 zero ) %{
  match(Set icc (CmpI (AndI op1 (RShiftI op2 op3)) zero));
  size(4);
  format %{ "TST   $op1,$op2>>$op3" %}

  ins_encode %{
    __ tst($op1$$Register, $op2$$Register, asr($op3$$Register));
  %}
  ins_pipe(ialu_cconly_reg_reg_zero);
%}
8471 
// Test (op1 & (op2 >> imm)) against zero, arithmetic shift right (TST+ASR).
// Format fixed: the encoding applies ASR (">>") to op2, not "<<" to op1.
instruct testsarI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU5 op3, immI0 zero ) %{
  match(Set icc (CmpI (AndI op1 (RShiftI op2 op3)) zero));
  size(4);
  format %{ "tst $op1,$op2>>$op3" %}

  ins_encode %{
    __ tst($op1$$Register, $op2$$Register, asr($op3$$constant));
  %}
  ins_pipe(ialu_cconly_reg_reg_zero);
%}
8482 
// Test (op1 & (op2 >>> op3)) against zero, logical shift right (TST+LSR).
// Format fixed: the encoding applies LSR (">>>") to op2, not "<<" to op1.
instruct testshrI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iRegI op3, immI0 zero ) %{
  match(Set icc (CmpI (AndI op1 (URShiftI op2 op3)) zero));
  size(4);
  format %{ "TST   $op1,$op2>>>$op3" %}

  ins_encode %{
    __ tst($op1$$Register, $op2$$Register, lsr($op3$$Register));
  %}
  ins_pipe(ialu_cconly_reg_reg_zero);
%}
8493 
// Test (op1 & (op2 >>> imm)) against zero, logical shift right (TST+LSR).
// Format fixed: the encoding applies LSR (">>>") to op2, not "<<" to op1.
instruct testshrI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU5 op3, immI0 zero ) %{
  match(Set icc (CmpI (AndI op1 (URShiftI op2 op3)) zero));
  size(4);
  format %{ "tst $op1,$op2>>>$op3" %}

  ins_encode %{
    __ tst($op1$$Register, $op2$$Register, lsr($op3$$constant));
  %}
  ins_pipe(ialu_cconly_reg_reg_zero);
%}
8504 
// Test (op1 & imm) against zero with an encodable immediate (TST).
instruct testI_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, limmI op2, immI0 zero ) %{
  match(Set icc (CmpI (AndI op1 op2) zero));
  size(4);
  format %{ "tst $op2,$op1" %}

  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cconly_reg_imm_zero);
%}
8515 
// Long compare producing flags valid only for LT/GE: CMP on the low words
// followed by SBCS on the high words; tmp absorbs the SBCS result.
instruct compL_reg_reg_LTGE(flagsRegL_LTGE xcc, iRegL op1, iRegL op2, iRegI tmp) %{
  match(Set xcc (CmpL op1 op2));
  effect( DEF xcc, USE op1, USE op2, TEMP tmp );

  size(8);
  format %{ "CMP     $op1.low,$op2.low\t\t! long\n\t"
            "SBCS    $tmp,$op1.hi,$op2.hi" %}
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
    __ sbcs($tmp$$Register, $op1$$Register->successor(), $op2$$Register->successor());
  %}
  ins_pipe(ialu_cconly_reg_reg);
%}
8529 
// Long compare producing flags valid only for EQ/NE: TEQ the high words,
// then conditionally (EQ) TEQ the low words — no temp register needed.
instruct compL_reg_reg_EQNE(flagsRegL_EQNE xcc, iRegL op1, iRegL op2) %{
  match(Set xcc (CmpL op1 op2));
  effect( DEF xcc, USE op1, USE op2 );

  size(8);
  format %{ "TEQ    $op1.hi,$op2.hi\t\t! long\n\t"
            "TEQ.eq $op1.lo,$op2.lo" %}
  ins_encode %{
    __ teq($op1$$Register->successor(), $op2$$Register->successor());
    __ teq($op1$$Register, $op2$$Register, Assembler::EQ);
  %}
  ins_pipe(ialu_cconly_reg_reg);
%}
8543 
// Long compare producing flags valid only for LE/GT: same CMP/SBCS sequence
// as the LTGE form but with the operands commuted.
instruct compL_reg_reg_LEGT(flagsRegL_LEGT xcc, iRegL op1, iRegL op2, iRegI tmp) %{
  match(Set xcc (CmpL op1 op2));
  effect( DEF xcc, USE op1, USE op2, TEMP tmp );

  size(8);
  format %{ "CMP     $op2.low,$op1.low\t\t! long\n\t"
            "SBCS    $tmp,$op2.hi,$op1.hi" %}
  ins_encode %{
    __ cmp($op2$$Register, $op1$$Register);
    __ sbcs($tmp$$Register, $op2$$Register->successor(), $op1$$Register->successor());
  %}
  ins_pipe(ialu_cconly_reg_reg);
%}
8557 
// Unsigned long compare: CMP the high words, then conditionally (EQ) CMP
// the low words; the resulting flags support all unsigned conditions.
instruct compUL_reg_reg(flagsRegUL xcc, iRegL op1, iRegL op2) %{
  match(Set xcc (CmpUL op1 op2));
  effect( DEF xcc, USE op1, USE op2 );

  size(8);
  format %{ "CMP     $op1.hi,$op2.hi\t\t! long\n\t"
            "CMP.eq  $op1.low,$op2.low" %}
  ins_encode %{
    __ cmp($op1$$Register->successor(), $op2$$Register->successor());
    __ cmp($op1$$Register, $op2$$Register, Assembler::EQ);
  %}
  ins_pipe(ialu_cconly_reg_reg);
%}
8571 
8572 // TODO: try immLRot2 instead, (0, $con$$constant) becomes
8573 // (hi($con$$constant), lo($con$$constant)) becomes
// Long compare against an immLlowRot constant (low word encodable, high word
// zero), flags valid only for LT/GE: CMP low word vs constant, SBCS high
// word vs 0 into tmp.
instruct compL_reg_con_LTGE(flagsRegL_LTGE xcc, iRegL op1, immLlowRot con, iRegI tmp) %{
  match(Set xcc (CmpL op1 con));
  effect( DEF xcc, USE op1, USE con, TEMP tmp );

  size(8);
  format %{ "CMP     $op1.low,$con\t\t! long\n\t"
            "SBCS    $tmp,$op1.hi,0" %}
  ins_encode %{
    __ cmp($op1$$Register, (int)$con$$constant);
    __ sbcs($tmp$$Register, $op1$$Register->successor(), 0);
  %}

  ins_pipe(ialu_cconly_reg_reg);
%}
8588 
// Unsigned long compare against an immLlowRot constant (high word zero):
// CMP the high word with 0, then conditionally (EQ) CMP the low word.
instruct compUL_reg_con(flagsRegUL xcc, iRegL op1, immLlowRot con ) %{
  match(Set xcc (CmpUL op1 con));
  effect( DEF xcc, USE op1, USE con );

  size(8);
  format %{ "CMP     $op1.hi,0\t\t! long\n\t"
            "CMP.eq  $op1.low,$con" %}
  ins_encode %{
    __ cmp($op1$$Register->successor(), 0);
    __ cmp($op1$$Register, (int)$con$$constant, Assembler::EQ);
  %}

  ins_pipe(ialu_cconly_reg_reg);
%}
8603 
8604 // TODO: try immLRot2 instead, (0, $con$$constant) becomes
8605 // (hi($con$$constant), lo($con$$constant)) becomes
// Long compare against an immLlowRot constant, flags valid only for EQ/NE:
// TEQ the high word with 0, then conditionally (EQ) TEQ the low word.
instruct compL_reg_con_EQNE(flagsRegL_EQNE xcc, iRegL op1, immLlowRot con) %{
  match(Set xcc (CmpL op1 con));
  effect( DEF xcc, USE op1, USE con );

  size(8);
  format %{ "TEQ    $op1.hi,0\t\t! long\n\t"
            "TEQ.eq $op1.lo,$con" %}
  ins_encode %{
    __ teq($op1$$Register->successor(), 0);
    __ teq($op1$$Register, (int)$con$$constant, Assembler::EQ);
  %}

  ins_pipe(ialu_cconly_reg_reg);
%}
8620 
8621 // TODO: try immLRot2 instead, (0, $con$$constant) becomes
8622 // (hi($con$$constant), lo($con$$constant)) becomes
// Long compare against an immLlowRot constant, flags valid only for LE/GT:
// compute con - op1 with RSBS/RSCS into the long temp pair (commuted compare).
// NOTE(review): the (long) cast on $con$$constant is presumably truncated to
// the low 32 bits by rsbs — confirm against the assembler's signature.
instruct compL_reg_con_LEGT(flagsRegL_LEGT xcc, iRegL op1, immLlowRot con, iRegL tmp) %{
  match(Set xcc (CmpL op1 con));
  effect( DEF xcc, USE op1, USE con, TEMP tmp );

  size(8);
  format %{ "RSBS    $tmp,$op1.low,$con\t\t! long\n\t"
            "RSCS    $tmp,$op1.hi,0" %}
  ins_encode %{
    __ rsbs($tmp$$Register, $op1$$Register, (long)$con$$constant);
    __ rscs($tmp$$Register->successor(), $op1$$Register->successor(), 0);
  %}

  ins_pipe(ialu_cconly_reg_reg);
%}
8637 
8638 /* instruct testL_reg_reg(flagsRegL xcc, iRegL op1, iRegL op2, immL0 zero) %{ */
8639 /*   match(Set xcc (CmpL (AndL op1 op2) zero)); */
8640 /*   ins_encode %{ */
8641 /*     __ stop("testL_reg_reg unimplemented"); */
8642 /*   %} */
8643 /*   ins_pipe(ialu_cconly_reg_reg); */
8644 /* %} */
8645 
8646 /* // useful for checking the alignment of a pointer: */
8647 /* instruct testL_reg_con(flagsRegL xcc, iRegL op1, immLlowRot con, immL0 zero) %{ */
8648 /*   match(Set xcc (CmpL (AndL op1 con) zero)); */
8649 /*   ins_encode %{ */
8650 /*     __ stop("testL_reg_con unimplemented"); */
8651 /*   %} */
8652 /*   ins_pipe(ialu_cconly_reg_reg); */
8653 /* %} */
8654 
// Unsigned int compare against an encodable immediate in [0, 2^31).
instruct compU_iReg_imm(flagsRegU icc, iRegI op1, aimmU31 op2 ) %{
  match(Set icc (CmpU op1 op2));

  size(4);
  format %{ "cmp_32 $op1,$op2\t! unsigned" %}
  ins_encode %{
    __ cmp($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cconly_reg_imm);
%}
8665 
8666 // Compare Pointers
// Pointer compare, register-register (plain CMP; pointers compare unsigned
// via the flagsRegP conditions).
instruct compP_iRegP(flagsRegP pcc, iRegP op1, iRegP op2 ) %{
  match(Set pcc (CmpP op1 op2));

  size(4);
  format %{ "CMP    $op1,$op2\t! ptr" %}
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cconly_reg_reg);
%}
8677 
// Pointer compare against an immediate; in practice only NULL (0) or other
// non-relocated constants are legal, as the assert below enforces.
instruct compP_iRegP_imm(flagsRegP pcc, iRegP op1, aimmP op2 ) %{
  match(Set pcc (CmpP op1 op2));

  size(4);
  format %{ "CMP    $op1,$op2\t! ptr" %}
  ins_encode %{
    assert($op2$$constant == 0 || _opnds[2]->constant_reloc() == relocInfo::none, "reloc in cmp?");
    __ cmp($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cconly_reg_imm);
%}
8689 
8690 //----------Max and Min--------------------------------------------------------
8691 // Min Instructions
8692 // Conditional move for min
// Conditional move used by the MinI expansion: op2 = op1 if flags say LT.
// Expand-only helper (no match rule); op2 is read-modify (USE_DEF).
instruct cmovI_reg_lt( iRegI op2, iRegI op1, flagsReg icc ) %{
  effect( USE_DEF op2, USE op1, USE icc );

  size(4);
  format %{ "MOV.lt  $op2,$op1\t! min" %}
  ins_encode %{
    __ mov($op2$$Register, $op1$$Register, Assembler::LT);
  %}
  ins_pipe(ialu_reg_flags);
%}
8703 
8704 // Min Register with Register.
// MinI: expand to a signed compare followed by a conditional move on LT.
instruct minI_eReg(iRegI op1, iRegI op2) %{
  match(Set op2 (MinI op1 op2));
  ins_cost(DEFAULT_COST*2);
  expand %{
    flagsReg icc;
    compI_iReg(icc,op1,op2);
    cmovI_reg_lt(op2,op1,icc);
  %}
%}
8714 
8715 // Max Instructions
8716 // Conditional move for max
// Conditional move used by the MaxI expansion: op2 = op1 if flags say GT.
// Expand-only helper (no match rule); op2 is read-modify (USE_DEF).
// Added size(4) for consistency with cmovI_reg_lt — both emit exactly one
// 4-byte conditional MOV.
instruct cmovI_reg_gt( iRegI op2, iRegI op1, flagsReg icc ) %{
  effect( USE_DEF op2, USE op1, USE icc );

  size(4);
  format %{ "MOV.gt  $op2,$op1\t! max" %}
  ins_encode %{
    __ mov($op2$$Register, $op1$$Register, Assembler::GT);
  %}
  ins_pipe(ialu_reg_flags);
%}
8725 
8726 // Max Register with Register
// MaxI: expand to a signed compare followed by a conditional move on GT.
instruct maxI_eReg(iRegI op1, iRegI op2) %{
  match(Set op2 (MaxI op1 op2));
  ins_cost(DEFAULT_COST*2);
  expand %{
    flagsReg icc;
    compI_iReg(icc,op1,op2);
    cmovI_reg_gt(op2,op1,icc);
  %}
%}
8736 
8737 
8738 //----------Float Compares----------------------------------------------------
8739 // Compare floating, generate condition code
// Float compare producing integer condition codes: VCMP sets the FP status
// flags, then they are copied to APSR (FMSTAT / vmrs via get_fpsr).
instruct cmpF_cc(flagsRegF fcc, flagsReg icc, regF src1, regF src2) %{
  match(Set icc (CmpF src1 src2));
  effect(KILL fcc);

  size(8);
  format %{ "FCMPs  $src1,$src2\n\t"
            "FMSTAT" %}
  ins_encode %{
    __ vcmp_f32($src1$$FloatRegister, $src2$$FloatRegister);
    __ get_fpsr();
  %}
  ins_pipe(faddF_fcc_reg_reg_zero);
%}
8753 
// Float compare against +0.0 producing integer condition codes: VCMP with
// the compare-with-zero encoding, then copy FP flags to APSR (FMSTAT).
// Format fixed to "FCMPZs" to match the zero-compare encoding used below and
// the sibling rules (cmpD0_cc uses FCMPZd, cmpF0_reg uses FCMPZs).
instruct cmpF0_cc(flagsRegF fcc, flagsReg icc, regF src1, immF0 src2) %{
  match(Set icc (CmpF src1 src2));
  effect(KILL fcc);

  size(8);
  format %{ "FCMPZs $src1,$src2\n\t"
            "FMSTAT" %}
  ins_encode %{
    __ vcmp_f32($src1$$FloatRegister, 0);
    __ get_fpsr();
  %}
  ins_pipe(faddF_fcc_reg_reg_zero);
%}
8767 
// Double compare producing integer condition codes: VCMP then copy the
// FP status flags to APSR (FMSTAT / vmrs via get_fpsr).
instruct cmpD_cc(flagsRegF fcc, flagsReg icc, regD src1, regD src2) %{
  match(Set icc (CmpD src1 src2));
  effect(KILL fcc);

  size(8);
  format %{ "FCMPd  $src1,$src2 \n\t"
            "FMSTAT" %}
  ins_encode %{
    __ vcmp_f64($src1$$FloatRegister, $src2$$FloatRegister);
    __ get_fpsr();
  %}
  ins_pipe(faddD_fcc_reg_reg_zero);
%}
8781 
// Double compare against +0.0 producing integer condition codes
// (VCMP zero form, then FMSTAT to copy flags to APSR).
instruct cmpD0_cc(flagsRegF fcc, flagsReg icc, regD src1, immD0 src2) %{
  match(Set icc (CmpD src1 src2));
  effect(KILL fcc);

  size(8);
  format %{ "FCMPZd  $src1,$src2 \n\t"
            "FMSTAT" %}
  ins_encode %{
    __ vcmp_f64($src1$$FloatRegister, 0);
    __ get_fpsr();
  %}
  ins_pipe(faddD_fcc_reg_reg_zero);
%}
8795 
8796 // Compare floating, generate -1,0,1
// CmpF3: materialize -1/0/+1 from a float compare without touching the
// integer flags; floating_cmp() turns FPSCR bits into the three-way result.
instruct cmpF_reg(iRegI dst, regF src1, regF src2, flagsRegF fcc) %{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL fcc);
  ins_cost(DEFAULT_COST*3+BRANCH_COST*3); // FIXME
  size(20);
  // same number of instructions as code using conditional moves but
  // doesn't kill integer condition register
  format %{ "FCMPs  $dst,$src1,$src2 \n\t"
            "VMRS   $dst, FPSCR \n\t"
            "OR     $dst, $dst, 0x08000000 \n\t"
            "EOR    $dst, $dst, $dst << 3 \n\t"
            "MOV    $dst, $dst >> 30" %}
  ins_encode %{
    __ vcmp_f32($src1$$FloatRegister, $src2$$FloatRegister);
    __ floating_cmp($dst$$Register);
  %}
  ins_pipe( floating_cmp );
%}
8815 
// CmpF3 against +0.0: same as cmpF_reg but uses the VCMP zero form.
instruct cmpF0_reg(iRegI dst, regF src1, immF0 src2, flagsRegF fcc) %{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL fcc);
  ins_cost(DEFAULT_COST*3+BRANCH_COST*3); // FIXME
  size(20);
  // same number of instructions as code using conditional moves but
  // doesn't kill integer condition register
  format %{ "FCMPZs $dst,$src1,$src2 \n\t"
            "VMRS   $dst, FPSCR \n\t"
            "OR     $dst, $dst, 0x08000000 \n\t"
            "EOR    $dst, $dst, $dst << 3 \n\t"
            "MOV    $dst, $dst >> 30" %}
  ins_encode %{
    __ vcmp_f32($src1$$FloatRegister, 0);
    __ floating_cmp($dst$$Register);
  %}
  ins_pipe( floating_cmp );
%}
8834 
// CmpD3: materialize -1/0/+1 from a double compare without touching the
// integer flags; floating_cmp() turns FPSCR bits into the three-way result.
instruct cmpD_reg(iRegI dst, regD src1, regD src2, flagsRegF fcc) %{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL fcc);
  ins_cost(DEFAULT_COST*3+BRANCH_COST*3); // FIXME
  size(20);
  // same number of instructions as code using conditional moves but
  // doesn't kill integer condition register
  format %{ "FCMPd  $dst,$src1,$src2 \n\t"
            "VMRS   $dst, FPSCR \n\t"
            "OR     $dst, $dst, 0x08000000 \n\t"
            "EOR    $dst, $dst, $dst << 3 \n\t"
            "MOV    $dst, $dst >> 30" %}
  ins_encode %{
    __ vcmp_f64($src1$$FloatRegister, $src2$$FloatRegister);
    __ floating_cmp($dst$$Register);
  %}
  ins_pipe( floating_cmp );
%}
8853 
// CmpD3 against +0.0: same as cmpD_reg but uses the VCMP zero form.
instruct cmpD0_reg(iRegI dst, regD src1, immD0 src2, flagsRegF fcc) %{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL fcc);
  ins_cost(DEFAULT_COST*3+BRANCH_COST*3); // FIXME
  size(20);
  // same number of instructions as code using conditional moves but
  // doesn't kill integer condition register
  format %{ "FCMPZd $dst,$src1,$src2 \n\t"
            "VMRS   $dst, FPSCR \n\t"
            "OR     $dst, $dst, 0x08000000 \n\t"
            "EOR    $dst, $dst, $dst << 3 \n\t"
            "MOV    $dst, $dst >> 30" %}
  ins_encode %{
    __ vcmp_f64($src1$$FloatRegister, 0);
    __ floating_cmp($dst$$Register);
  %}
  ins_pipe( floating_cmp );
%}
8872 
8873 //----------Branches---------------------------------------------------------
8874 // Jump
8875 // (compare 'operand indIndex' and 'instruct addP_reg_reg' above)
8876 // FIXME
// Table jump for switch statements: load the target address from the
// constant table at [constanttablebase + constantoffset + switch_val] and
// branch to it. Three emission paths depending on whether the constant
// offset is zero or fits a memory-operand immediate; size(20) covers the
// longest (MOV_SLOW + ADD + LDR + B) sequence.
instruct jumpXtnd(iRegX switch_val, iRegP tmp) %{
  match(Jump switch_val);
  effect(TEMP tmp);
  ins_cost(350);
  format %{  "ADD    $tmp, $constanttablebase, $switch_val\n\t"
             "LDR    $tmp,[$tmp + $constantoffset]\n\t"
             "BX     $tmp" %}
  size(20);
  ins_encode %{
    Register table_reg;
    Register label_reg = $tmp$$Register;
    if (constant_offset() == 0) {
      // Offset-free path: index the table base directly by switch_val.
      table_reg = $constanttablebase;
      __ ldr(label_reg, Address(table_reg, $switch_val$$Register));
    } else {
      table_reg = $tmp$$Register;
      int offset = $constantoffset;
      if (is_memoryP(offset)) {
        // Offset fits in the LDR immediate: add index, then load at offset.
        __ add(table_reg, $constanttablebase, $switch_val$$Register);
        __ ldr(label_reg, Address(table_reg, offset));
      } else {
        // Large offset: materialize it, form the address, then index.
        __ mov(table_reg, $constantoffset);
        __ add(table_reg, $constanttablebase, table_reg);
        __ ldr(label_reg, Address(table_reg, $switch_val$$Register));
      }
    }
    __ b(label_reg); // ldr + b better than ldr to PC for branch predictor?
    //    __ ldr(PC, Address($table$$Register, $switch_val$$Register));
  %}
  ins_pipe(ialu_reg_reg);
%}
8908 
8909 // // Direct Branch.
// Unconditional direct branch (Goto).
instruct branch(label labl) %{
  match(Goto);
  effect(USE labl);

  size(4);
  ins_cost(BRANCH_COST);
  format %{ "B     $labl" %}
  ins_encode %{
    __ b(*($labl$$label));
  %}
  ins_pipe(br);
%}
8922 
8923 // Conditional Direct Branch
// Conditional direct branch on the integer condition codes.
instruct branchCon(cmpOp cmp, flagsReg icc, label labl) %{
  match(If cmp icc);
  effect(USE labl);

  size(4);
  ins_cost(BRANCH_COST);
  format %{ "B$cmp   $icc,$labl" %}
  ins_encode %{
    __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(br_cc);
%}
8936 
// Conditional branch restricted to flags that are only valid for
// EQ/NE/LT/GE (e.g. produced by TST); the predicate enforces the subset.
instruct branchCon_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, label labl) %{
  match(If cmp icc);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);

  size(4);
  ins_cost(BRANCH_COST);
  format %{ "B$cmp   $icc,$labl" %}
  ins_encode %{
    __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(br_cc);
%}
8950 
// Conditional branch on unsigned integer condition codes.
instruct branchConU(cmpOpU cmp, flagsRegU icc, label labl) %{
  match(If cmp icc);
  effect(USE labl);

  size(4);
  ins_cost(BRANCH_COST);
  format %{ "B$cmp  $icc,$labl" %}
  ins_encode %{
    __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(br_cc);
%}
8963 
// Conditional branch on pointer-compare condition codes.
instruct branchConP(cmpOpP cmp, flagsRegP pcc, label labl) %{
  match(If cmp pcc);
  effect(USE labl);

  size(4);
  ins_cost(BRANCH_COST);
  format %{ "B$cmp  $pcc,$labl" %}
  ins_encode %{
    __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(br_cc);
%}
8976 
// Conditional branch on long-compare flags valid only for LT/GE
// (pairs with compL_reg_reg_LTGE / compL_reg_con_LTGE).
instruct branchConL_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, label labl) %{
  match(If cmp xcc);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );

  size(4);
  ins_cost(BRANCH_COST);
  format %{ "B$cmp  $xcc,$labl" %}
  ins_encode %{
    __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(br_cc);
%}
8990 
// Conditional branch on long-compare flags valid only for EQ/NE
// (pairs with compL_reg_reg_EQNE / compL_reg_con_EQNE).
instruct branchConL_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, label labl) %{
  match(If cmp xcc);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );

  size(4);
  ins_cost(BRANCH_COST);
  format %{ "B$cmp  $xcc,$labl" %}
  ins_encode %{
    __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(br_cc);
%}
9004 
// Conditional branch on long-compare flags valid only for LE/GT; the
// cmpOpL_commute operand accounts for the commuted compare that set them
// (pairs with compL_reg_reg_LEGT / compL_reg_con_LEGT).
instruct branchConL_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, label labl) %{
  match(If cmp xcc);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );

  size(4);
  ins_cost(BRANCH_COST);
  format %{ "B$cmp  $xcc,$labl" %}
  ins_encode %{
    __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(br_cc);
%}
9018 
// Conditional branch on unsigned-long-compare condition codes.
instruct branchConUL(cmpOpU cmp, flagsRegUL xcc, label labl) %{
  match(If cmp xcc);
  effect(USE labl);

  size(4);
  ins_cost(BRANCH_COST);
  format %{ "B$cmp  $xcc,$labl" %}
  ins_encode %{
    __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(br_cc);
%}
9031 
// Conditional back-branch closing a counted loop.
instruct branchLoopEnd(cmpOp cmp, flagsReg icc, label labl) %{
  match(CountedLoopEnd cmp icc);
  effect(USE labl);

  size(4);
  ins_cost(BRANCH_COST);
  format %{ "B$cmp   $icc,$labl\t! Loop end" %}
  ins_encode %{
    __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(br_cc);
%}
9044 
9045 // instruct branchLoopEndU(cmpOpU cmp, flagsRegU icc, label labl) %{
9046 //   match(CountedLoopEnd cmp icc);
9047 //   ins_pipe(br_cc);
9048 // %}
9049 
9050 // ============================================================================
9051 // Long Compare
9052 //
9053 // Currently we hold longs in 2 registers.  Comparing such values efficiently
9054 // is tricky.  The flavor of compare used depends on whether we are testing
9055 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
9056 // The GE test is the negated LT test.  The LE test can be had by commuting
9057 // the operands (yielding a GE test) and then negating; negate again for the
9058 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
9059 // NE test is negated from that.
9060 
9061 // Due to a shortcoming in the ADLC, it mixes up expressions like:
9062 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
9063 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
9064 // are collapsed internally in the ADLC's dfa-gen code.  The match for
9065 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
9066 // foo match ends up with the wrong leaf.  One fix is to not match both
9067 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
9068 // both forms beat the trinary form of long-compare and both are very useful
9069 // on Intel which has so few registers.
9070 
9071 // instruct branchCon_long(cmpOp cmp, flagsRegL xcc, label labl) %{
9072 //   match(If cmp xcc);
9073 //   ins_pipe(br_cc);
9074 // %}
9075 
9076 // Manifest a CmpL3 result in an integer register.  Very painful.
9077 // This is the test to avoid.
// CmpL3: materialize -1/0/+1 from a long compare. First decide on the
// signed high words (GT/LT); if they are equal, fall through and decide on
// the unsigned low words (HI/LO). dst ends up 0 only when both halves match.
instruct cmpL3_reg_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg ccr ) %{
  match(Set dst (CmpL3 src1 src2) );
  effect( KILL ccr );
  ins_cost(6*DEFAULT_COST); // FIXME
  size(32);
  format %{
      "CMP    $src1.hi, $src2.hi\t\t! long\n"
    "\tMOV.gt $dst, 1\n"
    "\tmvn.lt $dst, 0\n"
    "\tB.ne   done\n"
    "\tSUBS   $dst, $src1.lo, $src2.lo\n"
    "\tMOV.hi $dst, 1\n"
    "\tmvn.lo $dst, 0\n"
    "done:"     %}
  ins_encode %{
    Label done;
    __ cmp($src1$$Register->successor(), $src2$$Register->successor());
    __ mov_i($dst$$Register, 1, Assembler::GT);
    __ mvn_i($dst$$Register, 0, Assembler::LT);   // mvn 0 == -1
    __ b(done, Assembler::NE);
    __ subs($dst$$Register, $src1$$Register, $src2$$Register);
    __ mov_i($dst$$Register, 1, Assembler::HI);
    __ mvn_i($dst$$Register, 0, Assembler::LO);   // mvn 0 == -1
    __ bind(done);
  %}
  ins_pipe(cmpL_reg);
%}
9105 
9106 // Conditional move
// Conditional long move (register source) for LT/GE flags: two conditional
// MOVs, low word then high word. Format fixed: the second line moves
// $dst.hi from $src.hi, not bare $dst.
instruct cmovLL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegL dst, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );

  ins_cost(150);
  size(8);
  format %{ "MOV$cmp  $dst.lo,$src.lo\t! long\n\t"
            "MOV$cmp  $dst.hi,$src.hi" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
    __ mov($dst$$Register->successor(), $src$$Register->successor(), (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_reg);
%}
9121 
// Conditional long move (register source) for EQ/NE flags: two conditional
// MOVs, low word then high word. Format fixed: the second line moves
// $dst.hi from $src.hi, not bare $dst.
instruct cmovLL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegL dst, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );

  ins_cost(150);
  size(8);
  format %{ "MOV$cmp  $dst.lo,$src.lo\t! long\n\t"
            "MOV$cmp  $dst.hi,$src.hi" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
    __ mov($dst$$Register->successor(), $src$$Register->successor(), (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_reg);
%}
9136 
// Conditional long move (register source) for LE/GT flags (commuted compare):
// two conditional MOVs, low word then high word. Format fixed: the second
// line moves $dst.hi from $src.hi, not bare $dst.
instruct cmovLL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegL dst, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );

  ins_cost(150);
  size(8);
  format %{ "MOV$cmp  $dst.lo,$src.lo\t! long\n\t"
            "MOV$cmp  $dst.hi,$src.hi" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
    __ mov($dst$$Register->successor(), $src$$Register->successor(), (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_reg);
%}
9151 
// Conditionally zero a long (the only immediate handled here is immL0):
// both halves are cleared with predicated MOVs.  One variant per
// flags-register class, as for cmovLL_reg above.
//
// Fix: the second format line previously printed "$dst,0" (low half)
// although that MOV clears the HIGH half; it now prints "$dst.hi,0".
instruct cmovLL_imm_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegL dst, immL0 src) %{
  match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  ins_cost(140);
  size(8);
  format %{ "MOV$cmp  $dst.lo,0\t! long\n\t"
            "MOV$cmp  $dst.hi,0" %}
  ins_encode %{
    __ mov($dst$$Register, 0, (Assembler::Condition)($cmp$$cmpcode));
    // successor() is the high word of the register pair holding the long.
    __ mov($dst$$Register->successor(), 0, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}

instruct cmovLL_imm_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegL dst, immL0 src) %{
  match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  ins_cost(140);
  size(8);
  format %{ "MOV$cmp  $dst.lo,0\t! long\n\t"
            "MOV$cmp  $dst.hi,0" %}
  ins_encode %{
    __ mov($dst$$Register, 0, (Assembler::Condition)($cmp$$cmpcode));
    __ mov($dst$$Register->successor(), 0, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}

instruct cmovLL_imm_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegL dst, immL0 src) %{
  match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
  ins_cost(140);
  size(8);
  format %{ "MOV$cmp  $dst.lo,0\t! long\n\t"
            "MOV$cmp  $dst.hi,0" %}
  ins_encode %{
    __ mov($dst$$Register, 0, (Assembler::Condition)($cmp$$cmpcode));
    __ mov($dst$$Register->successor(), 0, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}
9193 
// Conditional move of an int register, guarded by flags produced by a
// long compare.  A single predicated MOV; one variant per flags-register
// class (LTGE / EQNE / LEGT) so the predicate and register mask agree.
instruct cmovIL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegI dst, iRegI src) %{
  match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );

  ins_cost(150);
  size(4);
  format %{ "MOV$cmp  $dst,$src" %}
  ins_encode %{
    // Executes only when $cmp's condition holds; otherwise $dst keeps its value.
    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovIL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegI dst, iRegI src) %{
  match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );

  ins_cost(150);
  size(4);
  format %{ "MOV$cmp  $dst,$src" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovIL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegI dst, iRegI src) %{
  match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );

  ins_cost(150);
  size(4);
  format %{ "MOV$cmp  $dst,$src" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_reg);
%}
9232 
// Conditional move of a 16-bit immediate into an int register (predicated
// MOVW), guarded by flags from a long compare.  One variant per
// flags-register class (LTGE / EQNE / LEGT).
instruct cmovIL_imm_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegI dst, immI16 src) %{
  match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );

  ins_cost(140);
  format %{ "MOVW$cmp  $dst,$src" %}
  ins_encode %{
    // MOVW writes the 16-bit immediate into the low half and zeroes the top.
    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}

instruct cmovIL_imm_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegI dst, immI16 src) %{
  match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );

  ins_cost(140);
  format %{ "MOVW$cmp  $dst,$src" %}
  ins_encode %{
    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}

instruct cmovIL_imm_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegI dst, immI16 src) %{
  match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );

  ins_cost(140);
  format %{ "MOVW$cmp  $dst,$src" %}
  ins_encode %{
    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}
9268 
// Conditional move of a pointer register, guarded by flags from a long
// compare.  Same shape as the cmovIL_reg variants (pointers are 32-bit
// on AArch32, so a single predicated MOV suffices).
instruct cmovPL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegP dst, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );

  ins_cost(150);
  size(4);
  format %{ "MOV$cmp  $dst,$src" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovPL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegP dst, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );

  ins_cost(150);
  size(4);
  format %{ "MOV$cmp  $dst,$src" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovPL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegP dst, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );

  ins_cost(150);
  size(4);
  format %{ "MOV$cmp  $dst,$src" %}
  ins_encode %{
    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_reg);
%}
9307 
// Conditionally set a pointer register to null (the only immediate handled
// is immP0), guarded by flags from a long compare.  Emitted as a
// predicated MOVW of the constant 0.
instruct cmovPL_imm_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegP dst, immP0 src) %{
  match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );

  ins_cost(140);
  format %{ "MOVW$cmp  $dst,$src" %}
  ins_encode %{
    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}

instruct cmovPL_imm_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegP dst, immP0 src) %{
  match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );

  ins_cost(140);
  format %{ "MOVW$cmp  $dst,$src" %}
  ins_encode %{
    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}

instruct cmovPL_imm_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegP dst, immP0 src) %{
  match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );

  ins_cost(140);
  format %{ "MOVW$cmp  $dst,$src" %}
  ins_encode %{
    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(ialu_imm);
%}
9343 
// Conditional move of a single-precision FP register, guarded by flags
// from a long (integer) compare.  A single predicated VFP copy (FCPYS).
instruct cmovFL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  ins_cost(150);
  size(4);
  format %{ "FCPYS$cmp $dst,$src" %}
  ins_encode %{
    __ vmov_f32($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(int_conditional_float_move);
%}

instruct cmovFL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  ins_cost(150);
  size(4);
  format %{ "FCPYS$cmp $dst,$src" %}
  ins_encode %{
    __ vmov_f32($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(int_conditional_float_move);
%}

instruct cmovFL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
  ins_cost(150);
  size(4);
  format %{ "FCPYS$cmp $dst,$src" %}
  ins_encode %{
    __ vmov_f32($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(int_conditional_float_move);
%}
9379 
// Conditional move of a double-precision FP register, guarded by flags
// from a long (integer) compare.  A single predicated VFP copy (FCPYD).
instruct cmovDL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );

  ins_cost(150);
  size(4);
  format %{ "FCPYD$cmp $dst,$src" %}
  ins_encode %{
    __ vmov_f64($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(int_conditional_float_move);
%}

instruct cmovDL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );

  ins_cost(150);
  size(4);
  format %{ "FCPYD$cmp $dst,$src" %}
  ins_encode %{
    __ vmov_f64($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(int_conditional_float_move);
%}

instruct cmovDL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cmp xcc) (Binary dst src)));
  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );

  ins_cost(150);
  size(4);
  format %{ "FCPYD$cmp $dst,$src" %}
  ins_encode %{
    __ vmov_f64($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode));
  %}
  ins_pipe(int_conditional_float_move);
%}
9418 
9419 // ============================================================================
9420 // Safepoint Instruction
9421 // rather than KILL R12, it would be better to use any reg as
9422 // TEMP. Can't do that at this point because it crashes the compiler
9423 instruct safePoint_poll(iRegP poll, R12RegI tmp, flagsReg icc) %{
9424   match(SafePoint poll);
9425   effect(USE poll, KILL tmp, KILL icc);
9426 
9427   size(4);
9428   format %{ "LDR   $tmp,[$poll]\t! Safepoint: poll for GC" %}
9429   ins_encode %{
9430     __ relocate(relocInfo::poll_type);
9431     __ ldr($tmp$$Register, Address($poll$$Register));
9432   %}
9433   ins_pipe(loadPollP);
9434 %}
9435 
9436 
9437 // ============================================================================
9438 // Call Instructions
9439 // Call Java Static Instruction
9440 instruct CallStaticJavaDirect( method meth ) %{
9441   match(CallStaticJava);
9442   predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
9443   effect(USE meth);
9444   size(call_static_enc_size(this, _method, _method_handle_invoke));
9445 
9446   ins_cost(CALL_COST);
9447   format %{ "CALL,static ==> " %}
9448   ins_encode( Java_Static_Call( meth ), call_epilog );
9449   ins_pipe(simple_call);
9450 %}
9451 
// Call Java Static Instruction (method handle version)
// Same as CallStaticJavaDirect but brackets the call with preserve_SP /
// restore_SP because a method-handle call may adjust the stack pointer.
instruct CallStaticJavaHandle( method meth ) %{
  match(CallStaticJava);
  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
  effect(USE meth);
  size(call_static_enc_size(this, _method, _method_handle_invoke));

  // FP is saved by all callees (for interpreter stack correction).
  // We use it here for a similar purpose, in {preserve,restore}_FP.

  ins_cost(CALL_COST);
  format %{ "CALL,static/MethodHandle ==> " %}
  ins_encode( preserve_SP, Java_Static_Call( meth ), restore_SP, call_epilog );
  ins_pipe(simple_call);
%}
9467 
// Call Java Dynamic Instruction
// Dynamic (inline-cache) Java call; the encoding loads the IC oop before
// the call, as shown in the format string.
instruct CallDynamicJavaDirect( method meth ) %{
  match(CallDynamicJava);
  effect(USE meth);
  size(call_dynamic_enc_size());

  ins_cost(CALL_COST);
  format %{ "MOV_OOP    (empty),R_R8\n\t"
            "CALL,dynamic  ; NOP ==> " %}
  ins_encode( Java_Dynamic_Call( meth ), call_epilog );
  ins_pipe(call);
%}
9480 
// Call Runtime Instruction
// Call into the VM runtime (may include a safepoint, unlike the leaf calls).
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime);
  effect(USE meth);
  ins_cost(CALL_COST);
  size(call_runtime_enc_size(this));

  format %{ "CALL,runtime" %}
  ins_encode( Java_To_Runtime( meth ),
              call_epilog );
  ins_pipe(simple_call);
%}
9493 
// Call runtime without safepoint - same as CallRuntime
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);
  ins_cost(CALL_COST);
  size(call_runtime_enc_size(this));

  format %{ "CALL,runtime leaf" %}
  // TODO: need save_last_PC here?
  ins_encode( Java_To_Runtime( meth ),
              call_epilog );
  ins_pipe(simple_call);
%}
9507 
// Call runtime without safepoint - same as CallLeaf
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);
  ins_cost(CALL_COST);
  size(call_runtime_enc_size(this));

  format %{ "CALL,runtime leaf nofp" %}
  // TODO: need save_last_PC here?
  ins_encode( Java_To_Runtime( meth ),
              call_epilog );
  ins_pipe(simple_call);
%}
9521 
// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(iRegP jump_target, inline_cache_regP method_oop) %{
  match(TailCall jump_target method_oop );

  ins_cost(CALL_COST);
  format %{ "MOV    Rexception_pc, LR\n\t"
            "jump   $jump_target  \t! $method_oop holds method oop" %}
  ins_encode %{
    __ mov(r3, lr);   // this is used only to call
                                 // StubRoutines::forward_exception_entry()
                                 // which expects PC of exception in
                                 // R3. FIXME?
    // Indirect jump (not a call): LR is left pointing at the original caller.
    __ b($jump_target$$Register);
  %}
  ins_pipe(tail_call);
%}
9541 
9542 
// Return Instruction
// Method return: branch to the return address held in LR.
instruct Ret() %{
  match(Return);

  format %{ "ret LR" %}

  ins_encode %{
    __ ret(lr);
  %}

  ins_pipe(br);
%}
9555 
9556 
// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a
// "restore" before this instruction (in Epilogue), we need to materialize it
// in %i0.
instruct tailjmpInd(IPRegP jump_target, RExceptionRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(CALL_COST);
  format %{ "MOV    Rexception_pc, LR\n\t"
            "jump   $jump_target \t! $ex_oop holds exc. oop" %}
  ins_encode %{
    // r3 receives the exception PC (see the matching comment in
    // TailCalljmpInd above); then jump indirect to the handler.
    __ mov(r3, lr);
    __ b($jump_target$$Register);
  %}
  ins_pipe(tail_call);
%}
9574 
// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler.  No code emitted.
instruct CreateException( RExceptionRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));
  ins_cost(0);

  size(0);
  // use the following format syntax
  format %{ "! exception oop is in Rexception_obj; no code emitted" %}
  ins_encode();
  ins_pipe(empty);
%}
9589 
9590 
// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);
  ins_cost(CALL_COST);

  // use the following format syntax
  format %{ "b    rethrow_stub" %}
  ins_encode %{
    // r1 is safe scratch for materializing the stub address: it must not
    // alias the exception oop in c_rarg0 nor the return address in LR.
    Register scratch = r1;
    assert_different_registers(scratch, c_rarg0, lr);
    __ jump(OptoRuntime::rethrow_stub(), relocInfo::runtime_call_type, scratch);
  %}
  ins_pipe(tail_call);
%}
9608 
9609 
// Die now
// Emits a permanently-undefined instruction (UDF) so execution traps
// immediately if this point is ever reached.
instruct ShouldNotReachHere( )
%{
  match(Halt);
  ins_cost(CALL_COST);

  size(4);
  // Use the following format syntax
  format %{ "ShouldNotReachHere" %}
  ins_encode %{
    __ udf(0xdead);
  %}
  ins_pipe(tail_call);
%}
9624 
9625 // ============================================================================
9626 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
9627 // array for an instance of the superklass.  Set a hidden internal cache on a
9628 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
9629 // not zero for a miss or zero for a hit.  The encoding ALSO sets flags.
9630 instruct partialSubtypeCheck( R0RegP index, R1RegP sub, R2RegP super, flagsRegP pcc, LRRegP lr, R9RegI r9, R12RegI r12 ) %{
9631   match(Set index (PartialSubtypeCheck sub super));
9632   effect( KILL pcc, KILL r9, KILL r12, KILL lr );
9633   ins_cost(DEFAULT_COST*10);
9634   format %{ "CALL   PartialSubtypeCheck" %}
9635   ins_encode %{
9636     __ call(StubRoutines::aarch32::partial_subtype_check(), relocInfo::runtime_call_type);
9637   %}
9638   ins_pipe(partial_subtype_check_pipe);
9639 %}
9640 
9641 /* instruct partialSubtypeCheck_vs_zero( flagsRegP pcc, o1RegP sub, o2RegP super, immP0 zero, o0RegP idx, o7RegP o7 ) %{ */
9642 /*   match(Set pcc (CmpP (PartialSubtypeCheck sub super) zero)); */
9643 /*   ins_pipe(partial_subtype_check_pipe); */
9644 /* %} */
9645 
9646 
9647 // ============================================================================
9648 // inlined locking and unlocking
9649 
9650 instruct cmpFastLock(flagsRegP pcc, iRegP object, iRegP box, iRegP mark, iRegP scratch2, iRegP scratch )
9651 %{
9652   match(Set pcc (FastLock object box));
9653 
9654   effect(TEMP mark, TEMP scratch, TEMP scratch2);
9655   ins_cost(100);
9656 
9657   format %{ "FASTLOCK  $object, $box; KILL $mark, $scratch, $scratch2" %}
9658   ins_encode %{
9659     __ fast_lock($object$$Register, $box$$Register, $mark$$Register, $scratch$$Register, $scratch2$$Register);
9660   %}
9661   ins_pipe(long_memory_op);
9662 %}
9663 
9664 
// Inline fast-path monitor exit; counterpart of cmpFastLock above.
instruct cmpFastUnlock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, iRegP scratch ) %{
  match(Set pcc (FastUnlock object box));
  effect(TEMP scratch, TEMP scratch2);
  ins_cost(100);

  format %{ "FASTUNLOCK  $object, $box; KILL $scratch, $scratch2" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $scratch$$Register, $scratch2$$Register);
  %}
  ins_pipe(long_memory_op);
%}
9676 
// Count and Base registers are fixed because the allocator cannot
// kill unknown registers.  The encodings are generic.
// Zeroes $cnt bytes starting at $base by counting $temp down in word
// (4-byte) steps and storing $zero while the index is still >= 0.
instruct clear_array(iRegX cnt, iRegP base, iRegI temp, iRegX zero, Universe dummy, flagsReg cpsr) %{
  match(Set dummy (ClearArray cnt base));
  effect(TEMP temp, TEMP zero, KILL cpsr);
  ins_cost(300);
  // Fixed: the STR line was missing its trailing "\n" (it ran into the B.gt
  // line in the debug listing) and carried a stale "! delay slot" note --
  // ARM has no branch delay slots.
  format %{ "MOV    $zero,0\n"
      "        MOV    $temp,$cnt\n"
      "loop:   SUBS   $temp,$temp,4\t! Count down a dword of bytes\n"
      "        STR.ge $zero,[$base+$temp]\n"
      "        B.gt   loop\t\t! Clearing loop\n" %}
  ins_encode %{
    __ mov($zero$$Register, 0);
    __ mov($temp$$Register, $cnt$$Register);
    // Was 'Label(loop);' -- same declaration, clearer without the
    // call-like parentheses.
    Label loop;
    __ bind(loop);
    __ subs($temp$$Register, $temp$$Register, 4);
    // Predicated on GE: the last store happens when $temp reaches exactly 0.
    __ str($zero$$Register, Address($base$$Register, $temp$$Register), Assembler::GE);
    __ b(loop, Assembler::GT);
  %}
  ins_pipe(long_memory_op);
%}
9699 
// String.compareTo intrinsics.  One instruct per encoding pair (UU, LL,
// UL, LU); the two trailing constants passed to enc_String_Compare are the
// element sizes in bytes of str1 and str2 respectively (2 = UTF-16,
// 1 = Latin-1), matching the StrIntrinsicNode encoding in the predicate.
// Arguments are pinned to R0-R3 and consumed (USE_KILL).
instruct string_compareUU(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result,
                          iRegI tmp1, iRegI tmp2, Q0_regD tmp3, Q1_regD tmp4, flagsReg ccr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP result, KILL ccr);

  ins_cost(300);
  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode( enc_String_Compare(str1, str2, cnt1, cnt2, result, tmp1, tmp2, tmp3, tmp4, (2), (2)) );
  ins_pipe(long_memory_op);
%}

instruct string_compareLL(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result,
                          iRegI tmp1, iRegI tmp2, Q0_regD tmp3, Q1_regD tmp4, flagsReg ccr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP result, KILL ccr);

  ins_cost(300);
  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode( enc_String_Compare(str1, str2, cnt1, cnt2, result, tmp1, tmp2, tmp3, tmp4, (1), (1)) );
  ins_pipe(long_memory_op);
%}

instruct string_compareUL(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result,
                          iRegI tmp1, iRegI tmp2, Q0_regD tmp3, Q1_regD tmp4, flagsReg ccr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP result, KILL ccr);

  ins_cost(300);
  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode( enc_String_Compare(str1, str2, cnt1, cnt2, result, tmp1, tmp2, tmp3, tmp4, (2), (1)) );
  ins_pipe(long_memory_op);
%}

instruct string_compareLU(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result,
                          iRegI tmp1, iRegI tmp2, Q0_regD tmp3, Q1_regD tmp4, flagsReg ccr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP result, KILL ccr);

  ins_cost(300);
  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode( enc_String_Compare(str1, str2, cnt1, cnt2, result, tmp1, tmp2, tmp3, tmp4, (1), (2)) );
  ins_pipe(long_memory_op);
%}
9747 
// String.equals intrinsics.  The constant passed to enc_Array_Equals is
// the element size in bytes (2 = UTF-16, 1 = Latin-1); the final 'false'
// presumably distinguishes string-equals from array-equals semantics --
// confirm against the enc_Array_Equals definition.
instruct string_equalsUU(R0RegP str1, R1RegP str2, R2RegI cnt, iRegI result, iRegI tmp1, flagsReg ccr) %{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP tmp1, TEMP result, KILL ccr);

  ins_cost(300);
  format %{ "String Equals $str1,$str2,$cnt -> $result   # KILL $tmp1" %}
  ins_encode( enc_Array_Equals(str1, str2, cnt, tmp1, result, (2), (false)) );
  ins_pipe(long_memory_op);
%}

instruct string_equalsLL(R0RegP str1, R1RegP str2, R2RegI cnt, iRegI result, iRegI tmp1, flagsReg ccr) %{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP tmp1, TEMP result, KILL ccr);

  ins_cost(300);
  format %{ "String Equals $str1,$str2,$cnt -> $result   # KILL $tmp1" %}
  ins_encode( enc_Array_Equals(str1, str2, cnt, tmp1, result, (1), (false)) );
  ins_pipe(long_memory_op);
%}
9769 
// Arrays.equals intrinsics.  Same encoder as string_equals but with the
// final flag 'true' (array variant) and no explicit count operand.
instruct array_equalsUU(R0RegP ary1, R1RegP ary2, iRegI tmp1, iRegI tmp2, iRegI result, flagsReg ccr) %{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP result, KILL ccr);

  ins_cost(300);
  format %{ "Array Equals $ary1,$ary2 -> $result   # KILL $tmp1,$tmp2" %}
  ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result, (2), (true)));
  ins_pipe(long_memory_op);
%}

instruct array_equalsLL(R0RegP ary1, R1RegP ary2, iRegI tmp1, iRegI tmp2, iRegI result, flagsReg ccr) %{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP result, KILL ccr);

  ins_cost(300);
  format %{ "Array Equals $ary1,$ary2 -> $result   # KILL $tmp1,$tmp2" %}
  ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result, (1), (true)));
  ins_pipe(long_memory_op);
%}
9791 
// StrCompressedCopy intrinsic (UTF-16 -> Latin-1 compression).  Arguments
// and result are pinned to fixed registers; NEON Q0/Q1, R9, R12 and LR are
// clobbered by the encoding.
instruct string_compress(R2RegP src, R1RegP dst, R3RegI len,
                         R9RegI tmp1, Q0_regD tmp2, Q1_regD tmp3, R12RegI tmp4, LRRegP lr, R0RegI result, flagsReg ccr)
%{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP lr, USE_KILL src, USE_KILL dst, USE_KILL len, KILL ccr);

  format %{ "String Compress $src,$dst -> $result    // KILL $tmp1, $tmp2, $tmp3, $tmp4, $lr" %}
  ins_encode( enc_Char_Array_Compress(src, dst, len, tmp1, tmp2, tmp3, tmp4, result, ccr) );
  ins_pipe(long_memory_op);
%}
9802 
// StrInflatedCopy intrinsic (Latin-1 -> UTF-16 inflation).  Produces no
// value (Universe dummy); inputs are consumed and Q0/LR are clobbered.
instruct string_inflate(Universe dummy, R0RegP src, R1RegP dst, R2RegI len,
                        iRegI tmp1, Q0_regD tmp2, LRRegP lr, flagsReg ccr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP lr, USE_KILL src, USE_KILL dst, USE_KILL len, KILL ccr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2, $lr" %}
  ins_encode( enc_Byte_Array_Inflate(src, dst, len, tmp1, tmp2, ccr) );
  ins_pipe(long_memory_op);
%}
9813 
//---------- Zeros Count Instructions ------------------------------------------

// Integer.numberOfLeadingZeros: single CLZ instruction.
instruct countLeadingZerosI(iRegI dst, iRegI src) %{
  match(Set dst (CountLeadingZerosI src));
  size(4);
  format %{ "CLZ_32 $dst,$src" %}
  ins_encode %{
    __ clz($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9825 
// Long.numberOfLeadingZeros: CLZ of the high word; if that word is all
// zeros (CLZ == 32), also count the low word and add the two counts.
instruct countLeadingZerosL(iRegI dst, iRegL src, iRegI tmp, flagsReg ccr) %{
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP tmp, TEMP dst, KILL ccr);
  size(16);
  format %{ "CLZ    $dst,$src.hi\n\t"
            "TEQ    $dst,32\n\t"
            "CLZ.eq $tmp,$src.lo\n\t"
            "ADD.eq $dst, $dst, $tmp\n\t" %}
  ins_encode %{
    // successor() is the high word of the long register pair.
    __ clz($dst$$Register, $src$$Register->successor());
    __ teq($dst$$Register, 32);
    // Only executed (EQ) when the high word contributed all 32 zeros.
    __ clz($tmp$$Register, $src$$Register, Assembler::EQ);
    __ add($dst$$Register, $dst$$Register, $tmp$$Register, Assembler::EQ);
  %}
  ins_pipe(ialu_reg);
%}
9842 
// Integer.numberOfTrailingZeros: reverse the bits (RBIT), then count
// leading zeros of the reversed value.
instruct countTrailingZerosI(iRegI dst, iRegI src, iRegI tmp) %{
  match(Set dst (CountTrailingZerosI src));
  effect(TEMP tmp);
  size(8);
  format %{ "RBIT_32 $tmp, $src\n\t"
            "CLZ_32  $dst,$tmp" %}
  ins_encode %{
    __ rbit($tmp$$Register, $src$$Register);
    __ clz($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9855 
// Long.numberOfTrailingZeros: RBIT+CLZ on the low word; if the low word is
// all zeros (count == 32), repeat on the high word and add the counts.
instruct countTrailingZerosL(iRegI dst, iRegL src, iRegI tmp, flagsReg ccr) %{
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP tmp, TEMP dst, KILL ccr);
  size(24);
  format %{ "RBIT   $tmp,$src.lo\n\t"
            "CLZ    $dst,$tmp\n\t"
            "TEQ    $dst,32\n\t"
            "RBIT   $tmp,$src.hi\n\t"
            "CLZ.eq $tmp,$tmp\n\t"
            "ADD.eq $dst,$dst,$tmp\n\t" %}
  ins_encode %{
    __ rbit($tmp$$Register, $src$$Register);
    __ clz($dst$$Register, $tmp$$Register);
    __ teq($dst$$Register, 32);
    // successor() is the high word; the RBIT is unconditional but harmless
    // when the EQ-predicated CLZ/ADD below do not execute.
    __ rbit($tmp$$Register, $src$$Register->successor());
    __ clz($tmp$$Register, $tmp$$Register, Assembler::EQ);
    __ add($dst$$Register, $dst$$Register, $tmp$$Register, Assembler::EQ);
  %}
  ins_pipe(ialu_reg);
%}
9876 
9877 
//---------- Population Count Instructions -------------------------------------

// Integer.bitCount via NEON: move the int into an FP register, count bits
// per byte (VCNT), then pairwise-add the byte counts up to a single value.
instruct popCountI(iRegI dst, iRegI src, regD_low tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);

  format %{ "FMSR       $tmp,$src\n\t"
            "VCNT.8     $tmp,$tmp\n\t"
            "VPADDL.U8  $tmp,$tmp\n\t"
            "VPADDL.U16 $tmp,$tmp\n\t"
            "FMRS       $dst,$tmp" %}
  size(20);

  ins_encode %{
    __ vmov_f32($tmp$$FloatRegister, $src$$Register);
    __ vcnt_64($tmp$$FloatRegister, $tmp$$FloatRegister);
    __ vpaddl_64_u8($tmp$$FloatRegister, $tmp$$FloatRegister);
    __ vpaddl_64_u16($tmp$$FloatRegister, $tmp$$FloatRegister);
    __ vmov_f32($dst$$Register, $tmp$$FloatRegister);
  %}
  ins_pipe(ialu_reg); // FIXME
%}
9901 
9902 // Note: Long.bitCount(long) returns an int.
// Note: Long.bitCount(long) returns an int.
// Population count of a long: both words are packed into one D register,
// VCNT counts bits per byte, and three pairwise-add widenings reduce the
// eight byte counts to a single value read back into an int register.
instruct popCountL(iRegI dst, iRegL src, regD_low tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);

  format %{ "FMDRR       $tmp,$src.lo,$src.hi\n\t"
            "VCNT.8      $tmp,$tmp\n\t"
            "VPADDL.U8   $tmp,$tmp\n\t"
            "VPADDL.U16  $tmp,$tmp\n\t"
            "VPADDL.U32  $tmp,$tmp\n\t"
            "FMRS        $dst,$tmp" %}

  size(32);

  ins_encode %{
    __ vmov_f64($tmp$$FloatRegister, $src$$Register, $src$$Register->successor());
    __ vcnt_64($tmp$$FloatRegister, $tmp$$FloatRegister);
    __ vpaddl_64_u8($tmp$$FloatRegister, $tmp$$FloatRegister);
    __ vpaddl_64_u16($tmp$$FloatRegister, $tmp$$FloatRegister);
    __ vpaddl_64_u32($tmp$$FloatRegister, $tmp$$FloatRegister);
    __ vmov_f32($dst$$Register, $tmp$$FloatRegister);
  %}
  ins_pipe(ialu_reg);
%}
9927 
9928 
9929 // ============================================================================
9930 //------------Bytes reverse--------------------------------------------------
9931 
// Byte-reverse an int with a single REV instruction.
instruct bytes_reverse_int(iRegI dst, iRegI src) %{
  match(Set dst (ReverseBytesI src));

  size(4);
  format %{ "REV32 $dst,$src" %}
  ins_encode %{
    __ rev($dst$$Register, $src$$Register);
  %}
  ins_pipe( iload_mem ); // FIXME
%}
9942 
// Byte-reverse a long held in a register pair: byte-reverse each word AND
// swap the two words (dst.lo receives the reversed high word, dst.hi the
// reversed low word). TEMP dst prevents dst.lo from overwriting src input
// still needed by the second REV.
instruct bytes_reverse_long(iRegL dst, iRegL src) %{
  match(Set dst (ReverseBytesL src));
  effect(TEMP dst);
  size(8);
  // Format corrected to match the emitted code: the words are crossed.
  format %{ "REV $dst.lo,$src.hi\n\t"
            "REV $dst.hi,$src.lo" %}
  ins_encode %{
    __ rev($dst$$Register, $src$$Register->successor());
    __ rev($dst$$Register->successor(), $src$$Register);
  %}
  ins_pipe( iload_mem ); // FIXME
%}
9955 
// Byte-reverse a char (unsigned 16-bit) with REV16; upper halfword result
// is not sign-extended.
instruct bytes_reverse_unsigned_short(iRegI dst, iRegI src) %{
  match(Set dst (ReverseBytesUS src));
  size(4);
  format %{ "REV16 $dst,$src" %}
  ins_encode %{
    __ rev16($dst$$Register, $src$$Register);
  %}
  ins_pipe( iload_mem ); // FIXME
%}
9965 
// Byte-reverse a short with REVSH, which sign-extends the reversed
// low halfword.
instruct bytes_reverse_short(iRegI dst, iRegI src) %{
  match(Set dst (ReverseBytesS src));
  size(4);
  format %{ "REVSH $dst,$src" %}
  ins_encode %{
    __ revsh($dst$$Register, $src$$Register);
  %}
  ins_pipe( iload_mem ); // FIXME
%}
9975 
9976 
9977 // ====================VECTOR INSTRUCTIONS=====================================
9978 
9979 // Load Aligned Packed values into a Double Register
// Load an 8-byte vector into a single D register with a VFP double load.
instruct loadV8(vecD dst, memoryD mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "FLDD   $mem,$dst\t! load vector (8 bytes)" %}
  ins_encode %{
    __ vldr_f64($dst$$FloatRegister, $mem$$Address);
  %}
  ins_pipe(floadD_mem);
%}
9991 
9992 // Load Aligned Packed values into a Double Register Pair
// Load a 16-byte vector into an adjacent D-register pair with NEON VLD1.
instruct loadV16(vecX dst, memoryvld mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "VLD1   $mem,$dst.Q\t! load vector (16 bytes)" %}
  ins_encode %{
    // The Q value is addressed as the base D register plus its successor.
    __ vld1_16($dst$$FloatRegister, $dst$$FloatRegister->successor(FloatRegisterImpl::DOUBLE), $mem$$Address, Assembler::ALIGN_STD);
  %}
  ins_pipe(floadD_mem); // FIXME
%}
10004 
10005 // Store Vector in Double register to memory
// Store an 8-byte vector from a D register with a VFP double store.
instruct storeV8(memoryD mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "FSTD   $src,$mem\t! store vector (8 bytes)" %}
  ins_encode %{
    __ vstr_f64($src$$FloatRegister, $mem$$Address);
  %}
  ins_pipe(fstoreD_mem_reg);
%}
10017 
10018 // Store Vector in Double Register Pair to memory
// Store a 16-byte vector from an adjacent D-register pair with NEON VST1.
instruct storeV16(memoryvld mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "VST1   $src,$mem\t! store vector (16 bytes)" %}
  ins_encode %{
    // The Q value is addressed as the base D register plus its successor.
    __ vst1_16($src$$FloatRegister, $src$$FloatRegister->successor(FloatRegisterImpl::DOUBLE), $mem$$Address, Assembler::ALIGN_STD);
  %}
  ins_pipe(fstoreD_mem_reg); // FIXME
%}
10030 
10031 // Replicate scalar to packed byte values in Double register
// Replicate a byte across 8 lanes without SIMD: smear the byte through the
// word with shift/OR, then duplicate the word into both halves of dst.
instruct Repl8B_reg(vecD dst, iRegI src, iRegI tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(DEFAULT_COST*4);
  effect(TEMP tmp);
  size(16);

  // FIXME: could use PKH instruction instead?
  format %{ "LSL      $tmp, $src, 24 \n\t"
            "OR       $tmp, $tmp, ($tmp >> 8) \n\t"
            "OR       $tmp, $tmp, ($tmp >> 16) \n\t"
            "FMDRR    $dst,$tmp,$tmp\t" %}
  ins_encode %{
    // Place the byte in bits 31:24, then fold it down into all four bytes.
    __ mov($tmp$$Register, $src$$Register, lsl(24));
    __ orr($tmp$$Register, $tmp$$Register, $tmp$$Register, lsr(8));
    __ orr($tmp$$Register, $tmp$$Register, $tmp$$Register, lsr(16));
    __ vmov_f64($dst$$FloatRegister, $tmp$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg); // FIXME
%}
10052 
10053 // Replicate scalar to packed byte values in Double register
// Replicate a byte across 8 lanes with a single NEON VDUP (AdvSIMD only).
instruct Repl8B_reg_simd(vecD dst, iRegI src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (ReplicateB src));
  size(4);

  format %{ "VDUP.8 $dst,$src\t" %}
  ins_encode %{
    __ vdup_64_8($dst$$FloatRegister, $src$$Register);
  %}
  ins_pipe(ialu_reg); // FIXME
%}
10065 
10066 // Replicate scalar to packed byte values in Double register pair
// Replicate a byte across 16 lanes with a quad-width NEON VDUP.
// NOTE(review): unlike Repl8B_reg_simd / Repl8S_reg, the predicate does not
// test FT_AdvSIMD although VDUP is a NEON instruction — presumably 16-byte
// vectors are only enabled when AdvSIMD is present; confirm.
instruct Repl16B_reg(vecX dst, iRegI src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (ReplicateB src));
  size(4);

  format %{ "VDUP.8 $dst.Q,$src\t" %}
  ins_encode %{
    __ vdup_128_8($dst$$FloatRegister, $src$$Register);
  %}
  ins_pipe(ialu_reg); // FIXME
%}
10078 
10079 // Replicate scalar constant to packed byte values in Double register
// Replicate an arbitrary byte constant across 8 lanes: materialize the
// 4x-replicated word in tmp, then duplicate it into both halves of dst.
instruct Repl8B_immI(vecD dst, immI src, iRegI tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(DEFAULT_COST*2);
  effect(TEMP tmp);
  size(12);

  format %{ "MOV      $tmp, Repl4($src))\n\t"
            "FMDRR    $dst,$tmp,$tmp\t" %}
  // LdReplImmI(src, dst, tmp, count-per-word, element-size-in-bytes)
  ins_encode( LdReplImmI(src, dst, tmp, (4), (1)) );
  ins_pipe(loadConFD); // FIXME
%}
10092 
10093 // Replicate scalar constant to packed byte values in Double register
10094 // TODO: support negative constants with MVNI?
// Replicate an 8-bit unsigned constant across 8 lanes with NEON VMOV
// (immediate form, AdvSIMD only).
instruct Repl8B_immU8(vecD dst, immU8 src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (ReplicateB src));
  size(4);

  format %{ "VMOV.U8  $dst,$src" %}
  ins_encode %{
    __ vmov_64_8($dst$$FloatRegister, $src$$constant);
  %}
  ins_pipe(loadConFD); // FIXME
%}
10106 
10107 // Replicate scalar constant to packed byte values in Double register pair
// Replicate an 8-bit unsigned constant across 16 lanes with quad-width
// NEON VMOV (immediate form, AdvSIMD only).
instruct Repl16B_immU8(vecX dst, immU8 src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (ReplicateB src));
  size(4);

  format %{ "VMOV.U8  $dst.Q,$src" %}
  ins_encode %{
    __ vmov_128_8($dst$$FloatRegister, $src$$constant);
  %}
  ins_pipe(loadConFD); // FIXME
%}
10119 
10120 // Replicate scalar to packed short/char values into Double register
// Replicate a short/char across 4 lanes without SIMD: smear the halfword
// through the word with shift/OR, then duplicate the word into both halves.
instruct Repl4S_reg(vecD dst, iRegI src, iRegI tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(DEFAULT_COST*3);
  effect(TEMP tmp);
  size(12);

  // FIXME: could use PKH instruction instead?
  format %{ "LSL      $tmp, $src, 16 \n\t"
            "OR       $tmp, $tmp, ($tmp >> 16) \n\t"
            "FMDRR    $dst,$tmp,$tmp\t" %}
  ins_encode %{
    __ mov($tmp$$Register, $src$$Register, lsl(16));
    __ orr($tmp$$Register, $tmp$$Register, $tmp$$Register, lsr(16));
    __ vmov_f64($dst$$FloatRegister, $tmp$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg); // FIXME
%}
10139 
// Replicate scalar to packed short/char values in Double register
// Replicate a short/char across 4 lanes with NEON VDUP (AdvSIMD only).
instruct Repl4S_reg_simd(vecD dst, iRegI src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (ReplicateS src));
  size(4);

  format %{ "VDUP.16 $dst,$src\t" %}
  ins_encode %{
    __ vdup_64_16($dst$$FloatRegister, $src$$Register);
  %}
  ins_pipe(ialu_reg); // FIXME
%}
10152 
// Replicate scalar to packed short/char values in Double register pair
// Replicate a short/char across 8 lanes with quad-width NEON VDUP
// (AdvSIMD only).
instruct Repl8S_reg(vecX dst, iRegI src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (ReplicateS src));
  size(4);

  format %{ "VDUP.16 $dst.Q,$src\t" %}
  ins_encode %{
    __ vdup_128_16($dst$$FloatRegister, $src$$Register);
  %}
  ins_pipe(ialu_reg); // FIXME
%}
10165 
10166 
10167 // Replicate scalar constant to packed short/char values in Double register
// Replicate an arbitrary short/char constant across 4 lanes: materialize
// the 2x-replicated word in tmp, then duplicate it into both halves of dst.
instruct Repl4S_immI(vecD dst, immI src, iRegP tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  effect(TEMP tmp);
  size(12);
  ins_cost(DEFAULT_COST*4); // FIXME

  format %{ "MOV      $tmp, Repl2($src))\n\t"
            "FMDRR    $dst,$tmp,$tmp\t" %}
  // LdReplImmI(src, dst, tmp, count-per-word, element-size-in-bytes)
  ins_encode( LdReplImmI(src, dst, tmp, (2), (2)) );
  ins_pipe(loadConFD); // FIXME
%}
10180 
// Replicate scalar constant to packed short/char values in Double register
// Replicate an 8-bit unsigned constant across 4 halfword lanes with NEON
// VMOV (immediate form, AdvSIMD only).
instruct Repl4S_immU8(vecD dst, immU8 src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (ReplicateS src));
  size(4);

  format %{ "VMOV.U16  $dst,$src" %}
  ins_encode %{
    __ vmov_64_16($dst$$FloatRegister, $src$$constant);
  %}
  ins_pipe(loadConFD); // FIXME
%}
10193 
// Replicate scalar constant to packed short/char values in Double register pair
// Replicate an 8-bit unsigned constant across 8 halfword lanes with
// quad-width NEON VMOV (immediate form, AdvSIMD only).
instruct Repl8S_immU8(vecX dst, immU8 src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (ReplicateS src));
  size(4);

  format %{ "VMOV.U16  $dst.Q,$src" %}
  ins_encode %{
    __ vmov_128_16($dst$$FloatRegister, $src$$constant);
  %}
  ins_pipe(loadConFD); // FIXME
%}
10206 
10207 // Replicate scalar to packed int values in Double register
// Replicate an int into both 32-bit lanes of a D register with one FMDRR
// (no SIMD required).
instruct Repl2I_reg(vecD dst, iRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  size(4);

  format %{ "FMDRR    $dst,$src,$src\t" %}
  ins_encode %{
    __ vmov_f64($dst$$FloatRegister, $src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg); // FIXME
%}
10219 
10220 // Replicate scalar to packed int values in Double register pair
// Replicate an int into all four 32-bit lanes of a D-register pair with
// two FMDRR moves (no SIMD required).
instruct Repl4I_reg(vecX dst, iRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(DEFAULT_COST*2);
  size(8);

  format %{ "FMDRR    $dst.lo,$src,$src\n\t"
            "FMDRR    $dst.hi,$src,$src" %}

  ins_encode %{
    __ vmov_f64($dst$$FloatRegister, $src$$Register, $src$$Register);
    // Fill the second D register of the pair with the same two lanes.
    __ vmov_f64($dst$$FloatRegister->successor(FloatRegisterImpl::DOUBLE),
             $src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg); // FIXME
%}
10237 
10238 // Replicate scalar to packed int values in Double register
// Replicate an int into both lanes with NEON VDUP (AdvSIMD only).
instruct Repl2I_reg_simd(vecD dst, iRegI src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (ReplicateI src));
  size(4);

  format %{ "VDUP.32 $dst.D,$src\t" %}
  ins_encode %{
    __ vdup_64_32($dst$$FloatRegister, $src$$Register);
  %}
  ins_pipe(ialu_reg); // FIXME
%}
10250 
10251 // Replicate scalar to packed int values in Double register pair
// Replicate an int into four lanes with quad-width NEON VDUP (AdvSIMD only).
instruct Repl4I_reg_simd(vecX dst, iRegI src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (ReplicateI src));
  size(4);

  format %{ "VDUP.32 $dst.Q,$src\t" %}
  ins_encode %{
    __ vdup_128_32($dst$$FloatRegister, $src$$Register);
  %}
  ins_pipe(ialu_reg); // FIXME
%}
10263 
10264 
// Replicate scalar constant to packed int values in Double register
// Replicate an arbitrary int constant into both lanes: materialize the
// constant in tmp, then duplicate it into both halves of dst.
instruct Repl2I_immI(vecD dst, immI src, iRegI tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  effect(TEMP tmp);
  size(12);
  ins_cost(DEFAULT_COST*4); // FIXME

  format %{ "MOV      $tmp, Repl1($src))\n\t"
            "FMDRR    $dst,$tmp,$tmp\t" %}
  // LdReplImmI(src, dst, tmp, count-per-word, element-size-in-bytes)
  ins_encode( LdReplImmI(src, dst, tmp, (1), (4)) );
  ins_pipe(loadConFD); // FIXME
%}
10278 
// Replicate scalar constant to packed int values in Double register
// Replicate an 8-bit unsigned constant into both 32-bit lanes with NEON
// VMOV (immediate form, AdvSIMD only).
instruct Repl2I_immU8(vecD dst, immU8 src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (ReplicateI src));
  size(4);

  format %{ "VMOV.I32  $dst.D,$src" %}
  ins_encode %{
    __ vmov_64_32($dst$$FloatRegister, $src$$constant);
  %}
  ins_pipe(loadConFD); // FIXME
%}
10291 
// Replicate scalar constant to packed int values in Double register pair
// Replicate an 8-bit unsigned constant into four 32-bit lanes with
// quad-width NEON VMOV (immediate form, AdvSIMD only).
instruct Repl4I_immU8(vecX dst, immU8 src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (ReplicateI src));
  size(4);

  format %{ "VMOV.I32  $dst.Q,$src" %}
  ins_encode %{
    __ vmov_128_32($dst$$FloatRegister, $src$$constant);
  %}
  ins_pipe(loadConFD); // FIXME
%}
10304 
// Replicate scalar to packed long values in Double register pair
// Replicate a long (core register pair) into both 64-bit lanes of a
// D-register pair with two FMDRR moves (no SIMD required).
instruct Repl2L_reg(vecX dst, iRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  size(8);
  ins_cost(DEFAULT_COST*2); // FIXME

  // Fixed "\t\n" -> "\n\t" so the debug format prints as two lines like
  // the neighboring instructs.
  format %{ "FMDRR $dst.D,$src.lo,$src.hi\n\t"
            "FMDRR $dst.D.next,$src.lo,$src.hi" %}
  ins_encode %{
    __ vmov_f64($dst$$FloatRegister, $src$$Register, $src$$Register->successor());
    __ vmov_f64($dst$$FloatRegister->successor(FloatRegisterImpl::DOUBLE),
             $src$$Register, $src$$Register->successor());
  %}
  ins_pipe(ialu_reg); // FIXME
%}
10321 
10322 
10323 // Replicate scalar to packed float values in Double register
// Replicate a float already held in a core register into both lanes of a
// D register with one FMDRR.
instruct Repl2F_regI(vecD dst, iRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  size(4);

  format %{ "FMDRR    $dst.D,$src,$src\t" %}
  ins_encode %{
    __ vmov_f64($dst$$FloatRegister, $src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg); // FIXME
%}
10335 
10336 // Replicate scalar to packed float values in Double register
// Replicate a float from a VFP register: expand into a move to a core
// register followed by Repl2F_regI.
instruct Repl2F_reg_vfp(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  size(4*2);
  ins_cost(DEFAULT_COST*2); // FIXME

  expand %{
    iRegI tmp;
    MoveF2I_reg_reg(tmp, src);
    Repl2F_regI(dst,tmp);
  %}
%}
10349 
10350 // Replicate scalar to packed float values in Double register
// Replicate a float into both lanes with NEON VDUP (AdvSIMD only).
instruct Repl2F_reg_simd(vecD dst, regF src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (ReplicateF src));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME

  format %{ "VDUP.32  $dst.D,$src.D\t" %}
  ins_encode %{
    __ vdups_64($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(ialu_reg); // FIXME
%}
10363 
10364 // Replicate scalar to packed float values in Double register pair
// Replicate a float into four lanes without SIMD: copy the bits to a core
// register, then fill both D registers of the pair with FMDRR.
instruct Repl4F_reg(vecX dst, regF src, iRegI tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  effect(TEMP tmp);
  size(4*3);
  ins_cost(DEFAULT_COST*3); // FIXME

  format %{ "FMRS     $tmp,$src\n\t"
            "FMDRR    $dst.D,$tmp,$tmp\n\t"
            "FMDRR    $dst.D.next,$tmp,$tmp\t" %}
  ins_encode %{
    __ vmov_f32($tmp$$Register, $src$$FloatRegister);
    __ vmov_f64($dst$$FloatRegister, $tmp$$Register, $tmp$$Register);
    __ vmov_f64($dst$$FloatRegister->successor(FloatRegisterImpl::DOUBLE),
             $tmp$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg); // FIXME
%}
10383 
10384 // Replicate scalar to packed float values in Double register pair
// Replicate a float into four lanes with quad-width NEON VDUP (AdvSIMD only).
instruct Repl4F_reg_simd(vecX dst, regF src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (ReplicateF src));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME

  format %{ "VDUP.32  $dst.Q,$src.D\t" %}
  ins_encode %{
    __ vdups_128($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(ialu_reg); // FIXME
%}
10397 
// Replicate scalar constant to packed float values in Double register
// Replicate a float constant into both lanes: materialize the bit pattern
// in tmp, then duplicate it into both halves of dst.
instruct Repl2F_immI(vecD dst, immF src, iRegI tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  effect(TEMP tmp);
  size(12);
  ins_cost(DEFAULT_COST*4); // FIXME

  format %{ "MOV      $tmp, Repl1($src))\n\t"
            "FMDRR    $dst,$tmp,$tmp\t" %}
  ins_encode( LdReplImmF(src, dst, tmp) );
  ins_pipe(loadConFD); // FIXME
%}
10411 
10412 // Replicate scalar to packed double float values in Double register pair
// Replicate a double into both 64-bit lanes of a D-register pair with two
// FCPYD copies (no SIMD required).
instruct Repl2D_reg(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  size(4*2);
  ins_cost(DEFAULT_COST*2); // FIXME

  format %{ "FCPYD    $dst.D.a,$src\n\t"
            "FCPYD    $dst.D.b,$src\t" %}
  ins_encode %{
    FloatRegister dsta = $dst$$FloatRegister;
    FloatRegister src = $src$$FloatRegister;
    __ vmov_f64(dsta, src);
    FloatRegister dstb = dsta->successor(FloatRegisterImpl::DOUBLE);
    __ vmov_f64(dstb, src);
  %}
  ins_pipe(ialu_reg); // FIXME
%}
10430 
10431 // ====================VECTOR ARITHMETIC=======================================
10432 
10433 // --------------------------------- ADD --------------------------------------
10434 
10435 // Bytes vector add
// Vector add of 8 bytes in a D register (NEON VADD.I8).
instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "VADD.I8 $dst,$src1,$src2\t! add packed8B" %}
  size(4);
  ins_encode %{
    __ vadd_64_8($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
10446 
// Vector add of 16 bytes in a Q register (NEON VADD.I8, quad form).
instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  size(4);
  format %{ "VADD.I8 $dst.Q,$src1.Q,$src2.Q\t! add packed16B" %}
  ins_encode %{
    __ vadd_128_8($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
10457 
10458 // Shorts/Chars vector add
// Vector add of 4 shorts/chars in a D register (NEON VADD.I16).
instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  size(4);
  format %{ "VADD.I16 $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    __ vadd_64_16($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
10469 
// Vector add of 8 shorts/chars in a Q register (NEON VADD.I16, quad form).
instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  size(4);
  format %{ "VADD.I16 $dst.Q,$src1.Q,$src2.Q\t! add packed8S" %}
  ins_encode %{
    __ vadd_128_16($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
10480 
10481 // Integers vector add
// Vector add of 2 ints in a D register (NEON VADD.I32).
instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  size(4);
  format %{ "VADD.I32 $dst.D,$src1.D,$src2.D\t! add packed2I" %}
  ins_encode %{
    __ vadd_64_32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
10492 
// Vector add of 4 ints in a Q register (NEON VADD.I32, quad form).
instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  size(4);
  format %{ "VADD.I32 $dst.Q,$src1.Q,$src2.Q\t! add packed4I" %}
  ins_encode %{
    __ vadd_128_32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
10503 
10504 // Longs vector add
// Vector add of 2 longs in a Q register (NEON VADD.I64, quad form).
instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  size(4);
  format %{ "VADD.I64 $dst.Q,$src1.Q,$src2.Q\t! add packed2L" %}
  ins_encode %{
    // Dead local 'bool quad = true;' removed — it was never used.
    __ vadd_128_64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
10516 
10517 // Floats vector add
// Vector add of 2 floats using scalar VFP adds on the two consecutive
// single-precision halves of each D register (no NEON required).
instruct vadd2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(DEFAULT_COST*2); // FIXME

  size(4*2);
  format %{ "FADDS  $dst.a,$src1.a,$src2.a\n\t"
            "FADDS  $dst.b,$src1.b,$src2.b" %}
  ins_encode %{
    __ vadd_f32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
    // Second lane lives in the next single-precision register.
    __ vadd_f32($dst$$FloatRegister->successor(FloatRegisterImpl::SINGLE),
             $src1$$FloatRegister->successor(FloatRegisterImpl::SINGLE),
             $src2$$FloatRegister->successor(FloatRegisterImpl::SINGLE));
  %}

  ins_pipe(faddF_reg_reg); // FIXME
%}
10535 
// Vector add of 4 floats using four scalar VFP adds over the consecutive
// single-precision registers of each operand (no NEON required).
instruct vadd4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  size(4*4);
  ins_cost(DEFAULT_COST*4); // FIXME

  format %{ "FADDS  $dst.a,$src1.a,$src2.a\n\t"
            "FADDS  $dst.b,$src1.b,$src2.b\n\t"
            "FADDS  $dst.c,$src1.c,$src2.c\n\t"
            "FADDS  $dst.d,$src1.d,$src2.d" %}

  ins_encode %{
    // Walk the four consecutive S registers of each operand, lane by lane.
    FloatRegister dsta = $dst$$FloatRegister;
    FloatRegister src1a = $src1$$FloatRegister;
    FloatRegister src2a = $src2$$FloatRegister;
    __ vadd_f32(dsta, src1a, src2a);
    FloatRegister dstb = dsta->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src1b = src1a->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src2b = src2a->successor(FloatRegisterImpl::SINGLE);
    __ vadd_f32(dstb, src1b, src2b);
    FloatRegister dstc = dstb->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src1c = src1b->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src2c = src2b->successor(FloatRegisterImpl::SINGLE);
    __ vadd_f32(dstc, src1c, src2c);
    FloatRegister dstd = dstc->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src1d = src1c->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src2d = src2c->successor(FloatRegisterImpl::SINGLE);
    __ vadd_f32(dstd, src1d, src2d);
  %}

  ins_pipe(faddF_reg_reg); // FIXME
%}
10568 
// Vector add of 2 doubles using two scalar VFP adds over the consecutive
// double-precision registers of each operand (no NEON required).
instruct vadd2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  size(4*2);
  ins_cost(DEFAULT_COST*2); // FIXME

  format %{ "FADDD  $dst.a,$src1.a,$src2.a\n\t"
            "FADDD  $dst.b,$src1.b,$src2.b" %}

  ins_encode %{
    FloatRegister dsta = $dst$$FloatRegister;
    FloatRegister src1a = $src1$$FloatRegister;
    FloatRegister src2a = $src2$$FloatRegister;
    __ vadd_f64(dsta, src1a, src2a);
    FloatRegister dstb = dsta->successor(FloatRegisterImpl::DOUBLE);
    FloatRegister src1b = src1a->successor(FloatRegisterImpl::DOUBLE);
    FloatRegister src2b = src2a->successor(FloatRegisterImpl::DOUBLE);
    __ vadd_f64(dstb, src1b, src2b);
  %}

  ins_pipe(faddF_reg_reg); // FIXME
%}
10591 
10592 
10593 // Bytes vector sub
// Vector subtract of 8 bytes in a D register (NEON VSUB.I8).
instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  size(4);
  format %{ "VSUB.I8 $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    __ vsub_64_8($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
10604 
// Vector subtract of 16 bytes in a Q register (NEON VSUB.I8, quad form).
instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  size(4);
  format %{ "VSUB.I8 $dst.Q,$src1.Q,$src2.Q\t! sub packed16B" %}
  ins_encode %{
    __ vsub_128_8($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
10615 
10616 // Shorts/Chars vector sub
// Vector subtract of 4 shorts/chars in a D register (NEON VSUB.I16).
instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  size(4);
  format %{ "VSUB.I16 $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    __ vsub_64_16($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
10627 
// Vector subtract of 8 shorts/chars in a Q register (NEON VSUB.I16, quad
// form). NOTE(review): the name says "16S" but the predicate and emitted
// code handle 8 shorts — the sibling of vadd8S_reg; renaming would change
// the instruct identifier, so only noting the misnomer here.
instruct vsub16S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  size(4);
  format %{ "VSUB.I16 $dst.Q,$src1.Q,$src2.Q\t! sub packed8S" %}
  ins_encode %{
    __ vsub_128_16($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
10638 
10639 // Integers vector sub
// Vector subtract of 2 ints in a D register (NEON VSUB.I32).
instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  size(4);
  format %{ "VSUB.I32 $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    __ vsub_64_32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
10650 
// Vector subtract of 4 ints in a Q register (NEON VSUB.I32, quad form).
instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  size(4);
  format %{ "VSUB.I32 $dst.Q,$src1.Q,$src2.Q\t! sub packed4I" %}
  ins_encode %{
    // Dead local 'bool quad = true;' removed — it was never used.
    __ vsub_128_32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
10662 
10663 // Longs vector sub
// Vector subtract of 2 longs in a Q register (NEON VSUB.I64, quad form).
instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  size(4);
  format %{ "VSUB.I64 $dst.Q,$src1.Q,$src2.Q\t! sub packed2L" %}
  ins_encode %{
    __ vsub_128_64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
10674 
10675 // Floats vector sub
// Vector subtract of 2 floats using two scalar VFP subtracts over the
// consecutive single-precision registers (no NEON required).
instruct vsub2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  size(4*2);
  ins_cost(DEFAULT_COST*2); // FIXME

  format %{ "FSUBS  $dst.a,$src1.a,$src2.a\n\t"
            "FSUBS  $dst.b,$src1.b,$src2.b" %}

  ins_encode %{
    FloatRegister dsta = $dst$$FloatRegister;
    FloatRegister src1a = $src1$$FloatRegister;
    FloatRegister src2a = $src2$$FloatRegister;
    __ vsub_f32(dsta, src1a, src2a);
    FloatRegister dstb = dsta->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src1b = src1a->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src2b = src2a->successor(FloatRegisterImpl::SINGLE);
    __ vsub_f32(dstb, src1b, src2b);
  %}

  ins_pipe(faddF_reg_reg); // FIXME
%}
10698 
10699 
// Vector subtract of 4 floats using four scalar VFP subtracts over the
// consecutive single-precision registers of each operand (no NEON required).
instruct vsub4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  size(4*4);
  ins_cost(DEFAULT_COST*4); // FIXME

  format %{ "FSUBS  $dst.a,$src1.a,$src2.a\n\t"
            "FSUBS  $dst.b,$src1.b,$src2.b\n\t"
            "FSUBS  $dst.c,$src1.c,$src2.c\n\t"
            "FSUBS  $dst.d,$src1.d,$src2.d" %}

  ins_encode %{
    // Walk the four consecutive S registers of each operand, lane by lane.
    FloatRegister dsta = $dst$$FloatRegister;
    FloatRegister src1a = $src1$$FloatRegister;
    FloatRegister src2a = $src2$$FloatRegister;
    __ vsub_f32(dsta, src1a, src2a);
    FloatRegister dstb = dsta->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src1b = src1a->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src2b = src2a->successor(FloatRegisterImpl::SINGLE);
    __ vsub_f32(dstb, src1b, src2b);
    FloatRegister dstc = dstb->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src1c = src1b->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src2c = src2b->successor(FloatRegisterImpl::SINGLE);
    __ vsub_f32(dstc, src1c, src2c);
    FloatRegister dstd = dstc->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src1d = src1c->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src2d = src2c->successor(FloatRegisterImpl::SINGLE);
    __ vsub_f32(dstd, src1d, src2d);
  %}

  ins_pipe(faddF_reg_reg); // FIXME
%}
10732 
// SubVD for a 2-lane double vector held in a vecX: two scalar VFP FSUBD
// instructions over consecutive double-precision registers.
instruct vsub2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  size(4*2);
  ins_cost(DEFAULT_COST*2); // FIXME

  format %{ "FSUBD  $dst.a,$src1.a,$src2.a\n\t"
            "FSUBD  $dst.b,$src1.b,$src2.b" %}

  ins_encode %{
    FloatRegister dsta = $dst$$FloatRegister;
    FloatRegister src1a = $src1$$FloatRegister;
    FloatRegister src2a = $src2$$FloatRegister;
    __ vsub_f64(dsta, src1a, src2a);
    // Second lane lives in the next double-precision register.
    FloatRegister dstb = dsta->successor(FloatRegisterImpl::DOUBLE);
    FloatRegister src1b = src1a->successor(FloatRegisterImpl::DOUBLE);
    FloatRegister src2b = src2a->successor(FloatRegisterImpl::DOUBLE);
    __ vsub_f64(dstb, src1b, src2b);
  %}

  ins_pipe(faddF_reg_reg); // FIXME
%}
10755 
10756 // Shorts/Chars vector mul
// MulVS for 4 shorts/chars in a 64-bit vector register: a single VMUL.I16.
instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  size(4);
  format %{ "VMUL.I16 $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    __ vmul_64_16($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
10767 
// MulVS for 8 shorts/chars in a 128-bit (quad) vector register: one VMUL.I16.
instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  size(4);
  format %{ "VMUL.I16 $dst.Q,$src1.Q,$src2.Q\t! mul packed8S" %}
  ins_encode %{
    __ vmul_128_16($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
10778 
10779 // Integers vector mul
// MulVI for 2 ints in a 64-bit vector register: a single VMUL.I32.
instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  size(4);
  format %{ "VMUL.I32 $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    __ vmul_64_32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
10790 
// MulVI for 4 ints in a 128-bit (quad) vector register: one VMUL.I32.
instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  size(4);
  format %{ "VMUL.I32 $dst.Q,$src1.Q,$src2.Q\t! mul packed4I" %}
  ins_encode %{
    __ vmul_128_32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
10801 
10802 // Floats vector mul
// MulVF for a 2-lane float vector in a vecD: two scalar VFP FMULS, the second
// operating on the successor single-precision registers of each operand.
instruct vmul2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  size(4*2);
  ins_cost(DEFAULT_COST*2); // FIXME

  format %{ "FMULS  $dst.a,$src1.a,$src2.a\n\t"
            "FMULS  $dst.b,$src1.b,$src2.b" %}
  ins_encode %{
    __ vmul_f32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
    __ vmul_f32($dst$$FloatRegister->successor(FloatRegisterImpl::SINGLE),
             $src1$$FloatRegister->successor(FloatRegisterImpl::SINGLE),
             $src2$$FloatRegister->successor(FloatRegisterImpl::SINGLE));
  %}

  ins_pipe(fmulF_reg_reg); // FIXME
%}
10820 
// MulVF for a 4-lane float vector in a vecX: four scalar VFP FMULS, walking
// lanes a..d through consecutive single-precision registers.
instruct vmul4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  size(4*4);
  ins_cost(DEFAULT_COST*4); // FIXME

  format %{ "FMULS  $dst.a,$src1.a,$src2.a\n\t"
            "FMULS  $dst.b,$src1.b,$src2.b\n\t"
            "FMULS  $dst.c,$src1.c,$src2.c\n\t"
            "FMULS  $dst.d,$src1.d,$src2.d" %}

  ins_encode %{
    FloatRegister dsta = $dst$$FloatRegister;
    FloatRegister src1a = $src1$$FloatRegister;
    FloatRegister src2a = $src2$$FloatRegister;
    __ vmul_f32(dsta, src1a, src2a);
    FloatRegister dstb = dsta->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src1b = src1a->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src2b = src2a->successor(FloatRegisterImpl::SINGLE);
    __ vmul_f32(dstb, src1b, src2b);
    FloatRegister dstc = dstb->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src1c = src1b->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src2c = src2b->successor(FloatRegisterImpl::SINGLE);
    __ vmul_f32(dstc, src1c, src2c);
    FloatRegister dstd = dstc->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src1d = src1c->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src2d = src2c->successor(FloatRegisterImpl::SINGLE);
    __ vmul_f32(dstd, src1d, src2d);
  %}

  ins_pipe(fmulF_reg_reg); // FIXME
%}
10853 
// MulVD for a 2-lane double vector in a vecX: two scalar VFP FMULD over
// consecutive double-precision registers.
instruct vmul2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  size(4*2);
  ins_cost(DEFAULT_COST*2); // FIXME

  format %{ "FMULD  $dst.D.a,$src1.D.a,$src2.D.a\n\t"
            "FMULD  $dst.D.b,$src1.D.b,$src2.D.b" %}
  ins_encode %{
    FloatRegister dsta = $dst$$FloatRegister;
    FloatRegister src1a = $src1$$FloatRegister;
    FloatRegister src2a = $src2$$FloatRegister;
    __ vmul_f64(dsta, src1a, src2a);
    FloatRegister dstb = dsta->successor(FloatRegisterImpl::DOUBLE);
    FloatRegister src1b = src1a->successor(FloatRegisterImpl::DOUBLE);
    FloatRegister src2b = src2a->successor(FloatRegisterImpl::DOUBLE);
    __ vmul_f64(dstb, src1b, src2b);
  %}

  ins_pipe(fmulD_reg_reg); // FIXME
%}
10875 
10876 
10877 // Floats vector div
// DivVF for a 2-lane float vector in a vecD: two scalar VFP FDIVS, the second
// on the successor single-precision registers of each operand.
instruct vdiv2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  size(4*2);
  ins_cost(DEFAULT_COST*2); // FIXME

  format %{ "FDIVS  $dst.a,$src1.a,$src2.a\n\t"
            "FDIVS  $dst.b,$src1.b,$src2.b" %}
  ins_encode %{
    __ vdiv_f32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
    __ vdiv_f32($dst$$FloatRegister->successor(FloatRegisterImpl::SINGLE),
             $src1$$FloatRegister->successor(FloatRegisterImpl::SINGLE),
             $src2$$FloatRegister->successor(FloatRegisterImpl::SINGLE));
  %}

  ins_pipe(fdivF_reg_reg); // FIXME
%}
10895 
// DivVF for a 4-lane float vector in a vecX: four scalar VFP FDIVS, walking
// lanes a..d through consecutive single-precision registers.
instruct vdiv4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  size(4*4);
  ins_cost(DEFAULT_COST*4); // FIXME

  format %{ "FDIVS  $dst.a,$src1.a,$src2.a\n\t"
            "FDIVS  $dst.b,$src1.b,$src2.b\n\t"
            "FDIVS  $dst.c,$src1.c,$src2.c\n\t"
            "FDIVS  $dst.d,$src1.d,$src2.d" %}

  ins_encode %{
    FloatRegister dsta = $dst$$FloatRegister;
    FloatRegister src1a = $src1$$FloatRegister;
    FloatRegister src2a = $src2$$FloatRegister;
    __ vdiv_f32(dsta, src1a, src2a);
    FloatRegister dstb = dsta->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src1b = src1a->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src2b = src2a->successor(FloatRegisterImpl::SINGLE);
    __ vdiv_f32(dstb, src1b, src2b);
    FloatRegister dstc = dstb->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src1c = src1b->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src2c = src2b->successor(FloatRegisterImpl::SINGLE);
    __ vdiv_f32(dstc, src1c, src2c);
    FloatRegister dstd = dstc->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src1d = src1c->successor(FloatRegisterImpl::SINGLE);
    FloatRegister src2d = src2c->successor(FloatRegisterImpl::SINGLE);
    __ vdiv_f32(dstd, src1d, src2d);
  %}

  ins_pipe(fdivF_reg_reg); // FIXME
%}
10928 
// DivVD for a 2-lane double vector in a vecX: two scalar VFP FDIVD over
// consecutive double-precision registers.
instruct vdiv2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  size(4*2);
  ins_cost(DEFAULT_COST*2); // FIXME

  format %{ "FDIVD  $dst.D.a,$src1.D.a,$src2.D.a\n\t"
            "FDIVD  $dst.D.b,$src1.D.b,$src2.D.b" %}
  ins_encode %{
    FloatRegister dsta = $dst$$FloatRegister;
    FloatRegister src1a = $src1$$FloatRegister;
    FloatRegister src2a = $src2$$FloatRegister;
    __ vdiv_f64(dsta, src1a, src2a);
    FloatRegister dstb = dsta->successor(FloatRegisterImpl::DOUBLE);
    FloatRegister src1b = src1a->successor(FloatRegisterImpl::DOUBLE);
    FloatRegister src2b = src2a->successor(FloatRegisterImpl::DOUBLE);
    __ vdiv_f64(dstb, src1b, src2b);
  %}

  ins_pipe(fdivD_reg_reg); // FIXME
%}
10950 
10951 // --------------------------------- NEG --------------------------------------
10952 
// Helper (no match rule; used via expand/encode elsewhere): byte-wise signed
// negate of a 64-bit vector with VNEG.S8.
instruct vneg8B_reg(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  effect(DEF dst, USE src);
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{ "VNEG.S8 $dst.D,$src.D\t! neg packed8B" %}
  ins_encode %{
    __ vneg_64_s8($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
10964 
// Helper (no match rule; used via expand/encode elsewhere): byte-wise signed
// negate of a 128-bit (quad) vector with VNEG.S8.
// Fixed format-string typo: "neg0" -> "neg" for consistency with vneg8B_reg
// (affects PrintAssembly output only).
instruct vneg16B_reg(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  effect(DEF dst, USE src);
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{ "VNEG.S8 $dst.Q,$src.Q\t! neg packed16B" %}
  ins_encode %{
    __ vneg_128_s8($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
10976 
10977 // ------------------------------ Shift ---------------------------------------
10978 
// LShiftCntV (64-bit vector, AdvSIMD only): broadcast the scalar shift count
// into all byte lanes by expanding to Repl8B_reg_simd.
instruct vslcntD(vecD dst, iRegI cnt) %{
  predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (LShiftCntV cnt));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  expand %{
    Repl8B_reg_simd(dst, cnt);
  %}
%}
10988 
// LShiftCntV (128-bit vector, AdvSIMD only): broadcast the scalar shift count
// into all byte lanes by expanding to Repl16B_reg.
instruct vslcntX(vecX dst, iRegI cnt) %{
  predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (LShiftCntV cnt));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  expand %{
    Repl16B_reg(dst, cnt);
  %}
%}
10998 
10999 // Low bits of vector "shift" elements are used, so it
11000 // doesn't matter if we treat it as ints or bytes here.
// RShiftCntV (64-bit vector, AdvSIMD only): broadcast the count to all byte
// lanes, then negate, since VSHL with a negative per-lane count shifts right.
instruct vsrcntD(vecD dst, iRegI cnt) %{
  predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (RShiftCntV cnt));
  size(4*2);
  ins_cost(DEFAULT_COST*2); // FIXME

  format %{ "VDUP.8 $dst.D,$cnt\n\t"
            "VNEG.S8 $dst.D,$dst.D\t! neg packed8B" %}
  ins_encode %{
    __ vdup_64_8($dst$$FloatRegister, $cnt$$Register);
    __ vneg_64_s8($dst$$FloatRegister, $dst$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11015 
// RShiftCntV (128-bit vector, AdvSIMD only): broadcast the count to all byte
// lanes, then negate, since VSHL with a negative per-lane count shifts right.
instruct vsrcntX(vecX dst, iRegI cnt) %{
  predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (RShiftCntV cnt));
  size(4*2);
  ins_cost(DEFAULT_COST*2); // FIXME
  format %{ "VDUP.8 $dst.Q,$cnt\n\t"
            "VNEG.S8 $dst.Q,$dst.Q\t! neg packed16B" %}
  ins_encode %{
    __ vdup_128_8($dst$$FloatRegister, $cnt$$Register);
    __ vneg_128_s8($dst$$FloatRegister, $dst$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11029 
11030 // Byte vector logical left/right shift based on sign
// Helper (no match rule): per-lane variable shift of 8 bytes with VSHL.U8.
// Per the section comment, the sign of each lane's count selects left vs
// right shift; used by the vsl8B_reg expand rule.
instruct vsh8B_reg(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 8);
  effect(DEF dst, USE src, USE shift);
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHL.U8 $dst.D,$src.D,$shift.D\t! logical left/right shift packed8B"
  %}
  ins_encode %{
    __ vshl_64_u8($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11044 
// Helper (no match rule): per-lane variable shift of 16 bytes (quad register)
// with VSHL.U8; the sign of each lane's count selects left vs right shift.
// Used by the vsl16B_reg expand rule.
// Removed dead local 'bool quad = true;' from ins_encode -- it was never used.
instruct vsh16B_reg(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  effect(DEF dst, USE src, USE shift);
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHL.U8 $dst.Q,$src.Q,$shift.Q\t! logical left/right shift packed16B"
  %}
  ins_encode %{
    __ vshl_128_u8($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11059 
11060 // Shorts/Char vector logical left/right shift based on sign
// Helper (no match rule): per-lane variable shift of 4 shorts/chars with
// VSHL.U16; sign of each lane's count selects left vs right shift.
instruct vsh4S_reg(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 4);
  effect(DEF dst, USE src, USE shift);
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHL.U16 $dst.D,$src.D,$shift.D\t! logical left/right shift packed4S"
  %}
  ins_encode %{
    __ vshl_64_u16($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11074 
// Helper (no match rule): per-lane variable shift of 8 shorts/chars (quad
// register) with VSHL.U16; sign of each lane's count selects left vs right.
instruct vsh8S_reg(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  effect(DEF dst, USE src, USE shift);
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHL.U16 $dst.Q,$src.Q,$shift.Q\t! logical left/right shift packed8S"
  %}
  ins_encode %{
    __ vshl_128_u16($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11088 
11089 // Integers vector logical left/right shift based on sign
// Helper (no match rule): per-lane variable shift of 2 ints with VSHL.U32;
// sign of each lane's count selects left vs right shift.
instruct vsh2I_reg(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2);
  effect(DEF dst, USE src, USE shift);
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHL.U32 $dst.D,$src.D,$shift.D\t! logical left/right shift packed2I"
  %}
  ins_encode %{
    __ vshl_64_u32($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11103 
// Helper (no match rule): per-lane variable shift of 4 ints (quad register)
// with VSHL.U32; sign of each lane's count selects left vs right shift.
instruct vsh4I_reg(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  effect(DEF dst, USE src, USE shift);
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHL.U32 $dst.Q,$src.Q,$shift.Q\t! logical left/right shift packed4I"
  %}
  ins_encode %{
    __ vshl_128_u32($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11117 
11118 // Longs vector logical left/right shift based on sign
// Helper (no match rule): per-lane variable shift of 2 longs (quad register)
// with VSHL.U64; sign of each lane's count selects left vs right shift.
instruct vsh2L_reg(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  effect(DEF dst, USE src, USE shift);
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHL.U64 $dst.Q,$src.Q,$shift.Q\t! logical left/right shift packed2L"
  %}
  ins_encode %{
    __ vshl_128_u64($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11132 
11133 // ------------------------------ LeftShift -----------------------------------
11134 
11135 // Byte vector left shift
// LShiftVB for 8 bytes: delegates to the vsh8B_reg variable-shift helper.
instruct vsl8B_reg(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  size(4*1);
  ins_cost(DEFAULT_COST*1); // FIXME
  expand %{
    vsh8B_reg(dst, src, shift);
  %}
%}
11145 
// LShiftVB for 16 bytes: delegates to the vsh16B_reg variable-shift helper.
instruct vsl16B_reg(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  size(4*1);
  ins_cost(DEFAULT_COST*1); // FIXME
  expand %{
    vsh16B_reg(dst, src, shift);
  %}
%}
11155 
// LShiftVB for 8 bytes with an immediate count: a single VSHL.I8.
instruct vsl8B_immI(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHL.I8 $dst.D,$src.D,$shift\t! logical left shift packed8B"
  %}
  ins_encode %{
    __ vshl_64_8($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11169 
// LShiftVB for 16 bytes with an immediate count: a single VSHL.I8 on a quad
// register.
// Removed dead local 'bool quad = true;' from ins_encode -- it was never used.
instruct vsl16B_immI(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHL.I8 $dst.Q,$src.Q,$shift\t! logical left shift packed16B"
  %}
  ins_encode %{
    __ vshl_128_8($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11184 
11185 // Shorts/Chars vector logical left/right shift
// LShiftVS / URShiftVS for 4 shorts: both map to the vsh4S_reg helper (the
// per-lane count's sign distinguishes the two directions).
instruct vsl4S_reg(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  size(4*1);
  ins_cost(DEFAULT_COST*1); // FIXME
  expand %{
    vsh4S_reg(dst, src, shift);
  %}
%}
11196 
// LShiftVS / URShiftVS for 8 shorts: both map to the vsh8S_reg helper (the
// per-lane count's sign distinguishes the two directions).
instruct vsl8S_reg(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  size(4*1);
  ins_cost(DEFAULT_COST*1); // FIXME
  expand %{
    vsh8S_reg(dst, src, shift);
  %}
%}
11207 
// LShiftVS for 4 shorts with an immediate count: a single VSHL.I16.
// Removed dead local 'bool quad = false;' from ins_encode -- it was never used.
instruct vsl4S_immI(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHL.I16 $dst.D,$src.D,$shift\t! logical left shift packed4S"
  %}
  ins_encode %{
    __ vshl_64_16($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11222 
// LShiftVS for 8 shorts with an immediate count: a single VSHL.I16 on a quad
// register.
// Removed dead local 'bool quad = true;' from ins_encode -- it was never used.
instruct vsl8S_immI(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHL.I16 $dst.Q,$src.Q,$shift\t! logical left shift packed8S"
  %}
  ins_encode %{
    __ vshl_128_16($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11237 
11238 // Integers vector logical left/right shift
// LShiftVI / URShiftVI for 2 ints (AdvSIMD only): both map to the vsh2I_reg
// helper; the per-lane count's sign distinguishes the directions.
instruct vsl2I_reg(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (LShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  size(4*1);
  ins_cost(DEFAULT_COST*1); // FIXME
  expand %{
    vsh2I_reg(dst, src, shift);
  %}
%}
11249 
// LShiftVI / URShiftVI for 4 ints (AdvSIMD only): both map to the vsh4I_reg
// helper; the per-lane count's sign distinguishes the directions.
instruct vsl4I_reg(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (LShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  size(4*1);
  ins_cost(DEFAULT_COST*1); // FIXME
  expand %{
    vsh4I_reg(dst, src, shift);
  %}
%}
11260 
// LShiftVI for 2 ints with an immediate count (AdvSIMD only): one VSHL.I32.
instruct vsl2I_immI(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (LShiftVI src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHL.I32 $dst.D,$src.D,$shift\t! logical left shift packed2I"
  %}
  ins_encode %{
    __ vshl_64_32($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11274 
// LShiftVI for 4 ints with an immediate count (AdvSIMD only): one VSHL.I32 on
// a quad register.
instruct vsl4I_immI(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (LShiftVI src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHL.I32 $dst.Q,$src.Q,$shift\t! logical left shift packed4I"
  %}
  ins_encode %{
    __ vshl_128_32($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11288 
11289 // Longs vector logical left/right shift
// LShiftVL / URShiftVL for 2 longs: both map to the vsh2L_reg helper; the
// per-lane count's sign distinguishes the directions.
instruct vsl2L_reg(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  size(4*1);
  ins_cost(DEFAULT_COST*1); // FIXME
  expand %{
    vsh2L_reg(dst, src, shift);
  %}
%}
11300 
// LShiftVL for 2 longs with an immediate count: a single VSHL.I64.
instruct vsl2L_immI(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHL.I64 $dst.Q,$src.Q,$shift\t! logical left shift packed2L"
  %}
  ins_encode %{
    __ vshl_128_64($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11314 
11315 // ----------------------- LogicalRightShift -----------------------------------
11316 
// Bytes/Shorts vector logical right shift produces an incorrect Java result
// for negative data, because Java code converts a short value into an int
// with sign extension before shifting.
11320 
11321 // Chars vector logical right shift
// URShiftVS for 4 chars with an immediate count: a single unsigned VSHR.U16.
instruct vsrl4S_immI(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHR.U16 $dst.D,$src.D,$shift\t! logical right shift packed4S"
  %}
  ins_encode %{
    __ vshr_64_u16($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11335 
// URShiftVS for 8 chars with an immediate count: one unsigned VSHR.U16 on a
// quad register.
instruct vsrl8S_immI(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHR.U16 $dst.Q,$src.Q,$shift\t! logical right shift packed8S"
  %}
  ins_encode %{
    __ vshr_128_u16($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11349 
11350 // Integers vector logical right shift
// URShiftVI for 2 ints with an immediate count (AdvSIMD only): one unsigned
// VSHR.U32.
instruct vsrl2I_immI(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (URShiftVI src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHR.U32 $dst.D,$src.D,$shift\t! logical right shift packed2I"
  %}
  ins_encode %{
    __ vshr_64_u32($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11364 
// URShiftVI for 4 ints with an immediate count (AdvSIMD only): one unsigned
// VSHR.U32 on a quad register.
instruct vsrl4I_immI(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 && (VM_Version::features() & FT_AdvSIMD));
  match(Set dst (URShiftVI src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHR.U32 $dst.Q,$src.Q,$shift\t! logical right shift packed4I"
  %}
  ins_encode %{
    __ vshr_128_u32($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11378 
11379 // Longs vector logical right shift
// URShiftVL for 2 longs with an immediate count: one unsigned VSHR.U64.
instruct vsrl2L_immI(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHR.U64 $dst.Q,$src.Q,$shift\t! logical right shift packed2L"
  %}
  ins_encode %{
    __ vshr_128_u64($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11393 
11394 // ------------------- ArithmeticRightShift -----------------------------------
11395 
11396 // Bytes vector arithmetic left/right shift based on sign
// Helper (no match rule): per-lane signed (arithmetic) variable shift of
// 8 bytes with VSHL.S8; used by the vsra8B_reg expand rule.
instruct vsha8B_reg(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 8);
  effect(DEF dst, USE src, USE shift);
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHL.S8 $dst.D,$src.D,$shift.D\t! arithmetic right shift packed8B"
  %}
  ins_encode %{
    __ vshl_64_s8($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11410 
// Helper (no match rule): per-lane signed (arithmetic) variable shift of
// 16 bytes (quad register) with VSHL.S8.
instruct vsha16B_reg(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  effect(DEF dst, USE src, USE shift);
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHL.S8 $dst.Q,$src.Q,$shift.Q\t! arithmetic right shift packed16B"
  %}
  ins_encode %{
    __ vshl_128_s8($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11424 
11425 // Shorts vector arithmetic left/right shift based on sign
// Helper (no match rule): per-lane signed (arithmetic) variable shift of
// 4 shorts with VSHL.S16; used by the vsra4S_reg expand rule.
instruct vsha4S_reg(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 4);
  effect(DEF dst, USE src, USE shift);
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHL.S16 $dst.D,$src.D,$shift.D\t! arithmetic right shift packed4S"
  %}
  ins_encode %{
    __ vshl_64_s16($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11439 
// Helper (no match rule): per-lane signed (arithmetic) variable shift of
// 8 shorts (quad register) with VSHL.S16.
instruct vsha8S_reg(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  effect(DEF dst, USE src, USE shift);
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHL.S16 $dst.Q,$src.Q,$shift.Q\t! arithmetic right shift packed8S"
  %}
  ins_encode %{
    __ vshl_128_s16($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11453 
11454 // Integers vector arithmetic left/right shift based on sign
// Helper (no match rule): per-lane signed (arithmetic) variable shift of
// 2 ints with VSHL.S32; used by the vsra2I_reg expand rule.
instruct vsha2I_reg(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2);
  effect(DEF dst, USE src, USE shift);
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHL.S32 $dst.D,$src.D,$shift.D\t! arithmetic right shift packed2I"
  %}
  ins_encode %{
    __ vshl_64_s32($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11468 
// Helper (no match rule): per-lane signed (arithmetic) variable shift of
// 4 ints (quad register) with VSHL.S32.
instruct vsha4I_reg(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  effect(DEF dst, USE src, USE shift);
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHL.S32 $dst.Q,$src.Q,$shift.Q\t! arithmetic right shift packed4I"
  %}
  ins_encode %{
    __ vshl_128_s32($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11482 
11483 // Longs vector arithmetic left/right shift based on sign
// Helper (no match rule): per-lane signed (arithmetic) variable shift of
// 2 longs (quad register) with VSHL.S64.
instruct vsha2L_reg(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  effect(DEF dst, USE src, USE shift);
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHL.S64 $dst.Q,$src.Q,$shift.Q\t! arithmetic right shift packed2L"
  %}
  ins_encode %{
    __ vshl_128_s64($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11497 
11498 // Byte vector arithmetic right shift
11499 
// RShiftVB (arithmetic right shift) for 8 bytes: delegates to the vsha8B_reg
// signed variable-shift helper.
instruct vsra8B_reg(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  expand %{
    vsha8B_reg(dst, src, shift);
  %}
%}
11509 
// RShiftVB (arithmetic right shift) for 16 bytes: delegates to the
// vsha16B_reg signed variable-shift helper.
// NOTE(review): the "srl" in the name suggests a logical shift, but this rule
// matches RShiftVB and expands the arithmetic helper -- likely a misnomer
// (vsra16B_reg would parallel vsra8B_reg); renaming needs a check for other
// references in the file.
instruct vsrl16B_reg(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  expand %{
    vsha16B_reg(dst, src, shift);
  %}
%}
11519 
// RShiftVB (arithmetic right shift) for 8 bytes with an immediate count:
// a single signed VSHR.S8.
// Fixed the disassembly comment: it said "logical right shift", but VSHR.S8
// on an RShiftVB match is an arithmetic shift (affects PrintAssembly only).
// NOTE(review): the "srl" in the name is a misnomer for the same reason;
// renaming needs a check for other references in the file.
instruct vsrl8B_immI(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHR.S8 $dst.D,$src.D,$shift\t! arithmetic right shift packed8B"
  %}
  ins_encode %{
    __ vshr_64_s8($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11533 
// RShiftVB (arithmetic right shift) for 16 bytes with an immediate count:
// a single signed VSHR.S8 on a quad register.
// Fixed the disassembly comment: it said "logical right shift", but VSHR.S8
// on an RShiftVB match is an arithmetic shift (affects PrintAssembly only).
instruct vsrl16B_immI(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    "VSHR.S8 $dst.Q,$src.Q,$shift\t! arithmetic right shift packed16B"
  %}
  ins_encode %{
    __ vshr_128_s8($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11547 
11548 // Shorts vector arithmetic right shift
// RShiftVS (arithmetic right shift) for 4 shorts: delegates to the
// vsha4S_reg signed variable-shift helper.
instruct vsra4S_reg(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  expand %{
    vsha4S_reg(dst, src, shift);
  %}
%}
11558 
// RShiftVS (arithmetic right shift) for 8 shorts: delegates to the
// vsha8S_reg signed variable-shift helper.
instruct vsra8S_reg(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  expand %{
    vsha8S_reg(dst, src, shift);
  %}
%}
11568 
// Arithmetic right shift of 4 packed shorts by an immediate count.
instruct vsra4S_immI(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    // Fixed: VSHR.S16 is an arithmetic (sign-extending) shift, not logical.
    "VSHR.S16 $dst.D,$src.D,$shift\t! arithmetic right shift packed4S"
  %}
  ins_encode %{
    __ vshr_64_s16($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11582 
// Arithmetic right shift of 8 packed shorts by an immediate count.
instruct vsra8S_immI(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    // Fixed: VSHR.S16 is an arithmetic (sign-extending) shift, not logical.
    "VSHR.S16 $dst.Q,$src.Q,$shift\t! arithmetic right shift packed8S"
  %}
  ins_encode %{
    __ vshr_128_s16($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11596 
11597 // Integers vector arithmetic right shift
// Arithmetic right shift of 2 packed ints by a per-lane vector shift count;
// delegates to the signed-shift expansion vsha2I_reg.
instruct vsra2I_reg(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2);  // 2 packed ints (64-bit vecD)
  match(Set dst (RShiftVI src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  expand %{
    vsha2I_reg(dst, src, shift);
  %}
%}
11607 
// Arithmetic right shift of 4 packed ints by a per-lane vector shift count;
// delegates to the signed-shift expansion vsha4I_reg.
instruct vsra4I_reg(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);  // 4 packed ints (128-bit vecX)
  match(Set dst (RShiftVI src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  expand %{
    vsha4I_reg(dst, src, shift);
  %}
%}
11617 
// Arithmetic right shift of 2 packed ints by an immediate count.
instruct vsra2I_immI(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    // Fixed: VSHR.S32 is an arithmetic (sign-extending) shift, not logical.
    "VSHR.S32 $dst.D,$src.D,$shift\t! arithmetic right shift packed2I"
  %}
  ins_encode %{
    __ vshr_64_s32($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11631 
// Arithmetic right shift of 4 packed ints by an immediate count.
instruct vsra4I_immI(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    // Fixed: VSHR.S32 is an arithmetic (sign-extending) shift, not logical.
    "VSHR.S32 $dst.Q,$src.Q,$shift\t! arithmetic right shift packed4I"
  %}
  ins_encode %{
    __ vshr_128_s32($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11645 
11646 // Longs vector arithmetic right shift
// Longs vector arithmetic right shift
// Arithmetic right shift of 2 packed longs by a per-lane vector shift count;
// delegates to the signed-shift expansion vsha2L_reg.
instruct vsra2L_reg(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);  // 2 packed longs (128-bit vecX)
  match(Set dst (RShiftVL src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  expand %{
    vsha2L_reg(dst, src, shift);
  %}
%}
11656 
// Arithmetic right shift of 2 packed longs by an immediate count.
instruct vsra2L_immI(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  size(4);
  ins_cost(DEFAULT_COST); // FIXME
  format %{
    // Fixed: VSHR.S64 is an arithmetic (sign-extending) shift, not logical.
    "VSHR.S64 $dst.Q,$src.Q,$shift\t! arithmetic right shift packed2L"
  %}
  ins_encode %{
    __ vshr_128_s64($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11670 
11671 // --------------------------------- AND --------------------------------------
11672 
// Bitwise AND of two 64-bit (D) vector registers, any lane layout.
instruct vandD(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);  // 8-byte vectors only
  match(Set dst (AndV src1 src2));
  format %{ "VAND    $dst.D,$src1.D,$src2.D\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ vand_64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11682 
// Bitwise AND of two 128-bit (Q) vector registers, any lane layout.
instruct vandX(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);  // 16-byte vectors only
  match(Set dst (AndV src1 src2));
  format %{ "VAND    $dst.Q,$src1.Q,$src2.Q\t! and vectors (16 bytes)" %}
  ins_encode %{
    // Removed unused local `bool quad = true;` -- vand_128 already encodes
    // the quad (128-bit) form, matching vorr_128/veor_128 in the rules below.
    __ vand_128($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11693 
11694 // --------------------------------- OR ---------------------------------------
11695 
// Bitwise OR of two 64-bit (D) vector registers, any lane layout.
instruct vorD(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);  // 8-byte vectors only
  match(Set dst (OrV src1 src2));
  // Fixed copy-paste in disassembly text: this is OR, not AND.
  format %{ "VOR     $dst.D,$src1.D,$src2.D\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ vorr_64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11705 
// Bitwise OR of two 128-bit (Q) vector registers, any lane layout.
instruct vorX(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);  // 16-byte vectors only
  match(Set dst (OrV src1 src2));
  // Fixed copy-paste in disassembly text: this is OR, not AND.
  format %{ "VOR     $dst.Q,$src1.Q,$src2.Q\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ vorr_128($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11715 
11716 // --------------------------------- XOR --------------------------------------
11717 
// Bitwise XOR of two 64-bit (D) vector registers, any lane layout.
instruct vxorD(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);  // 8-byte vectors only
  match(Set dst (XorV src1 src2));
  // Fixed copy-paste in disassembly text: this is XOR, not AND.
  format %{ "VXOR    $dst.D,$src1.D,$src2.D\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ veor_64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11727 
// Bitwise XOR of two 128-bit (Q) vector registers, any lane layout.
instruct vxorX(vecX dst, vecX src1, vecX src2) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);  // 16-byte vectors only
  match(Set dst (XorV src1 src2));
  // Fixed copy-paste in disassembly text: this is XOR, not AND.
  format %{ "VXOR    $dst.Q,$src1.Q,$src2.Q\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ veor_128($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe( ialu_reg_reg ); // FIXME
%}
11737 
11738 
11739 //----------PEEPHOLE RULES-----------------------------------------------------
11740 // These must follow all instruction definitions as they use the names
11741 // defined in the instructions definitions.
11742 //
11743 // peepmatch ( root_instr_name [preceding_instruction]* );
11744 //
11745 // peepconstraint %{
11746 // (instruction_number.operand_name relational_op instruction_number.operand_name
11747 //  [, ...] );
11748 // // instruction numbers are zero-based using left to right order in peepmatch
11749 //
11750 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
11751 // // provide an instruction_number.operand_name for each operand that appears
11752 // // in the replacement instruction's match rule
11753 //
11754 // ---------VM FLAGS---------------------------------------------------------
11755 //
11756 // All peephole optimizations can be turned off using -XX:-OptoPeephole
11757 //
11758 // Each peephole rule is given an identifying number starting with zero and
11759 // increasing by one in the order seen by the parser.  An individual peephole
11760 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
11761 // on the command-line.
11762 //
11763 // ---------CURRENT LIMITATIONS----------------------------------------------
11764 //
11765 // Only match adjacent instructions in same basic block
11766 // Only equality constraints
11767 // Only constraints between operands, not (0.dest_reg == EAX_enc)
11768 // Only one replacement instruction
11769 //
11770 // ---------EXAMPLE----------------------------------------------------------
11771 //
11772 // // pertinent parts of existing instructions in architecture description
11773 // instruct movI(eRegI dst, eRegI src) %{
11774 //   match(Set dst (CopyI src));
11775 // %}
11776 //
11777 // instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
11778 //   match(Set dst (AddI dst src));
11779 //   effect(KILL cr);
11780 // %}
11781 //
11782 // // Change (inc mov) to lea
11783 // peephole %{
//   // increment preceded by register-register move
11785 //   peepmatch ( incI_eReg movI );
11786 //   // require that the destination register of the increment
11787 //   // match the destination register of the move
11788 //   peepconstraint ( 0.dst == 1.dst );
11789 //   // construct a replacement instruction that sets
11790 //   // the destination to ( move's source register + one )
11791 //   peepreplace ( incI_eReg_immI1( 0.dst 1.src 0.src ) );
11792 // %}
11793 //
11794 
11795 // // Change load of spilled value to only a spill
11796 // instruct storeI(memory mem, eRegI src) %{
11797 //   match(Set mem (StoreI mem src));
11798 // %}
11799 //
11800 // instruct loadI(eRegI dst, memory mem) %{
11801 //   match(Set dst (LoadI mem));
11802 // %}
11803 //
11804 // peephole %{
11805 //   peepmatch ( loadI storeI );
11806 //   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
11807 //   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
11808 // %}
11809 
11810 //----------SMARTSPILL RULES---------------------------------------------------
11811 // These must follow all instruction definitions as they use the names
11812 // defined in the instructions definitions.
11813 //
11814 // ARM will probably not have any of these rules due to RISC instruction set.
11815 
11816 //----------PIPELINE-----------------------------------------------------------
11817 // Rules which define the behavior of the target architectures pipeline.