--- /dev/null	2018-09-25 19:24:06.000000000 +0300
+++ new/src/hotspot/cpu/aarch32/aarch32.ad	2018-09-25 19:24:06.000000000 +0300
@@ -0,0 +1,11817 @@
+//
+// Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+// AARCH32 Architecture Description File
+
+//----------REGISTER DEFINITION BLOCK------------------------------------------
+// This information is used by the matcher and the register allocator to
+// describe individual registers and classes of registers within the target
+// architecture.
+register %{
+//----------Architecture Description Register Definitions----------------------
+// General Registers
+// "reg_def"  name ( register save type, C convention save type,
+//                   ideal register type, encoding, vm name );
+// Register Save Types:
+//
+// NS  = No-Save:       The register allocator assumes that these registers
+//                      can be used without saving upon entry to the method, &
+//                      that they do not need to be saved at call sites.
+//
+// SOC = Save-On-Call:  The register allocator assumes that these registers
+//                      can be used without saving upon entry to the method,
+//                      but that they must be saved at call sites.
+//
+// SOE = Save-On-Entry: The register allocator assumes that these registers
+//                      must be saved before using them upon entry to the
+//                      method, but they do not need to be saved at call
+//                      sites.
+//
+// AS  = Always-Save:   The register allocator assumes that these registers
+//                      must be saved before using them upon entry to the
+//                      method, & that they must be saved at call sites.
+//
+// Ideal Register Type is used to determine how to save & restore a
+// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
+// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
+//
+// The encoding number is the actual bit-pattern placed into the opcodes.
+
+
+// ----------------------------
+// Integer/Long Registers
+// ----------------------------
+
+reg_def R_R0 (SOC, SOC, Op_RegI,  0,  R(0)->as_VMReg()); // also R_Rexception_obj
+reg_def R_R1 (SOC, SOC, Op_RegI,  1,  R(1)->as_VMReg());
+reg_def R_R2 (SOC, SOC, Op_RegI,  2,  R(2)->as_VMReg());
+reg_def R_R3 (SOC, SOC, Op_RegI,  3,  R(3)->as_VMReg());
+reg_def R_R4 (SOC, SOE, Op_RegI,  4,  R(4)->as_VMReg());
+reg_def R_R5 (SOC, SOE, Op_RegI,  5,  R(5)->as_VMReg());
+reg_def R_R6 (SOC, SOE, Op_RegI,  6,  R(6)->as_VMReg());
+reg_def R_R7 (SOC, SOE, Op_RegI,  7,  R(7)->as_VMReg());
+reg_def R_R8 (SOC, SOE, Op_RegI,  8,  R(8)->as_VMReg()); // also R_Rmethod
+reg_def R_R9 (SOC, SOE, Op_RegI,  9,  R(9)->as_VMReg());
+reg_def R_R10(NS,  SOE, Op_RegI, 10, R(10)->as_VMReg()); // TLS / R_Rthread; left out of int_reg and ptr_reg
+reg_def R_R11(NS,  SOE, Op_RegI, 11, R(11)->as_VMReg()); // FP (see FP_regP)
+reg_def R_R12(SOC, SOC, Op_RegI, 12, R(12)->as_VMReg()); // IP (see IP_regP); also R_Ricklass
+reg_def R_R13(NS,  NS,  Op_RegI, 13, R(13)->as_VMReg()); // SP
+reg_def R_R14(SOC, SOC, Op_RegI, 14, R(14)->as_VMReg()); // LR (see LR_regP)
+reg_def R_R15(NS,  NS,  Op_RegI, 15, R(15)->as_VMReg()); // PC
+
+// ----------------------------
+// Float/Double Registers
+// ----------------------------
+
+// Float Registers: single-precision S0-S31 (C convention: S0-S15 are SOC, S16-S31 are SOE)
+
+reg_def R_S0 ( SOC, SOC, Op_RegF,  0, f0->as_VMReg()); // R_hf_ret_lo_num
+reg_def R_S1 ( SOC, SOC, Op_RegF,  1, f1->as_VMReg()); // R_hf_ret_hi_num
+reg_def R_S2 ( SOC, SOC, Op_RegF,  2, f2->as_VMReg());
+reg_def R_S3 ( SOC, SOC, Op_RegF,  3, f3->as_VMReg());
+reg_def R_S4 ( SOC, SOC, Op_RegF,  4, f4->as_VMReg());
+reg_def R_S5 ( SOC, SOC, Op_RegF,  5, f5->as_VMReg());
+reg_def R_S6 ( SOC, SOC, Op_RegF,  6, f6->as_VMReg());
+reg_def R_S7 ( SOC, SOC, Op_RegF,  7, f7->as_VMReg());
+reg_def R_S8 ( SOC, SOC, Op_RegF,  8, f8->as_VMReg());
+reg_def R_S9 ( SOC, SOC, Op_RegF,  9, f9->as_VMReg());
+reg_def R_S10( SOC, SOC, Op_RegF, 10,f10->as_VMReg());
+reg_def R_S11( SOC, SOC, Op_RegF, 11,f11->as_VMReg());
+reg_def R_S12( SOC, SOC, Op_RegF, 12,f12->as_VMReg());
+reg_def R_S13( SOC, SOC, Op_RegF, 13,f13->as_VMReg());
+reg_def R_S14( SOC, SOC, Op_RegF, 14,f14->as_VMReg()); // R_mem_copy_lo_num (Rmemcopy); not in sflt_reg
+reg_def R_S15( SOC, SOC, Op_RegF, 15,f15->as_VMReg()); // R_mem_copy_hi_num; not in sflt_reg
+reg_def R_S16( SOC, SOE, Op_RegF, 16,f16->as_VMReg());
+reg_def R_S17( SOC, SOE, Op_RegF, 17,f17->as_VMReg());
+reg_def R_S18( SOC, SOE, Op_RegF, 18,f18->as_VMReg());
+reg_def R_S19( SOC, SOE, Op_RegF, 19,f19->as_VMReg());
+reg_def R_S20( SOC, SOE, Op_RegF, 20,f20->as_VMReg());
+reg_def R_S21( SOC, SOE, Op_RegF, 21,f21->as_VMReg());
+reg_def R_S22( SOC, SOE, Op_RegF, 22,f22->as_VMReg());
+reg_def R_S23( SOC, SOE, Op_RegF, 23,f23->as_VMReg());
+reg_def R_S24( SOC, SOE, Op_RegF, 24,f24->as_VMReg());
+reg_def R_S25( SOC, SOE, Op_RegF, 25,f25->as_VMReg());
+reg_def R_S26( SOC, SOE, Op_RegF, 26,f26->as_VMReg());
+reg_def R_S27( SOC, SOE, Op_RegF, 27,f27->as_VMReg());
+reg_def R_S28( SOC, SOE, Op_RegF, 28,f28->as_VMReg());
+reg_def R_S29( SOC, SOE, Op_RegF, 29,f29->as_VMReg());
+reg_def R_S30( SOC, SOE, Op_RegF, 30,f30->as_VMReg());
+reg_def R_S31( SOC, SOE, Op_RegF, 31,f31->as_VMReg());
+
+// Double Registers
+// The rules of ADL require that double registers be defined in pairs.
+// Each pair must be two 32-bit values, but not necessarily a pair of
+// single float registers.  In each pair, ADLC-assigned register numbers
+// must be adjacent, with the lower number even.  Finally, when the
+// CPU stores such a register pair to memory, the word associated with
+// the lower ADLC-assigned number must be stored to the lower address.
+
+// TODO: the problem is that the AArch32 port has the same numeric value for
+// d16->as_VMReg and f1->as_VMReg, which breaks the reverse mapping from
+// VMReg to OptoReg.
+// reg_def R_D16 (SOC, SOC, Op_RegD, 32, d16->as_VMReg());
+// reg_def R_D16x(SOC, SOC, Op_RegD,255, d16->as_VMReg()->next());
+// reg_def R_D17 (SOC, SOC, Op_RegD, 34, d17->as_VMReg());
+// reg_def R_D17x(SOC, SOC, Op_RegD,255, d17->as_VMReg()->next());
+// reg_def R_D18 (SOC, SOC, Op_RegD, 36, d18->as_VMReg());
+// reg_def R_D18x(SOC, SOC, Op_RegD,255, d18->as_VMReg()->next());
+// reg_def R_D19 (SOC, SOC, Op_RegD, 38, d19->as_VMReg());
+// reg_def R_D19x(SOC, SOC, Op_RegD,255, d19->as_VMReg()->next());
+// reg_def R_D20 (SOC, SOC, Op_RegD, 40, d20->as_VMReg());
+// reg_def R_D20x(SOC, SOC, Op_RegD,255, d20->as_VMReg()->next());
+// reg_def R_D21 (SOC, SOC, Op_RegD, 42, d21->as_VMReg());
+// reg_def R_D21x(SOC, SOC, Op_RegD,255, d21->as_VMReg()->next());
+// reg_def R_D22 (SOC, SOC, Op_RegD, 44, d22->as_VMReg());
+// reg_def R_D22x(SOC, SOC, Op_RegD,255, d22->as_VMReg()->next());
+// reg_def R_D23 (SOC, SOC, Op_RegD, 46, d23->as_VMReg());
+// reg_def R_D23x(SOC, SOC, Op_RegD,255, d23->as_VMReg()->next());
+// reg_def R_D24 (SOC, SOC, Op_RegD, 48, d24->as_VMReg());
+// reg_def R_D24x(SOC, SOC, Op_RegD,255, d24->as_VMReg()->next());
+// reg_def R_D25 (SOC, SOC, Op_RegD, 50, d25->as_VMReg());
+// reg_def R_D25x(SOC, SOC, Op_RegD,255, d25->as_VMReg()->next());
+// reg_def R_D26 (SOC, SOC, Op_RegD, 52, d26->as_VMReg());
+// reg_def R_D26x(SOC, SOC, Op_RegD,255, d26->as_VMReg()->next());
+// reg_def R_D27 (SOC, SOC, Op_RegD, 54, d27->as_VMReg());
+// reg_def R_D27x(SOC, SOC, Op_RegD,255, d27->as_VMReg()->next());
+// reg_def R_D28 (SOC, SOC, Op_RegD, 56, d28->as_VMReg());
+// reg_def R_D28x(SOC, SOC, Op_RegD,255, d28->as_VMReg()->next());
+// reg_def R_D29 (SOC, SOC, Op_RegD, 58, d29->as_VMReg());
+// reg_def R_D29x(SOC, SOC, Op_RegD,255, d29->as_VMReg()->next());
+// reg_def R_D30 (SOC, SOC, Op_RegD, 60, d30->as_VMReg());
+// reg_def R_D30x(SOC, SOC, Op_RegD,255, d30->as_VMReg()->next());
+// reg_def R_D31 (SOC, SOC, Op_RegD, 62, d31->as_VMReg());
+// reg_def R_D31x(SOC, SOC, Op_RegD,255, d31->as_VMReg()->next());
+
+// ----------------------------
+// Special Registers
+// Condition Codes Flag Registers (neither has a VMReg counterpart: both use VMRegImpl::Bad())
+reg_def APSR (SOC, SOC,  Op_RegFlags, 0, VMRegImpl::Bad()); // sole member of reg_class int_flags
+reg_def FPSCR(SOC, SOC,  Op_RegFlags, 0, VMRegImpl::Bad()); // sole member of reg_class float_flags
+
+// ----------------------------
+// Specify the enum values for the registers.  These enums are only used by the
+// OptoReg "class". We can convert these enum values at will to VMReg when needed
+// for visibility to the rest of the vm. The order of this enum influences the
+// register allocator so having the freedom to set this order and not be stuck
+// with the order that is natural for the rest of the vm is worth it.
+
+// Core registers are listed in this order so that R11/R12 form an aligned pair that can be used for longs
+alloc_class chunk0(
+                   R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_R11, R_R12, R_R10, R_R13, R_R14, R_R15, R_R0, R_R1, R_R2, R_R3);
+
+// Note that a register is not allocatable unless it is also mentioned
+// in a widely-used reg_class below.
+
+alloc_class chunk1(
+                   R_S16, R_S17, R_S18, R_S19, R_S20, R_S21, R_S22, R_S23,
+                   R_S24, R_S25, R_S26, R_S27, R_S28, R_S29, R_S30, R_S31,
+                   R_S0,  R_S1,  R_S2,  R_S3,  R_S4,  R_S5,  R_S6,  R_S7,
+                   R_S8,  R_S9,  R_S10, R_S11, R_S12, R_S13, R_S14, R_S15
+                   // , (S16-S31 are listed ahead of S0-S15; the D16-D31 entries below stay disabled with their reg_defs)
+                   // R_D16, R_D16x,R_D17, R_D17x,R_D18, R_D18x,R_D19, R_D19x,
+                   // R_D20, R_D20x,R_D21, R_D21x,R_D22, R_D22x,R_D23, R_D23x,
+                   // R_D24, R_D24x,R_D25, R_D25x,R_D26, R_D26x,R_D27, R_D27x,
+                   // R_D28, R_D28x,R_D29, R_D29x,R_D30, R_D30x,R_D31, R_D31x
+);
+
+alloc_class chunk2(APSR, FPSCR); // the two Op_RegFlags registers
+
+//----------Architecture Description Register Classes--------------------------
+// Several register classes are automatically defined based upon information in
+// this architecture description.
+// 1) reg_class inline_cache_reg           ( as defined in frame section )
+// 2) reg_class interpreter_method_oop_reg ( as defined in frame section )
+// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
+//
+
+// ----------------------------
+// Integer Register Classes
+// ----------------------------
+// Exclusions from int_reg:
+// SP (R13), PC (R15)
+// R10: reserved by HotSpot as the TLS register (invariant within Java)
+reg_class int_reg(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_R11, R_R12, R_R14);
+
+reg_class R0_regI(R_R0);   // single-register int classes: each names exactly one core register
+reg_class R1_regI(R_R1);
+reg_class R2_regI(R_R2);
+reg_class R3_regI(R_R3);
+reg_class R9_regI(R_R9);
+reg_class R12_regI(R_R12);
+
+// ----------------------------
+// Pointer Register Classes (ptr_reg lists the same registers as int_reg)
+// ----------------------------
+reg_class ptr_reg(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_R11, R_R12, R_R14);
+// Special class for storeP instructions, which can store SP or RPC to TLS.
+// It is also used for memory addressing, allowing direct TLS addressing; it is ptr_reg plus R10 and R13.
+reg_class sp_ptr_reg(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, R_R9, R_R11, R_R12, R_R14, R_R8, R_R10 /* TLS*/, R_R13 /* SP*/);
+
+#define R_Ricklass R_R12
+#define R_Rmethod  R_R8
+#define R_Rthread  R_R10
+#define R_Rexception_obj R_R0
+
+// Other special pointer regs: single-register pointer classes
+reg_class R0_regP(R_R0);
+reg_class R1_regP(R_R1);
+reg_class R2_regP(R_R2);
+reg_class R4_regP(R_R4);
+reg_class Rexception_regP(R_Rexception_obj); // R_R0
+reg_class Ricklass_regP(R_Ricklass);         // R_R12
+reg_class Rmethod_regP(R_Rmethod);           // R_R8
+reg_class Rthread_regP(R_Rthread);           // R_R10
+reg_class IP_regP(R_R12);
+reg_class LR_regP(R_R14);
+
+reg_class FP_regP(R_R11);
+
+// ----------------------------
+// Long Register Classes (registers are listed as lo,hi pairs)
+// ----------------------------
+reg_class long_reg (             R_R0,R_R1, R_R2,R_R3, R_R4,R_R5, R_R6,R_R7, R_R8,R_R9, R_R11,R_R12);
+// for ldrexd, strexd: first reg of pair must be even, so the R11/R12 pair is left out
+reg_class long_reg_align (       R_R0,R_R1, R_R2,R_R3, R_R4,R_R5, R_R6,R_R7, R_R8,R_R9);
+
+reg_class R0R1_regL(R_R0,R_R1);
+reg_class R2R3_regL(R_R2,R_R3);
+
+// ----------------------------
+// Special Classes for the Condition Code Flags Registers
+reg_class int_flags(APSR);
+reg_class float_flags(FPSCR);
+
+
+// ----------------------------
+// Floating Point Register Classes
+// ----------------------------
+// Skip f14/f15, they are reserved for mem-mem copies (see R_mem_copy_lo_num/R_mem_copy_hi_num)
+reg_class sflt_reg(R_S0, R_S1, R_S2, R_S3, R_S4, R_S5, R_S6, R_S7, R_S8, R_S9, R_S10, R_S11, R_S12, R_S13,
+                   R_S16, R_S17, R_S18, R_S19, R_S20, R_S21, R_S22, R_S23, R_S24, R_S25, R_S26, R_S27, R_S28, R_S29, R_S30, R_S31);
+
+// Paired floating point registers--they show up in the same order as the floats,
+// but they are used with the "Op_RegD" type, and always occur in even/odd pairs.
+reg_class dflt_reg(R_S0,R_S1, R_S2,R_S3, R_S4,R_S5, R_S6,R_S7, R_S8,R_S9, R_S10,R_S11, R_S12,R_S13,
+                   R_S16,R_S17, R_S18,R_S19, R_S20,R_S21, R_S22,R_S23, R_S24,R_S25, R_S26,R_S27, R_S28,R_S29, R_S30,R_S31
+                   // , (with the D16-D31 entries below disabled, this list equals dflt_low_reg)
+                   // R_D16,R_D16x, R_D17,R_D17x, R_D18,R_D18x, R_D19,R_D19x, R_D20,R_D20x, R_D21,R_D21x, R_D22,R_D22x,
+                   // R_D23,R_D23x, R_D24,R_D24x, R_D25,R_D25x, R_D26,R_D26x, R_D27,R_D27x, R_D28,R_D28x, R_D29,R_D29x,
+                   // R_D30,R_D30x, R_D31,R_D31x
+  );
+
+reg_class dflt_low_reg(R_S0,R_S1, R_S2,R_S3, R_S4,R_S5, R_S6,R_S7, R_S8,R_S9, R_S10,R_S11, R_S12,R_S13,
+                       R_S16,R_S17, R_S18,R_S19, R_S20,R_S21, R_S22,R_S23, R_S24,R_S25, R_S26,R_S27, R_S28,R_S29, R_S30,R_S31);
+
+
+reg_class actual_dflt_reg %{
+  if (/*VM_Version::features() & FT_VFPV3D32*/0) { // TODO verify and enable; the condition is the constant 0 for now
+    return DFLT_REG_mask();
+  } else {
+    return DFLT_LOW_REG_mask(); // the only branch taken while the feature test above is commented out
+  }
+%}
+
+reg_class f0_regF(R_S0);
+reg_class D0_regD(R_S0,R_S1);   // Dn_regD is the single-register pair S(2n),S(2n+1)
+reg_class D1_regD(R_S2,R_S3);
+reg_class D2_regD(R_S4,R_S5);
+reg_class D3_regD(R_S6,R_S7);
+reg_class D4_regD(R_S8,R_S9);
+reg_class D5_regD(R_S10,R_S11);
+reg_class D6_regD(R_S12,R_S13);
+reg_class D7_regD(R_S14,R_S15); // the f14/f15 pair that sflt_reg/dflt_reg leave out
+reg_class D0D1_regD(R_S0,R_S1,R_S2,R_S3);
+reg_class D2D3_regD(R_S4,R_S5,R_S6,R_S7);
+
+// reg_class D16_regD(R_D16,R_D16x);
+// reg_class D17_regD(R_D17,R_D17x);
+// reg_class D18_regD(R_D18,R_D18x);
+// reg_class D19_regD(R_D19,R_D19x);
+// reg_class D20_regD(R_D20,R_D20x);
+// reg_class D21_regD(R_D21,R_D21x);
+// reg_class D22_regD(R_D22,R_D22x);
+// reg_class D23_regD(R_D23,R_D23x);
+// reg_class D24_regD(R_D24,R_D24x);
+// reg_class D25_regD(R_D25,R_D25x);
+// reg_class D26_regD(R_D26,R_D26x);
+// reg_class D27_regD(R_D27,R_D27x);
+// reg_class D28_regD(R_D28,R_D28x);
+// reg_class D29_regD(R_D29,R_D29x);
+// reg_class D30_regD(R_D30,R_D30x);
+// reg_class D31_regD(R_D31,R_D31x);
+
+reg_class vectorx_reg(R_S0,R_S1,R_S2,R_S3, R_S4,R_S5,R_S6,R_S7,
+                      R_S8,R_S9,R_S10,R_S11, /* skip f12-f15: f14/f15 are reserved, and f12/f13 are omitted along with them */
+                      R_S16,R_S17,R_S18,R_S19, R_S20,R_S21,R_S22,R_S23,
+                      R_S24,R_S25,R_S26,R_S27, R_S28,R_S29,R_S30,R_S31
+                      // , (D16-D31 entries stay disabled with their reg_defs)
+                      // R_D16,R_D16x,R_D17,R_D17x, R_D18,R_D18x,R_D19,R_D19x,
+                      // R_D20,R_D20x,R_D21,R_D21x, R_D22,R_D22x,R_D23,R_D23x,
+                      // R_D24,R_D24x,R_D25,R_D25x, R_D26,R_D26x,R_D27,R_D27x,
+                      // R_D28,R_D28x,R_D29,R_D29x, R_D30,R_D30x,R_D31,R_D31x
+  );
+
+%}
+
+source_hpp %{
+// FIXME (original marker; what needs fixing is not recorded here)
+const MachRegisterNumbers R_mem_copy_lo_num = R_S14_num; // f14/f15 are kept out of sflt_reg/dflt_reg for mem-mem copies
+const MachRegisterNumbers R_mem_copy_hi_num = R_S15_num;
+const FloatRegister Rmemcopy = f14;                      // the register behind R_mem_copy_lo_num
+const MachRegisterNumbers R_hf_ret_lo_num = R_S0_num;    // used by c2::return_value for Op_RegF/Op_RegD
+const MachRegisterNumbers R_hf_ret_hi_num = R_S1_num;
+
+const MachRegisterNumbers R_Ricklass_num = R_R12_num;    // same register as #define R_Ricklass
+const MachRegisterNumbers R_Rmethod_num  = R_R8_num;     // same register as #define R_Rmethod
+
+#define LDR_DOUBLE "FLDD"   // mnemonic strings; presumably used in instruction format text -- TODO confirm
+#define LDR_FLOAT  "FLDS"
+#define STR_DOUBLE "FSTD"
+#define STR_FLOAT  "FSTS"
+#define LDR_64     "LDRD"
+#define STR_64     "STRD"
+#define LDR_32     "LDR"
+#define STR_32     "STR"
+#define MOV_DOUBLE "FCPYD"
+#define MOV_FLOAT  "FCPYS"
+#define FMSR       "FMSR"
+#define FMRS       "FMRS"
+#define LDREX      "ldrex " // note: lower case with a trailing space, unlike the strings above
+#define STREX      "strex "
+
+static inline bool is_memoryD(int offset) {
+  return -1024 < offset && offset < 1024;  // true only strictly inside (-1024, 1024)
+}
+
+static inline bool is_memoryfp(int offset) {
+  return -1024 < offset && offset < 1024;  // same open interval as is_memoryD
+}
+
+static inline bool is_memoryI(int offset) {
+  return -4096 < offset && offset < 4096;  // true only strictly inside (-4096, 4096)
+}
+
+static inline bool is_memoryP(int offset) {
+  return -4096 < offset && offset < 4096;  // same open interval as is_memoryI
+}
+
+static inline bool is_memoryHD(int offset) {
+  return -256 < offset && offset < 256;  // true only strictly inside (-256, 256)
+}
+
+static inline bool is_aimm(int imm) {
+  return Assembler::is_valid_for_imm12(imm); // same check as is_limmI
+}
+
+static inline bool is_limmI(jint imm) {
+  return Assembler::is_valid_for_imm12(imm); // same check as is_aimm, taking a jint
+}
+
+static inline bool is_limmI_low(jint imm, int n) {
+  if (is_limmI(imm & right_n_bits(n))) { return true; }  // the low n bits alone pass is_limmI
+  return is_limmI(imm);                                  // otherwise the whole value has to pass
+}
+
+static inline int limmI_low(jint imm, int n) {
+  const int low_bits = imm & right_n_bits(n);  // imm restricted to its n least-significant bits
+  if (is_limmI(low_bits)) { return low_bits; } else { return imm; }  // prefer the masked value when it passes is_limmI
+}
+
+%}
+
+source %{
+
+// Given a register encoding, produce an integer Register object
+static Register reg_to_register_object(int register_encoding) {
+  assert(r0->encoding() == R_R0_enc && r15->encoding() == R_R15_enc, "right coding"); // ADL and VM encodings must agree at both ends
+  return as_Register(register_encoding);
+}
+
+// Given a register encoding, produce a FloatRegister object
+static FloatRegister reg_to_FloatRegister_object(int register_encoding) {
+  assert(f0->encoding() == R_S0_enc && f31->encoding() == R_S31_enc, "right coding");
+  // [d16,d31] share FloatRegister encodings with [f1,f31] since that numerically equals the ARM insn parameter encoding;
+  // in contrast, the OptoReg encoding for d16+ is different: bit 5 is folded into bit 0 below (e.g. 32 -> 1, 34 -> 3)
+  return as_FloatRegister((register_encoding&0x1f)|(register_encoding>>5));
+}
+
+void Compile::pd_compiler2_init() {
+  // Unimplemented: nothing is done here
+}
+
+OptoRegPair c2::return_value(int ideal_reg) {
+  assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
+  static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, R_R0_num,     R_R0_num,     R_hf_ret_lo_num,  R_hf_ret_lo_num, R_R0_num }; // low half, indexed by ideal_reg
+  static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad,     R_hf_ret_hi_num, R_R1_num }; // high half, indexed by ideal_reg
+#ifndef HARD_FLOAT_CC
+  assert(hasFPU(), "non-VFP java ABI is not supported");
+#endif // !HARD_FLOAT_CC
+  return OptoRegPair( hi[ideal_reg], lo[ideal_reg]);
+}
+
+#ifndef HARD_FLOAT_CC  // only built without HARD_FLOAT_CC: every entry here is a core register (R0/R1)
+OptoRegPair c2::c_return_value(int ideal_reg) {
+  assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
+  static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, R_R0_num,     R_R0_num,     R_R0_num,     R_R0_num, R_R0_num }; // low half, indexed by ideal_reg
+  static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, R_R1_num, R_R1_num }; // high half, indexed by ideal_reg
+  return OptoRegPair( hi[ideal_reg], lo[ideal_reg]);
+}
+#endif // !HARD_FLOAT_CC
+
+// !!!!! Special hack to get all types of calls to specify the byte offset
+//       from the start of the call to the point where the return address
+//       will point.
+
+static uint call_static_enc_size(const MachCallNode *n, ciMethod *_method, bool _method_handle_invoke) {
+  int insn_count;  // number of instructions in the call sequence
+  if (_method != NULL) { insn_count = far_branches() ? NativeCall::instruction_size / NativeInstruction::arm_insn_sz : 1; }
+  else                 { insn_count = maybe_far_call(n) ? 3 : 1; }
+  if (_method_handle_invoke) { insn_count += 2; }  // method-handle invokes add two instructions
+  return insn_count * NativeInstruction::arm_insn_sz;  // convert the instruction count to bytes
+}
+
+static uint call_dynamic_enc_size() {
+  const bool far = far_branches();  // far: a full NativeCall sequence; otherwise a single instruction
+  return (far ? NativeCall::instruction_size : NativeInstruction::arm_insn_sz) + 2 * NativeInstruction::arm_insn_sz;
+}
+
+static uint call_runtime_enc_size(const MachCallNode *n) {
+  // Near call: a single bl.  Far call: movw; movt; blx.
+  const int insn_count = maybe_far_call(n) ? 3 : 1;
+  return insn_count * NativeInstruction::arm_insn_sz;
+}
+
+int MachCallStaticJavaNode::ret_addr_offset() {
+  const int trailing = _method_handle_invoke ? NativeInstruction::arm_insn_sz : 0;  // method-handle calls: one instruction follows the return address
+  return call_static_enc_size(this, _method, _method_handle_invoke) - trailing;
+}
+
+int MachCallDynamicJavaNode::ret_addr_offset() {
+  return call_dynamic_enc_size(); // the return address is at the end of the whole encoded sequence
+}
+
+int MachCallRuntimeNode::ret_addr_offset() {
+  return call_runtime_enc_size(this); // the return address is at the end of the whole encoded sequence
+}
+%}
+
+// The intptr_t operand types, defined by textual substitution: each X name maps to an int-sized operand here.
+// (Cf. opto/type.hpp.  This lets us avoid many, many other ifdefs.)
+#define immX      immI
+#define immXRot   immIRot
+#define iRegX     iRegI
+#define aimmX     aimmI
+#define limmX     limmI
+#define immX10x2  immI10x2
+#define LShiftX   LShiftI
+#define shimmX    immU5
+
+// Compatibility interface: alternative names for operands
+#define aimmP     immPRot
+#define immIMov   immIRot
+
+#define store_RegL     iRegL
+#define store_RegLd    iRegLd
+#define store_RegI     iRegI
+#define store_ptr_RegP iRegP
+
+//----------ATTRIBUTES---------------------------------------------------------
+//----------Operand Attributes-------------------------------------------------
+op_attrib op_cost(1);          // Required cost attribute; 1 is the value given here, operands below override it
+
+//----------OPERANDS-----------------------------------------------------------
+// Operand definitions must precede instruction definitions for correct parsing
+// in the ADLC because operands constitute user defined types which are used in
+// instruction definitions.
+
+//----------Simple Operands----------------------------------------------------
+// Immediate Operands
+
+operand immIRot() %{
+  predicate(Assembler::is_valid_for_imm12(n->get_int())); // int constant that passes is_valid_for_imm12 unchanged
+  match(ConI);
+
+  op_cost(0);
+  // formats are generated automatically for constants and base registers
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immIRotn() %{
+  predicate(n->get_int() != 0 && Assembler::is_valid_for_imm12(~n->get_int())); // nonzero int whose bitwise complement passes is_valid_for_imm12
+  match(ConI);
+
+  op_cost(0);
+  // formats are generated automatically for constants and base registers
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immIRotneg() %{
+  // if Assembler::is_valid_for_imm12() is true for this constant, it is
+  // an immIRot and an optimal instruction combination exists to handle the
+  // constant as an immIRot; so only constants that fail as-is but pass when negated match here
+  predicate(!Assembler::is_valid_for_imm12(n->get_int()) && Assembler::is_valid_for_imm12(-n->get_int()));
+  match(ConI);
+
+  op_cost(0);
+  // formats are generated automatically for constants and base registers
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Non-negative integer immediate that is encodable using the rotation scheme
+// (i.e. passes is_valid_for_imm12), and that when expanded fits in 31 bits.
+operand immU31Rot() %{
+  predicate((0 <= n->get_int()) && Assembler::is_valid_for_imm12(n->get_int()));
+  match(ConI);
+
+  op_cost(0);
+  // formats are generated automatically for constants and base registers
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immPRot() %{
+  predicate(n->get_ptr() == 0 || (Assembler::is_valid_for_imm12(n->get_ptr()) && ((ConPNode*)n)->type()->reloc() == relocInfo::none)); // null, or a relocation-free pointer that passes is_valid_for_imm12
+
+  match(ConP);
+
+  op_cost(0);
+  // formats are generated automatically for constants and base registers
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immLlowRot() %{
+  predicate(n->get_long() >> 32 == 0 && Assembler::is_valid_for_imm12((int)n->get_long())); // upper 32 bits are zero and the low word passes is_valid_for_imm12
+  match(ConL);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immLRot2() %{
+  predicate(Assembler::is_valid_for_imm12((int)(n->get_long() >> 32)) &&
+            Assembler::is_valid_for_imm12((int)(n->get_long()))); // both 32-bit halves must pass is_valid_for_imm12
+  match(ConL);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: 12-bit magnitude, strictly inside (-4096, 4096) - for addressing mode
+operand immI12() %{
+  predicate((-4096 < n->get_int()) && (n->get_int() < 4096));
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: 10-bit disp and disp+4, i.e. strictly inside (-1024, 1020) - for addressing float pair
+operand immI10x2() %{
+  predicate((-1024 < n->get_int()) && (n->get_int() < 1024 - 4));
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: 12-bit disp and disp+4, i.e. strictly inside (-4096, 4092) - for addressing word pair
+operand immI12x2() %{
+  predicate((-4096 < n->get_int()) && (n->get_int() < 4096 - 4));
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+//----------DEFINITION BLOCK---------------------------------------------------
+// Define name --> value mappings to inform the ADLC of an integer valued name
+// Current support includes integer values in the range [0, 0x7FFFFFFF]
+// Format:
+//        int_def  <name>         ( <int_value>, <expression>);
+// Generated Code in ad_<arch>.hpp
+//        #define  <name>   (<expression>)
+//        // value == <int_value>
+// Generated code in ad_<arch>.cpp adlc_verification()
+//        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
+//
+definitions %{
+// The default cost (of an ALU instruction); the other costs below are multiples of it.
+  int_def DEFAULT_COST      (    100,     100);
+  int_def HUGE_COST         (1000000, 1000000);
+
+// Memory refs are twice as expensive as run-of-the-mill.
+  int_def MEMORY_REF_COST   (    200, DEFAULT_COST * 2);
+
+// Branches and calls are even more expensive: three times the default.
+  int_def BRANCH_COST       (    300, DEFAULT_COST * 3);
+  int_def CALL_COST         (    300, DEFAULT_COST * 3);
+%}
+
+
+//----------SOURCE BLOCK-------------------------------------------------------
+// This is a block of C++ code which provides values, functions, and
+// definitions necessary in the rest of the architecture description
+source_hpp %{
+// Header information of the source block.
+// Method declarations/definitions which are used outside
+// the ad-scope can conveniently be defined here.
+//
+// To keep related declarations/definitions/uses close together,
+// we switch between source %{ %} and source_hpp %{ %} freely as needed.
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#define STOP(error) __ stop(error)
+#else
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#define STOP(error) __ block_comment(error); stop(error)
+#endif
+
+#define BIND(label) __ bind(label); BLOCK_COMMENT(#label ":")
+
+// Does the destination need to be loaded into a register and then passed to a
+// branch instruction? (Defined elsewhere; declared for both node flavours.)
+extern bool maybe_far_call(const CallNode *n);
+extern bool maybe_far_call(const MachCallNode *n);
+static inline bool cache_reachable() {
+  return MacroAssembler::_cache_fully_reachable(); // static query; emit_call_reloc asserts it agrees with the instance method
+}
+static inline bool far_branches() {
+  return MacroAssembler::far_branches(); // shorthand used by the call-size helpers
+}
+
+extern bool PrintOptoAssembly;
+
+class c2 {
+public:
+  static OptoRegPair return_value(int ideal_reg);   // register pair holding a return value of the given ideal type
+#ifndef HARD_FLOAT_CC
+  static OptoRegPair c_return_value(int ideal_reg); // core-register-only variant, built only without HARD_FLOAT_CC
+#endif
+};
+
+class CallStubImpl {
+
+  //--------------------------------------------------------------
+  //---<  Used for optimization in Compile::Shorten_branches  >---
+  //--------------------------------------------------------------
+
+ public:
+  // Size of call trampoline stub: always 0 here.
+  static uint size_call_trampoline() {
+    return 0; // no call trampolines on this platform
+  }
+
+  // Number of relocations needed by a call trampoline stub: always 0 here.
+  static uint reloc_call_trampoline() {
+    return 0; // no call trampolines on this platform
+  }
+};
+
+class HandlerImpl {
+
+ public:
+
+  static int emit_exception_handler(CodeBuffer &cbuf);
+  static int emit_deopt_handler(CodeBuffer& cbuf);
+
+  static uint size_exception_handler() {
+    return ( 3 * 4 ); // 12 bytes; presumably three 4-byte instructions -- TODO confirm against emit_exception_handler
+  }
+
+
+  static uint size_deopt_handler() {
+    return ( 9 * 4 ); // 36 bytes; presumably nine 4-byte instructions -- TODO confirm against emit_deopt_handler
+  }
+
+};
+
+%}
+
+source %{
+#define __ _masm. // shorthand: code written as "__ foo()" calls foo() on the local MacroAssembler named _masm
+
+static FloatRegister reg_to_FloatRegister_object(int register_encoding); // maps an ADL encoding to a FloatRegister
+static Register reg_to_register_object(int register_encoding);           // maps an ADL encoding to a Register
+
+
+// ****************************************************************************
+
+// REQUIRED FUNCTIONALITY
+
+// Indicate whether the safepoint node needs the polling page address as an input.
+// Since ARM does not have absolute addressing, it does, so this always returns true.
+bool SafePointNode::needs_polling_address_input() {
+  return true;
+}
+
+// Emit a breakpoint (bkpt 0) that is caught by the debugger (for debugging the compiler)
+void emit_break(CodeBuffer &cbuf) {
+  MacroAssembler _masm(&cbuf); // the "__" shorthand below refers to this assembler
+  __ bkpt(0);
+}
+
+#ifndef PRODUCT // the textual form exists only in non-product builds
+void MachBreakpointNode::format( PhaseRegAlloc *, outputStream *st ) const {
+  st->print("TA"); // fixed text; the register allocator argument is unused
+}
+#endif
+
+void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  emit_break(cbuf); // a single bkpt(0); ra_ is unused
+}
+
+uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
+  return MachNode::size(ra_); // defers to the generic MachNode size computation
+}
+
+
+void emit_nop(CodeBuffer &cbuf) {
+  MacroAssembler _masm(&cbuf); // the "__" shorthand below refers to this assembler
+  __ nop();                    // append one nop to cbuf
+}
+
+
+void emit_call_reloc(CodeBuffer &cbuf, const MachCallNode *n, MachOper *m, RelocationHolder const& rspec) {
+  int ret_addr_offset0 = n->as_MachCall()->ret_addr_offset(); // offset the node reported up front; checked at the end
+  int call_site_offset = cbuf.insts()->mark_off(); // NOTE(review): read before set_inst_mark() below -- assumes the mark already sits at the call site; confirm
+  MacroAssembler _masm(&cbuf);
+  __ set_inst_mark(); // needed in emit_to_interp_stub() to locate the call
+  address target = (address)m->method();
+  assert(n->as_MachCall()->entry_point() == target, "sanity");
+  assert(maybe_far_call(n) == !__ reachable_from_cache(target), "sanity");
+  assert(cache_reachable() == __ cache_fully_reachable(), "sanity");
+
+  assert(target != NULL, "need real address");
+
+  if (rspec.type() == relocInfo::runtime_call_type ||
+    rspec.type() == relocInfo::none) {
+    __ call(target, rspec); // runtime calls and relocation-free targets use a plain call
+  } else {
+    __ trampoline_call(Address(target, rspec), NULL); // every other relocation type goes through trampoline_call
+  }
+  int ret_addr_offset = __ offset(); // code offset right after the emitted call sequence
+  assert(ret_addr_offset - call_site_offset == ret_addr_offset0, "fix ret_addr_offset()");
+}
+
+//=============================================================================
+// REQUIRED FUNCTIONALITY for encoding
+void emit_lo(CodeBuffer &cbuf, int val) {  } // empty body: emits nothing
+void emit_hi(CodeBuffer &cbuf, int val) {  } // empty body: emits nothing
+
+
+//============================================================================= constant table base lives in any ptr_reg register
+const RegMask& MachConstantBaseNode::_out_RegMask = PTR_REG_mask();
+
+int Compile::ConstantTable::calculate_table_base_offset() const {
+  // Start from minus half the table size (presumably so that both halves are
+  // reachable with a signed displacement -- TODO confirm). Displacement limits:
+  //   vldr_f32, vldr_f64: 8-bit offset multiplied by 4: +/- 1024
+  //   ldr, ldrb:          12-bit offset:                +/- 4096
+  const int centered = -(size() / 2);
+  // When that value is not a valid simm10, use the smallest simm10 instead.
+  return Assembler::is_simm10(centered) ? centered : Assembler::min_simm10();
+}
+
+bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } // never expanded after allocation here
+void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
+  ShouldNotReachHere(); // consistent with requires_postalloc_expand() returning false
+}
+
+void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
+  Compile* C = ra_->C;
+  Compile::ConstantTable& constant_table = C->constant_table();
+  MacroAssembler _masm(&cbuf);
+
+  Register r = as_Register(ra_->get_encode(this)); // register the allocator assigned to this node
+  CodeSection* consts_section = __ code()->consts();
+  int consts_size = consts_section->align_at_start(consts_section->size());
+  assert(constant_table.size() == consts_size, "must be: %d == %d", constant_table.size(), consts_size);
+
+  // Materialize the constant table base: section start minus the table base offset.
+  address baseaddr = consts_section->start() + -(constant_table.table_base_offset());
+  RelocationHolder rspec = internal_word_Relocation::spec(baseaddr);
+  __ mov_address(r, baseaddr, rspec);
+}
+
+uint MachConstantBaseNode::size(PhaseRegAlloc*) const {
+  return 8; // fixed 8 bytes; presumably the two instructions of mov_address -- TODO confirm
+}
+
+#ifndef PRODUCT // the textual form exists only in non-product builds
+void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
+  char reg[128];                 // receives the printable name of this node's register
+  ra_->dump_register(this, reg);
+  st->print("MOV_SLOW    &constanttable,%s\t! constant table base", reg);
+}
+#endif
+
+#ifndef PRODUCT
+// Debug listing of the method prolog. It mirrors MachPrologNode::emit():
+// one NOP, an optional stack bang, the FP/LR push and the stack pointer
+// adjustment for the remainder of the frame.
+void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
+  Compile* C = ra_->C;
+
+  // emit() always plants exactly one NOP at the start of the prolog,
+  // independent of OptoPrologueNops, so list exactly one here.
+  st->print_cr("NOP"); st->print("\t");
+
+  size_t framesize = C->frame_size_in_bytes();
+  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+  int bangsize = C->bang_size_in_bytes();
+  // Remove two words for return addr and fp,
+  framesize -= 2*wordSize;
+  bangsize -= 2*wordSize;
+
+  // Calls to C2R adapters often do not accept exceptional returns.
+  // We require that their callers must bang for them.  But be careful, because
+  // some VM calls (such as call site linkage) can use several kilobytes of
+  // stack.  But the stack safety zone should account for that.
+  // See bugs 4446381, 4468289, 4497237.
+  if (C->need_stack_bang(bangsize)) {
+    st->print_cr("! stack bang (%d bytes)", bangsize); st->print("\t");
+  }
+  // emit() pushes rfp and lr.
+  st->print_cr("PUSH   R_FP|R_LR"); st->print("\t");
+  if (framesize != 0) {
+    st->print   ("SUB    R_SP, R_SP, " SIZE_FORMAT,framesize);
+  }
+}
+#endif
+
+// Emit the method prolog: a patchable NOP, an optional stack bang, the
+// FP/LR push and the stack pointer adjustment for the rest of the frame.
+// Also records the frame-complete offset (real emission only) and fixes
+// the constant table base offset when a constant base node exists.
+void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  MacroAssembler _masm(&cbuf);
+  Compile* C = ra_->C;
+
+  // Leading nop: leaves room to patch in a branch should the method have
+  // to be invalidated later.
+  __ nop();
+
+  size_t frame_bytes = C->frame_size_in_bytes();
+  assert((frame_bytes & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+  int bang_bytes = C->bang_size_in_bytes();
+  // Two words (return address and fp) are covered by the push below.
+  frame_bytes -= 2*wordSize;
+  bang_bytes -= 2*wordSize;
+
+  // Calls to C2R adapters often do not accept exceptional returns, so their
+  // callers must bang the stack for them.  Some VM calls (such as call site
+  // linkage) can use several kilobytes of stack, but the stack safety zone
+  // should account for that.
+  // See bugs 4446381, 4468289, 4497237.
+  if (C->need_stack_bang(bang_bytes)) {
+    __ arm_stack_overflow_check(bang_bytes, r12);
+  }
+
+  __ push(RegSet::of(rfp, lr), sp);
+  if (frame_bytes != 0) {
+    __ sub(sp, sp, frame_bytes);
+  }
+
+  // An offset taken from the scratch buffer is not valid, so the
+  // frame-complete offset is recorded only for the "Compile::Fill_buffer"
+  // code buffer.
+  const bool real_emission = (strcmp(cbuf.name(), "Compile::Fill_buffer") == 0);
+  if (real_emission) {
+    C->set_frame_complete( __ offset() );
+  }
+
+  if (C->has_mach_constant_base_node()) {
+    // NOTE: The table base offset is set here because users of the table
+    // might be emitted before MachConstantBaseNode.
+    Compile::ConstantTable& ctable = C->constant_table();
+    ctable.set_table_base_offset(ctable.calculate_table_base_offset());
+  }
+}
+
+// The prolog size is not computed here; the generic MachNode::size() is used.
+uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
+  return MachNode::size(ra_);
+}
+
+// Relocation count reported for the prolog; a deliberately generous
+// constant rather than an exact count.
+int MachPrologNode::reloc() const {
+  return 10; // a large enough number
+}
+
+//=============================================================================
+#ifndef PRODUCT
+// Debug listing of the method epilog: the stack pointer restore, the FP/LR
+// pop and, when this epilog polls, the safepoint poll sequence.
+void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
+  Compile* C = ra_->C;
+
+  size_t framesize = C->frame_size_in_bytes();
+  // Two words (return address and fp) are released by the pop below.
+  framesize -= 2*wordSize;
+
+  if (framesize != 0) {
+    st->print("ADD    R_SP, R_SP, " SIZE_FORMAT "\n\t",framesize);
+  }
+  // emit() pops rfp and lr.
+  st->print("POP    R_FP|R_LR");
+
+  if (do_polling() && ra_->C->is_method_compilation()) {
+    st->print("\n\t");
+    st->print("MOV    r12, #PollAddr\t! Load Polling address\n\t");
+    st->print("LDR    r12,[r12]\t!Poll for Safepointing");
+  }
+}
+#endif
+
+// Emit the method epilog: release the frame, restore FP/LR, optionally
+// run the reserved stack check and, when requested, the return poll.
+void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  Compile* C = ra_->C;
+  MacroAssembler _masm(&cbuf);
+
+  // Two words of the frame (return address and fp) are released by the pop.
+  size_t frame_bytes = C->frame_size_in_bytes();
+  frame_bytes -= 2*wordSize;
+  if (frame_bytes != 0) {
+    __ add(sp, sp, frame_bytes);
+  }
+  __ pop(RegSet::of(rfp, lr), sp);
+
+  const bool check_reserved_stack = StackReservedPages > 0 && C->has_reserved_stack_access();
+  if (check_reserved_stack) {
+    __ reserved_stack_check();
+  }
+
+  // Safepoint poll on return, if this epilog is asked to poll.
+  if (do_polling() && C->is_method_compilation()) {
+    // The address load is usually one or two instructions.
+    address poll_page = (address)os::get_polling_page();
+    __ mov_address(r12, poll_page, RelocationHolder::none);
+    __ relocate(relocInfo::poll_return_type);
+    __ ldr(r12, Address(r12));
+  }
+}
+
+// The epilog size is not computed here; the generic MachNode::size() is used.
+uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
+  return MachNode::size(ra_);
+}
+
+// Relocation count reported for the epilog; a deliberately generous
+// constant rather than an exact count.
+int MachEpilogNode::reloc() const {
+  return 16; // a large enough number
+}
+
+// The epilog has no pipeline description of its own; it reports the
+// generic MachNode pipeline class.
+const Pipeline * MachEpilogNode::pipeline() const {
+  return MachNode::pipeline_class();
+}
+
+// Not implemented on this port: after checking that this epilog polls,
+// the function stops in Unimplemented(). The trailing return only
+// satisfies the compiler.
+int MachEpilogNode::safepoint_offset() const {
+  assert( do_polling(), "no return for this epilog node");
+  // Disabled leftover from another port:
+  //  return MacroAssembler::size_of_sethi(os::get_polling_page());
+  Unimplemented();
+  return 0;
+}
+
+//=============================================================================
+
+// Register classes used when generating spill copies:
+//   rc_bad   - not a valid register
+//   rc_int   - core (integer) register
+//   rc_float - floating point register
+//   rc_stack - stack slot
+enum RC { rc_bad, rc_int, rc_float, rc_stack };
+// Classify an allocator register name as rc_bad, rc_stack, rc_int or
+// rc_float.
+static enum RC rc_class( OptoReg::Name reg ) {
+  if (!OptoReg::is_valid(reg)) {
+    return rc_bad;
+  }
+  if (OptoReg::is_stack(reg)) {
+    return rc_stack;
+  }
+
+  VMReg vm_reg = OptoReg::as_VMReg(reg);
+  if (!vm_reg->is_Register()) {
+    // Anything that is neither a stack slot nor a core register has to be
+    // a floating point register.
+    assert(vm_reg->is_FloatRegister(), "must be");
+    return rc_float;
+  }
+  return rc_int;
+}
+
+// True when the two registers form an even/odd consecutive pair (by
+// encoding) and is_memoryHD() accepts the offset; the spill code uses this
+// to choose between a 64-bit (ldrd/strd) and a 32-bit access.
+static inline bool is_iRegLd_memhd(OptoReg::Name src_first, OptoReg::Name src_second, int offset) {
+  const int lo_enc = Matcher::_regEncode[src_first];
+  const int hi_enc = Matcher::_regEncode[src_second];
+  const bool lo_is_even     = (lo_enc & 1) == 0;
+  const bool is_consecutive = (lo_enc + 1 == hi_enc);
+  return lo_is_even && is_consecutive && is_memoryHD(offset);
+}
+
+// Shared worker behind MachSpillCopyNode::format(), emit() and size().
+//
+//   cbuf != NULL               : emit the copy into cbuf
+//   cbuf == NULL && !do_size   : print a listing of the copy to st
+//   cbuf == NULL && do_size    : only compute the size
+//
+// Returns the size of the copy in bytes (4 per instruction). When emitting
+// and a stack offset cannot be encoded, the method is recorded as not
+// compilable and 0 is returned.
+//
+// The low ("first") halves are moved first; the tail of the function moves
+// the high ("second") halves when they were not covered by a paired move.
+uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
+                                        PhaseRegAlloc *ra_,
+                                        bool do_size,
+                                        outputStream* st ) const {
+  // Get registers to move
+  OptoReg::Name src_second = ra_->get_reg_second(in(1));
+  OptoReg::Name src_first = ra_->get_reg_first(in(1));
+  OptoReg::Name dst_second = ra_->get_reg_second(this );
+  OptoReg::Name dst_first = ra_->get_reg_first(this );
+
+  enum RC src_second_rc = rc_class(src_second);
+  enum RC src_first_rc = rc_class(src_first);
+  enum RC dst_second_rc = rc_class(dst_second);
+  enum RC dst_first_rc = rc_class(dst_first);
+
+  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
+
+  // Generate spill code!
+  int size = 0;
+
+  if (src_first == dst_first && src_second == dst_second)
+    return size;            // Self copy, no move
+
+#ifdef TODO
+  if (bottom_type()->isa_vect() != NULL) {
+  }
+#endif
+
+  // Shared code does not expect instruction set capability based bailouts here.
+  // Handle offset unreachable bailout with minimal change in shared code.
+  // Bailout only for real instruction emit.
+  // This requires a single comment change in shared code. ( see output.cpp "Normal" instruction case )
+
+  MacroAssembler _masm(cbuf);
+
+  // --------------------------------------
+  // Check for mem-mem move.  Load into unused float registers and fall into
+  // the float-store case.
+  if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
+    int offset = ra_->reg2offset(src_first);
+    if (cbuf && !is_memoryfp(offset)) {
+      ra_->C->record_method_not_compilable("unable to handle large constant offsets");
+      return 0;
+    } else {
+      if (src_second_rc != rc_bad) {
+        assert((src_first&1)==0 && src_first+1 == src_second, "pair of registers must be aligned/contiguous");
+        // From here on the source is the memory-copy float register pair.
+        src_first     = OptoReg::Name(R_mem_copy_lo_num);
+        src_second    = OptoReg::Name(R_mem_copy_hi_num);
+        src_first_rc  = rc_float;
+        src_second_rc = rc_float;
+        if (cbuf) {
+          __ vldr_f64(Rmemcopy, Address(sp, offset));
+        } else if (!do_size) {
+          st->print(LDR_DOUBLE "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset);
+        }
+      } else {
+        // From here on the source is the low memory-copy float register.
+        src_first     = OptoReg::Name(R_mem_copy_lo_num);
+        src_first_rc  = rc_float;
+        if (cbuf) {
+          __ vldr_f32(Rmemcopy, Address(sp, offset));
+        } else if (!do_size) {
+          st->print(LDR_FLOAT "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset);
+        }
+      }
+      size += 4;
+    }
+  }
+
+  // A high half that is still stack-to-stack at this point is not supported.
+  if (src_second_rc == rc_stack && dst_second_rc == rc_stack) {
+    Unimplemented();
+  }
+
+  // --------------------------------------
+  // Check for integer reg-reg copy
+  if (src_first_rc == rc_int && dst_first_rc == rc_int) {
+    // Else normal reg-reg copy
+    assert( src_second != dst_first, "smashed second before evacuating it" );
+    if (cbuf) {
+      __ mov(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first]));
+#ifndef PRODUCT
+    } else if (!do_size) {
+      st->print("MOV    R_%s, R_%s\t# spill",
+                Matcher::regName[dst_first],
+                Matcher::regName[src_first]);
+#endif
+    }
+    size += 4;
+  }
+
+  // Check for integer store
+  if (src_first_rc == rc_int && dst_first_rc == rc_stack) {
+    int offset = ra_->reg2offset(dst_first);
+    if (cbuf && !is_memoryI(offset)) {
+      ra_->C->record_method_not_compilable("unable to handle large constant offsets");
+      return 0;
+    } else {
+      // A suitable register pair and offset allow a single 64-bit store.
+      if (src_second_rc != rc_bad && is_iRegLd_memhd(src_first, src_second, offset)) {
+        assert((src_first&1)==0 && src_first+1 == src_second, "pair of registers must be aligned/contiguous");
+        if (cbuf) {
+          __ strd(reg_to_register_object(Matcher::_regEncode[src_first]), Address(sp, offset));
+#ifndef PRODUCT
+        } else if (!do_size) {
+          if (size != 0) st->print("\n\t");
+          st->print(STR_64 "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first), offset);
+#endif
+        }
+        return size + 4;
+      } else {
+        if (cbuf) {
+          __ str(reg_to_register_object(Matcher::_regEncode[src_first]), Address(sp, offset));
+#ifndef PRODUCT
+        } else if (!do_size) {
+          if (size != 0) st->print("\n\t");
+          st->print(STR_32 "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first), offset);
+#endif
+        }
+      }
+    }
+    size += 4;
+  }
+
+  // Check for integer load
+  if (dst_first_rc == rc_int && src_first_rc == rc_stack) {
+    int offset = ra_->reg2offset(src_first);
+    if (cbuf && !is_memoryI(offset)) {
+      ra_->C->record_method_not_compilable("unable to handle large constant offsets");
+      return 0;
+    } else {
+      // A suitable register pair and offset allow a single 64-bit load.
+      if (src_second_rc != rc_bad && is_iRegLd_memhd(dst_first, dst_second, offset)) {
+        assert((src_first&1)==0 && src_first+1 == src_second, "pair of registers must be aligned/contiguous");
+        if (cbuf) {
+          __ ldrd(reg_to_register_object(Matcher::_regEncode[dst_first]), Address(sp, offset));
+#ifndef PRODUCT
+        } else if (!do_size) {
+          if (size != 0) st->print("\n\t");
+          st->print(LDR_64 "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_first), offset);
+#endif
+        }
+        return size + 4;
+      } else {
+        if (cbuf) {
+          __ ldr(reg_to_register_object(Matcher::_regEncode[dst_first]), Address(sp, offset));
+#ifndef PRODUCT
+        } else if (!do_size) {
+          if (size != 0) st->print("\n\t");
+          st->print(LDR_32 "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_first), offset);
+#endif
+        }
+      }
+    }
+    size += 4;
+  }
+
+  // Check for float reg-reg copy
+  if (src_first_rc == rc_float && dst_first_rc == rc_float) {
+    if (src_second_rc != rc_bad) {
+      assert((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous");
+      if (cbuf) {
+      __ vmov_f64(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
+#ifndef PRODUCT
+      } else if (!do_size) {
+        st->print(MOV_DOUBLE "    R_%s, R_%s\t# spill",
+                  Matcher::regName[dst_first],
+                  Matcher::regName[src_first]);
+#endif
+      }
+      return 4;
+    }
+    if (cbuf) {
+      __ vmov_f32(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
+#ifndef PRODUCT
+    } else if (!do_size) {
+      st->print(MOV_FLOAT "    R_%s, R_%s\t# spill",
+                Matcher::regName[dst_first],
+                Matcher::regName[src_first]);
+#endif
+    }
+    size = 4;
+  }
+
+  // Check for float store
+  if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
+    int offset = ra_->reg2offset(dst_first);
+    if (cbuf && !is_memoryfp(offset)) {
+      ra_->C->record_method_not_compilable("unable to handle large constant offsets");
+      return 0;
+    } else {
+      // Further check for aligned-adjacent pair, so we can use a double store
+      if (src_second_rc != rc_bad) {
+        assert((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers and stack slots must be aligned/contiguous");
+        if (cbuf) {
+          __ vstr_f64(reg_to_FloatRegister_object(Matcher::_regEncode[src_first]), Address(sp, offset));
+#ifndef PRODUCT
+        } else if (!do_size) {
+          if (size != 0) st->print("\n\t");
+          st->print(STR_DOUBLE "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset);
+#endif
+        }
+        return size + 4;
+      } else {
+        if (cbuf) {
+          __ vstr_f32(reg_to_FloatRegister_object(Matcher::_regEncode[src_first]), Address(sp, offset));
+#ifndef PRODUCT
+        } else if (!do_size) {
+          if (size != 0) st->print("\n\t");
+          st->print(STR_FLOAT "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset);
+#endif
+        }
+      }
+    }
+    size += 4;
+  }
+
+  // Check for float load
+  if (dst_first_rc == rc_float && src_first_rc == rc_stack) {
+    int offset = ra_->reg2offset(src_first);
+    if (cbuf && !is_memoryfp(offset)) {
+      ra_->C->record_method_not_compilable("unable to handle large constant offsets");
+      return 0;
+    } else {
+      // Further check for aligned-adjacent pair, so we can use a double load
+      if (src_second_rc != rc_bad) {
+        assert((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers and stack slots must be aligned/contiguous");
+        if (cbuf) {
+          __ vldr_f64(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), Address(sp, offset));
+#ifndef PRODUCT
+        } else if (!do_size) {
+          if (size != 0) st->print("\n\t");
+          st->print(LDR_DOUBLE "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_first),offset);
+#endif
+        }
+        return size + 4;
+      } else {
+        if (cbuf) {
+          __ vldr_f32(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), Address(sp, offset));
+#ifndef PRODUCT
+        } else if (!do_size) {
+          if (size != 0) st->print("\n\t");
+          st->print(LDR_FLOAT "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_first),offset);
+#endif
+        }
+      }
+    }
+    size += 4;
+  }
+
+  // check for int reg -> float reg move
+  if (src_first_rc == rc_int && dst_first_rc == rc_float) {
+    // Further check for aligned-adjacent pair, so we can use a single instruction
+    if (src_second_rc != rc_bad) {
+      assert((dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous");
+      assert((src_first&1)==0 && src_first+1 == src_second, "pairs of registers must be aligned/contiguous");
+      assert(src_second_rc == rc_int && dst_second_rc == rc_float, "unsupported");
+      if (cbuf) {
+        __ vmov_f64(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first]), reg_to_register_object(Matcher::_regEncode[src_second]));
+#ifndef PRODUCT
+      } else if (!do_size) {
+        if (size != 0) st->print("\n\t");
+        st->print("FMDRR   R_%s, R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first), OptoReg::regname(src_second));
+#endif
+      }
+      return size + 4;
+    } else {
+      if (cbuf) {
+        __ vmov_f32(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first]));
+#ifndef PRODUCT
+      } else if (!do_size) {
+        if (size != 0) st->print("\n\t");
+        st->print(FMSR "   R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first));
+#endif
+      }
+      size += 4;
+    }
+  }
+
+  // check for float reg -> int reg move
+  if (src_first_rc == rc_float && dst_first_rc == rc_int) {
+    // Further check for aligned-adjacent pair, so we can use a single instruction
+    if (src_second_rc != rc_bad) {
+      assert((src_first&1)==0 && src_first+1 == src_second, "pairs of registers must be aligned/contiguous");
+      assert((dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous");
+      assert(src_second_rc == rc_float && dst_second_rc == rc_int, "unsupported");
+      if (cbuf) {
+        __ vmov_f64(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[dst_second]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
+#ifndef PRODUCT
+      } else if (!do_size) {
+        if (size != 0) st->print("\n\t");
+        st->print("FMRRD   R_%s, R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(dst_second), OptoReg::regname(src_first));
+#endif
+      }
+      return size + 4;
+    } else {
+      if (cbuf) {
+        __ vmov_f32(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
+#ifndef PRODUCT
+      } else if (!do_size) {
+        if (size != 0) st->print("\n\t");
+        st->print(FMRS "   R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first));
+#endif
+      }
+      size += 4;
+    }
+  }
+
+  // --------------------------------------------------------------------
+  // Check for hi bits still needing moving.  Only happens for misaligned
+  // arguments to native calls.
+  if (src_second == dst_second)
+    return size;               // Self copy; no move
+  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
+
+  // Check for integer reg-reg copy.  Hi bits are stuck up in the top
+  // 32-bits of a 64-bit register, but are needed in low bits of another
+  // register (else it's a hi-bits-to-hi-bits copy which should have
+  // happened already as part of a 64-bit move)
+  if (src_second_rc == rc_int && dst_second_rc == rc_int) {
+    if (cbuf) {
+      __ mov(reg_to_register_object(Matcher::_regEncode[dst_second]), reg_to_register_object(Matcher::_regEncode[src_second]));
+#ifndef PRODUCT
+    } else if (!do_size) {
+      if (size != 0) st->print("\n\t");
+      st->print("MOV    R_%s, R_%s\t# spill high",
+                Matcher::regName[dst_second],
+                Matcher::regName[src_second]);
+#endif
+    }
+    return size+4;
+  }
+
+  // Check for high word integer store
+  if (src_second_rc == rc_int && dst_second_rc == rc_stack) {
+    int offset = ra_->reg2offset(dst_second);
+
+    if (cbuf && !is_memoryP(offset)) {
+      ra_->C->record_method_not_compilable("unable to handle large constant offsets");
+      return 0;
+    } else {
+      if (cbuf) {
+        __ str(reg_to_register_object(Matcher::_regEncode[src_second]), Address(sp, offset));
+#ifndef PRODUCT
+      } else if (!do_size) {
+        if (size != 0) st->print("\n\t");
+        st->print("STR   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_second), offset);
+#endif
+      }
+    }
+    return size + 4;
+  }
+
+  // Check for high word integer load
+  if (dst_second_rc == rc_int && src_second_rc == rc_stack) {
+    int offset = ra_->reg2offset(src_second);
+    if (cbuf && !is_memoryP(offset)) {
+      ra_->C->record_method_not_compilable("unable to handle large constant offsets");
+      return 0;
+    } else {
+      if (cbuf) {
+        __ ldr(reg_to_register_object(Matcher::_regEncode[dst_second]), Address(sp, offset));
+#ifndef PRODUCT
+      } else if (!do_size) {
+        if (size != 0) st->print("\n\t");
+        st->print("LDR   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_second), offset);
+#endif
+      }
+    }
+    return size + 4;
+  }
+
+  // Any remaining combination of high halves is not supported.
+  Unimplemented();
+  return 0; // Mute compiler
+}
+
+#ifndef PRODUCT
+// Print the spill copy: runs the shared implementation without a code
+// buffer and with size-only mode off, so it writes its listing to st.
+void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
+  CodeBuffer* const no_buffer = NULL;
+  const bool size_only = false;
+  implementation( no_buffer, ra_, size_only, st );
+}
+#endif
+
+// Emit the spill copy into cbuf via the shared implementation; no listing
+// stream is supplied.
+void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  const bool size_only = false;
+  outputStream* const no_stream = NULL;
+  implementation( &cbuf, ra_, size_only, no_stream );
+}
+
+// Size of the spill copy in bytes: runs the shared implementation in
+// size-only mode (no code buffer, no listing stream).
+uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
+  CodeBuffer* const no_buffer = NULL;
+  outputStream* const no_stream = NULL;
+  const bool size_only = true;
+  return implementation( no_buffer, ra_, size_only, no_stream );
+}
+
+//=============================================================================
+#ifndef PRODUCT
+// Debug listing for padding NOPs; the byte count printed is 4 per NOP.
+void MachNopNode::format( PhaseRegAlloc *, outputStream *st ) const {
+  st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count);
+}
+#endif
+
+// Emit _count NOP instructions.
+void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
+  MacroAssembler _masm(&cbuf);
+  int emitted = 0;
+  while (emitted < _count) {
+    __ nop();
+    emitted++;
+  }
+}
+
+// Size in bytes of the padding: 4 bytes for each of the _count NOPs.
+uint MachNopNode::size(PhaseRegAlloc *ra_) const {
+  return 4 * _count;
+}
+
+
+//=============================================================================
+#ifndef PRODUCT
+// Debug listing: the result register receives SP plus the stack offset of
+// the first element of input 0's register mask.
+void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
+  int dst_reg    = ra_->get_reg_first(this);
+  int box_offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
+  st->print("ADD    %s,R_SP+#%d",Matcher::regName[dst_reg], box_offset);
+}
+#endif
+
+// Compute SP + offset into the result register, where offset is the stack
+// offset of the first element of input 0's register mask.
+void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  MacroAssembler _masm(&cbuf);
+  Register dst = reg_to_register_object(ra_->get_encode(this));
+  int box_offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
+
+  if (!is_aimm(box_offset)) {
+    // Offset rejected by is_aimm(): load it into dst first, then add SP.
+    __ mov(dst, box_offset);
+    __ add(dst, sp, dst);
+  } else {
+    __ add(dst, sp, box_offset);
+  }
+}
+
+// Size in bytes, obtained from the compile's scratch_emit_size() for this node.
+uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
+  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_)
+  assert(ra_ == ra_->C->regalloc(), "sanity");
+  return ra_->C->scratch_emit_size(this);
+}
+
+//=============================================================================
+#ifndef PRODUCT
+// Temporary that MachUEPNode::emit() loads the receiver's klass into. It
+// must be different from the inline cache register in the CMP below;
+// otherwise the listing shows a register compared with itself.
+#define R_RTEMP "R_R9"
+// Debug listing of the unverified entry point: load the receiver's klass,
+// compare it with the inline cache register and branch to the IC miss stub
+// on mismatch.
+void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
+  st->print_cr("\nUEP:");
+  if (UseCompressedClassPointers) {
+    st->print_cr("\tLDR_w " R_RTEMP ",[R_R0 + oopDesc::klass_offset_in_bytes]\t! Inline cache check");
+    st->print_cr("\tdecode_klass " R_RTEMP);
+  } else {
+    st->print_cr("\tLDR   " R_RTEMP ",[R_R0 + oopDesc::klass_offset_in_bytes]\t! Inline cache check");
+  }
+  st->print_cr("\tCMP   " R_RTEMP ",R_R12" );
+  st->print   ("\tB.NE  SharedRuntime::handle_ic_miss_stub");
+}
+#endif
+
+// Unverified entry point: load the receiver's klass, compare it with the
+// inline cache register and jump to the IC miss stub when they differ.
+void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  MacroAssembler _masm(&cbuf);
+
+  Register ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode());
+  assert(ic_reg == rscratch2/*Ricklass*/, "should be");
+
+  Register recv = r0;
+  // r9 is used as a temporary: it holds the loaded klass and is also
+  // passed to the jump.
+  Register klass_tmp = r9;
+
+  __ load_klass(klass_tmp, recv);
+  __ cmp(klass_tmp, ic_reg);
+  __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, klass_tmp, Assembler::NE);
+}
+
+// The UEP size is not computed here; the generic MachNode::size() is used.
+uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
+  return MachNode::size(ra_);
+}
+
+
+// REQUIRED EMIT CODE
+
+//=============================================================================
+
+// Emit the exception handler stub: a jump to the exception blob.
+// Returns the offset of the handler, or 0 when the stub could not be
+// started (in which case a "CodeCache is full" failure is recorded).
+int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
+  MacroAssembler _masm(&cbuf);
+
+  address stub_base = __ start_a_stub(size_exception_handler());
+  if (stub_base == NULL) {
+    // CodeBuffer::expand failed
+    ciEnv::current()->record_failure("CodeCache is full");
+    return 0;
+  }
+
+  const int handler_offset = __ offset();
+
+  // OK to trash LR, because exception blob will kill it
+  __ jump(OptoRuntime::exception_blob()->entry_point(), relocInfo::runtime_call_type, lr);
+
+  assert(__ offset() - handler_offset <= (int) size_exception_handler(), "overflow");
+
+  __ end_a_stub();
+  return handler_offset;
+}
+
+// Emit the deoptimization handler stub. The stub stores its own start
+// address (the deopt PC) in a freshly reserved stack word, leaves LR
+// intact, and jumps to the deopt blob's unpack entry.
+// Returns the offset of the handler, or 0 when the stub could not be
+// started (in which case a "CodeCache is full" failure is recorded).
+int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
+  // None of the current frame's registers may be used: we may have deopted
+  // at a poll, where everything can be live.
+  MacroAssembler _masm(&cbuf);
+
+  address stub_base = __ start_a_stub(size_deopt_handler());
+  if (stub_base == NULL) {
+    // CodeBuffer::expand failed
+    ciEnv::current()->record_failure("CodeCache is full");
+    return 0;
+  }
+
+  const int handler_offset = __ offset();
+  address handler_pc = __ pc();
+
+  __ sub(sp, sp, wordSize);               // make room for saved PC
+  __ push(lr);                            // save LR that may be live when we get here
+  __ mov_relative_address(lr, handler_pc);
+  __ str(lr, Address(sp, wordSize));      // save deopt PC
+  __ pop(lr);                             // restore LR
+  // rscratch1 seems killed at deopt_blob
+  __ jump(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type, rscratch1);
+
+  assert(__ offset() - handler_offset <= (int) size_deopt_handler(), "overflow");
+
+  __ end_a_stub();
+  return handler_offset;
+}
+
+// REQUIRED MATCHER CODE
+
+//=============================================================================
+
+// Report whether the ideal opcode can be matched on the current CPU.
+// An opcode without a match rule is never supported. Population count
+// depends on UsePopCountInstruction; the vector opcodes listed below depend
+// on the AdvSIMD and/or VFPv2 feature bits. Everything else that has a
+// match rule is supported.
+const bool Matcher::match_rule_supported(int opcode) {
+  if (!has_match_rule(opcode)) {
+    return false;
+  }
+
+  switch (opcode) {
+  // Population count: only when the instruction is enabled.
+  case Op_PopCountI:
+  case Op_PopCountL:
+    return UsePopCountInstruction;
+
+  // Vector operations that need AdvSIMD.
+  case Op_LShiftCntV:
+  case Op_RShiftCntV:
+  case Op_AddVB:
+  case Op_AddVS:
+  case Op_AddVI:
+  case Op_AddVL:
+  case Op_SubVB:
+  case Op_SubVS:
+  case Op_SubVI:
+  case Op_SubVL:
+  case Op_MulVS:
+  case Op_MulVI:
+  case Op_LShiftVB:
+  case Op_LShiftVS:
+  case Op_LShiftVI:
+  case Op_LShiftVL:
+  case Op_RShiftVB:
+  case Op_RShiftVS:
+  case Op_RShiftVI:
+  case Op_RShiftVL:
+  case Op_URShiftVB:
+  case Op_URShiftVS:
+  case Op_URShiftVI:
+  case Op_URShiftVL:
+  case Op_AndV:
+  case Op_OrV:
+  case Op_XorV:
+    return (VM_Version::features() & FT_AdvSIMD) != 0;
+
+  // Vector operations available with either VFPv2 or AdvSIMD.
+  case Op_LoadVector:
+  case Op_StoreVector:
+  case Op_AddVF:
+  case Op_SubVF:
+  case Op_MulVF:
+    return (VM_Version::features() & (FT_VFPV2 | FT_AdvSIMD)) != 0;
+
+  // Vector operations that need VFPv2.
+  case Op_AddVD:
+  case Op_SubVD:
+  case Op_MulVD:
+  case Op_DivVF:
+  case Op_DivVD:
+    return (VM_Version::features() & FT_VFPV2) != 0;
+  }
+
+  return true;  // Per default match rules are supported.
+}
+
+// Vector-length-aware variant of match_rule_supported(). The vector length
+// is currently not taken into account; the answer is simply that of
+// match_rule_supported(opcode).
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+  // TODO
+  // Identify extra cases that we might want to provide match rules for,
+  // e.g. Op_ vector nodes and other intrinsics, while guarding with vlen.
+  // Such rules would be added here.
+  return match_rule_supported(opcode);
+}
+
+// Predicated vectors are reported as unavailable on this port.
+const bool Matcher::has_predicated_vectors(void) {
+  return false;
+}
+
+// Float register pressure threshold: the default is used unchanged.
+const int Matcher::float_pressure(int default_pressure_threshold) {
+  return default_pressure_threshold;
+}
+
+// Convert a register number into an FPU offset by subtracting 32.
+int Matcher::regnum_to_fpu_offset(int regnum) {
+  // The FP registers are in the second chunk
+  const int first_fp_regnum = 32;
+  return regnum - first_fp_regnum;
+}
+
+// Vector width in bytes. The element type is ignored; the width is always
+// MaxVectorSize.
+const int Matcher::vector_width_in_bytes(BasicType bt) {
+  return MaxVectorSize;
+}
+
+// Ideal register kind for a vector of the given size in bytes:
+// 8 -> Op_VecD, 16 -> Op_VecX. Any other size is a fatal error.
+const uint Matcher::vector_ideal_reg(int size) {
+  assert(MaxVectorSize >= size, "");
+  if (size == 8) {
+    return Op_VecD;
+  }
+  if (size == 16) {
+    return Op_VecX;
+  }
+  ShouldNotReachHere();
+  return 0;
+}
+
+// Vector shift counts use the same ideal register kind as a vector of the
+// same size.
+const uint Matcher::vector_shift_count_ideal_reg(int size) {
+  return vector_ideal_reg(size);
+}
+
+// Upper limit on the number of elements of type bt in one vector: the
+// vector width in bytes divided by the element size in bytes.
+const int Matcher::max_vector_size(const BasicType bt) {
+  assert(is_java_primitive(bt), "only primitive type vectors");
+  const int width_in_bytes = vector_width_in_bytes(bt);
+  const int elem_bytes     = type2aelembytes(bt);
+  return width_in_bytes / elem_bytes;
+}
+
+// Lower limit on the number of elements of type bt in one vector: as many
+// elements as fit into 8 bytes.
+const int Matcher::min_vector_size(const BasicType bt) {
+  assert(is_java_primitive(bt), "only primitive type vectors");
+  const int min_width_in_bytes = 8;
+  return min_width_in_bytes / type2aelembytes(bt);
+}
+
+// ARM doesn't support misaligned vector stores/loads.
+const bool Matcher::misaligned_vectors_ok() {
+  return false;
+}
+
+// ARM doesn't support AES intrinsics, so the original key never needs to
+// be passed.
+const bool Matcher::pass_original_key_for_aes() {
+  return false;
+}
+
+// Direct long-to-float conversion is reported as unsupported.
+const bool Matcher::convL2FSupported(void) {
+  return false; // TODO why not?
+}
+
+// Is this branch offset short enough that a short branch can be used?
+// This port always answers no.
+//
+// NOTE: If the platform does not provide any short branch variants, then
+//       this method should return false for offset 0.
+bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
+  // Disabled range check, kept for reference:
+  // The passed offset is relative to address of the branch.
+  // On ARM a branch displacement is calculated relative to address
+  // of the branch + 8.
+  //
+  // offset -= 8;
+  // return (Assembler::is_simm24(offset));
+  return false;
+}
+
+// No 64-bit constant is treated as "simple"; the value is ignored.
+const bool Matcher::isSimpleConstant64(jlong value) {
+  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
+  return false;
+}
+
+// The count parameter of the ClearArray node is in bytes, so no scaling is
+// applied to it.
+const bool Matcher::init_array_count_is_in_bytes = true;
+
+// A conditional move of a long needs 2 CMOVs, hence a cost of 2.
+const int Matcher::long_cmove_cost() { return 2; }
+
+// CMOVF/CMOVD are expensive on ARM; their cost is set to ConditionalMoveLimit.
+const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
+
+// Does the CPU require late expand (see block.cpp for description of late
+// expand)? Not on this port.
+const bool Matcher::require_postalloc_expand = false;
+
+// Do we need to mask the count passed to shift instructions, or does the
+// CPU only look at the lower 5/6 bits anyway? On this port the count must
+// be masked.
+// FIXME: does this handle vector shifts as well?
+const bool Matcher::need_masked_shift_count = true;
+
+// Set to true on this port.
+// NOTE(review): presumably tells shared code that ConvI2L nodes must keep
+// their type information -- confirm against the shared Matcher declaration.
+const bool Matcher::convi2l_type_required = true;
+
+// Should the Matcher clone shifts on addressing modes, expecting them
+// to be subsumed into complex addressing expressions, or compute them
+// into registers? This port delegates the decision to
+// clone_base_plus_offset_address().
+bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
+  return clone_base_plus_offset_address(m, mstack, address_visited);
+}
+
+// No address reshaping is done on this port; the body is empty.
+void Compile::reshape_address(AddPNode* addp) {
+}
+
+// Must not be called on this port (ShouldNotCallThis()); the return value
+// only satisfies the compiler.
+bool Matcher::narrow_oop_use_complex_address() {
+  ShouldNotCallThis();
+  return false;
+}
+
+// Must not be called on this port (ShouldNotCallThis()); the return value
+// only satisfies the compiler.
+bool Matcher::narrow_klass_use_complex_address() {
+  ShouldNotCallThis();
+  return false;
+}
+
+// Must not be called on this port (ShouldNotCallThis()); the return value
+// only satisfies the compiler.
+bool Matcher::const_oop_prefer_decode() {
+  ShouldNotCallThis();
+  return true;
+}
+
+// Must not be called on this port (ShouldNotCallThis()); the return value
+// only satisfies the compiler.
+bool Matcher::const_klass_prefer_decode() {
+  ShouldNotCallThis();
+  return true;
+}
+
+// Is it better to copy float constants, or load them directly from memory?
+// Intel can load a float constant from a direct address, requiring no
+// extra registers.  Most RISCs have to materialize an address into a
+// register first, so they do better copying the constant from the stack.
+// Float constants are therefore not rematerialized on this port.
+const bool Matcher::rematerialize_float_constants = false;
+
+// If the CPU can load and store misaligned doubles directly then no fixup
+// is needed.  Else we split the double into 2 integer pieces and move it
+// piece-by-piece.  This only happens when passing doubles into C code, as
+// the Java calling convention forces doubles to be aligned.
+const bool Matcher::misaligned_doubles_ok = false;
+
+// No-op on ARM: no fixup is applied to the node for implicit null checks.
+void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
+}
+
+// Advertise here if the CPU requires explicit rounding operations
+// to implement the UseStrictFP mode. This port does not.
+const bool Matcher::strict_fp_requires_explicit_rounding = false;
+
+// Are floats converted to double when stored to stack during
+// deoptimization? No: ARM does not handle callee-save floats.
+bool Matcher::float_in_double() {
+  return false;
+}
+
+// Do ints take an entire long register or just half?
+// The relevant question is how the int is callee-saved.  In _LP64 the
+// whole long is written and de-opt'ing has to extract the relevant
+// 32 bits; in not-_LP64 only the low 32 bits are written.
+// Here the answer is fixed to "just half".
+const bool Matcher::int_in_long = false;
+
+// Report whether this register is ever used to pass a Java argument.  The
+// answer is consulted on startup, when generateOptoStub builds the trampoline
+// stubs: registers not accepted here will be killed by the VM call in the
+// trampoline, so arguments in them will not be available to the callee.
+bool Matcher::can_be_java_arg( int reg ) {
+  // Core registers R0..R3 are accepted one by one.
+  const bool is_core_arg = (reg == R_R0_num) || (reg == R_R1_num) ||
+                           (reg == R_R2_num) || (reg == R_R3_num);
+
+  // VFP registers are accepted as the inclusive range S0..S15.
+  const bool is_vfp_arg = (R_S0_num <= reg) && (reg <= R_S15_num);
+
+  return is_core_arg || is_vfp_arg;
+}
+
+bool Matcher::is_spillable_arg( int reg ) {
+  return can_be_java_arg(reg);  // spillable exactly when the register can carry a Java argument
+}
+
+bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
+  return false;  // divisor is not examined; the answer is always false
+}
+
+// Register mask for the DIVI projection of divmodI
+RegMask Matcher::divI_proj_mask() {
+  ShouldNotReachHere();  // NOTE(review): presumably divmodI is never matched on this port -- confirm
+  return RegMask();      // default-constructed mask
+}
+
+// Register mask for the MODI projection of divmodI
+RegMask Matcher::modI_proj_mask() {
+  ShouldNotReachHere();  // NOTE(review): presumably divmodI is never matched on this port -- confirm
+  return RegMask();      // default-constructed mask
+}
+
+// Register mask for the DIVL projection of divmodL
+RegMask Matcher::divL_proj_mask() {
+  ShouldNotReachHere();  // NOTE(review): presumably divmodL is never matched on this port -- confirm
+  return RegMask();      // default-constructed mask
+}
+
+// Register mask for the MODL projection of divmodL
+RegMask Matcher::modL_proj_mask() {
+  ShouldNotReachHere();  // NOTE(review): presumably divmodL is never matched on this port -- confirm
+  return RegMask();      // default-constructed mask
+}
+
+const RegMask Matcher::method_handle_invoke_SP_save_mask() {
+  return FP_REGP_mask();  // presumably the mask generated for register class FP_REGP -- confirm
+}
+
+bool maybe_far_call(const CallNode *n) {  // true when the entry point fails the _reachable_from_cache() test
+  return !MacroAssembler::_reachable_from_cache(n->as_Call()->entry_point());
+}
+
+bool maybe_far_call(const MachCallNode *n) {  // true when the entry point fails the _reachable_from_cache() test
+  return !MacroAssembler::_reachable_from_cache(n->as_MachCall()->entry_point());
+}
+
+%}
+
+//----------ENCODING BLOCK-----------------------------------------------------
+// This block specifies the encoding classes used by the compiler to output
+// byte streams.  Encoding classes are parameterized macros used by
+// Machine Instruction Nodes in order to generate the bit encoding of the
+// instruction.  Operands specify their base encoding interface with the
+// interface keyword.  Four interfaces are currently supported:
+// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
+// operand to generate a function which returns its register number when
+// queried.   CONST_INTER causes an operand to generate a function which
+// returns the value of the constant when queried.  MEMORY_INTER causes an
+// operand to generate four functions which return the Base Register, the
+// Index Register, the Scale Value, and the Offset Value of the operand when
+// queried.  COND_INTER causes an operand to generate six functions which
+// return the encoding code (ie - encoding bits for the instruction)
+// associated with each basic boolean condition for a conditional instruction.
+//
+// Instructions specify two basic values for encoding.  Again, a function
+// is available to check if the constant displacement is an oop. They use the
+// ins_encode keyword to specify their encoding classes (which must be
+// a sequence of enc_class names, and their parameters, specified in
+// the encoding block), and they use the
+// opcode keyword to specify, in order, their primary, secondary, and
+// tertiary opcode.  Only the opcode sections which a particular instruction
+// needs for encoding need to be specified.
+encode %{
+  enc_class call_epilog %{
+    // nothing is emitted by this encoding
+  %}
+
+  enc_class Java_To_Runtime (method meth) %{
+    // CALL directly to the runtime, emitted with a runtime_call relocation
+    emit_call_reloc(cbuf, as_MachCall(), $meth, runtime_call_Relocation::spec());
+  %}
+
+  enc_class Java_Static_Call (method meth) %{
+    // With a Java method attached this is a CALL to the fixup routine, which
+    // uses ScopeDesc info to determine who we intended to call.
+    if (_method != NULL) {
+      int midx = resolved_method_index(cbuf);
+      RelocationHolder reloc = _optimized_virtual ? opt_virtual_call_Relocation::spec(midx)
+                                                  : static_call_Relocation::spec(midx);
+      emit_call_reloc(cbuf, as_MachCall(), $meth, reloc);
+
+      // Emit stubs for static call.
+      address interp_stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
+      if (interp_stub == NULL) {
+        ciEnv::current()->record_failure("CodeCache is full");
+        return;
+      }
+    } else {
+      // No method attached: emit the call with a runtime_call relocation.
+      emit_call_reloc(cbuf, as_MachCall(), $meth, runtime_call_Relocation::spec());
+    }
+  %}
+
+  enc_class save_last_PC %{
+    // Store mark + ret_addr_offset as last_Java_pc; the insts mark is saved here and restored at the end
+    address mark = cbuf.insts()->mark();
+    debug_only(int off0 = cbuf.insts_size());
+    MacroAssembler _masm(&cbuf);
+    int ret_addr_offset = as_MachCall()->ret_addr_offset();
+    __ adr(lr, mark + ret_addr_offset);                               // lr = mark + ret_addr_offset
+    __ str(lr, Address(Rthread, JavaThread::last_Java_pc_offset()));  // written at last_Java_pc_offset() from Rthread
+    debug_only(int off1 = cbuf.insts_size());
+    assert(off1 - off0 == 2 * Assembler::InstructionSize, "correct size prediction");  // the code must grow by exactly two instruction sizes
+    // restore mark
+    cbuf.insts()->set_mark(mark);
+  %}
+
+  enc_class preserve_SP %{
+    // Copy sp into Rmh_SP_save; the insts mark is saved here and restored at the end
+    address mark = cbuf.insts()->mark();
+    debug_only(int off0 = cbuf.insts_size());
+    MacroAssembler _masm(&cbuf);
+    // FP is preserved across all calls, even compiled calls.
+    // Use it to preserve SP in places where the callee might change the SP.
+    __ mov(Rmh_SP_save, sp);
+    debug_only(int off1 = cbuf.insts_size());
+    assert(off1 - off0 == 1 * Assembler::InstructionSize, "correct size prediction");  // same size constant as save_last_PC, not a literal 4
+    // restore mark
+    cbuf.insts()->set_mark(mark);
+  %}
+
+  enc_class restore_SP %{
+    MacroAssembler _masm(&cbuf);
+    __ mov(sp, Rmh_SP_save);  // sp = Rmh_SP_save, the reverse of the move emitted by preserve_SP
+  %}
+
+  enc_class Java_Dynamic_Call (method meth) %{
+    MacroAssembler _masm(&cbuf);
+    Register R12_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode());  // register object for the inline cache register
+    assert(R12_ic_reg == rscratch2/*Ricklass*/, "should be");
+    __ set_inst_mark();  // mark taken just before the movw/movt pair
+    __ movw_i(R12_ic_reg, ((unsigned int)Universe::non_oop_word()) & 0xffff);  // low 16 bits of non_oop_word
+    __ movt_i(R12_ic_reg, ((unsigned int)Universe::non_oop_word()) >> 16);     // high 16 bits of non_oop_word
+    address  virtual_call_oop_addr = __ inst_mark();  // the mark taken above, handed to the relocation below
+    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
+    // who we intended to call.
+    int method_index = resolved_method_index(cbuf);
+    emit_call_reloc(cbuf, as_MachCall(), $meth, virtual_call_Relocation::spec(virtual_call_oop_addr, method_index));
+  %}
+
+  enc_class LdReplImmI(immI src, regD dst, iRegI tmp, int cnt, int wth) %{
+    // FIXME: load from constant table?
+    // Build a 32-bit word holding the constant replicated "count" times with width "width" bytes, then copy it into both halves of dst
+    int count = $cnt$$constant;
+    int width = $wth$$constant;
+    assert(count*width == 4, "sanity");  // the replicated lanes must fill exactly 4 bytes
+    int val = $src$$constant;
+    if (width < 4) {
+      int bit_width = width * 8;
+      val &= (((int)1) << bit_width) - 1; // mask off sign bits
+      for (int i = 0; i < count - 1; i++) {
+        val |= (val << bit_width);  // each pass ORs in a copy shifted up by one lane
+      }
+    }
+    MacroAssembler _masm(&cbuf);
+
+    if (val == -1) {
+      __ mvn_i($tmp$$Register, 0);  // all ones: single instruction
+    } else if (val == 0) {
+      __ mov_i($tmp$$Register, 0);  // zero: single instruction
+    } else {
+      __ movw_i($tmp$$Register, val & 0xffff);             // low halfword
+      __ movt_i($tmp$$Register, (unsigned int)val >> 16);  // high halfword
+    }
+    __ vmov_f64($dst$$FloatRegister, $tmp$$Register, $tmp$$Register);  // the same core register supplies both source operands
+  %}
+
+  enc_class LdReplImmF(immF src, regD dst, iRegI tmp) %{
+    // Replicate float con 2 times and pack into vector (8 bytes) in regD.
+    float fval = $src$$constant;
+    int val = jint_cast(fval);  // raw bit pattern of the float, without reading it through an int pointer
+    MacroAssembler _masm(&cbuf);
+
+    if (val == -1) {
+      __ mvn_i($tmp$$Register, 0);  // all ones: single instruction
+    } else if (val == 0) {
+      __ mov_i($tmp$$Register, 0);  // zero: single instruction
+    } else {
+      __ movw_i($tmp$$Register, val & 0xffff);             // low halfword
+      __ movt_i($tmp$$Register, (unsigned int)val >> 16);  // high halfword
+    }
+    __ vmov_f64($dst$$FloatRegister, $tmp$$Register, $tmp$$Register);  // the same core register supplies both source operands
+  %}
+
+  enc_class enc_String_Compare(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result,
+                               iRegI tmp1, iRegI tmp2, Q0_regD ftmp1, Q1_regD ftmp2,
+                               int bytes_per_char1, int bytes_per_char2) %{
+    MacroAssembler _masm(&cbuf);
+
+    Register       str1 = $str1$$Register;
+    Register       str2 = $str2$$Register;
+    Register       cnt1 = $cnt1$$Register;
+    Register       cnt2 = $cnt2$$Register;
+    Register       tmp1 = $tmp1$$Register;
+    Register       tmp2 = $tmp2$$Register;
+    FloatRegister ftmp1 = $ftmp1$$FloatRegister;
+    FloatRegister ftmp2 = $ftmp2$$FloatRegister;
+    Register     result = $result$$Register;
+    int bytes_per_char1 = $bytes_per_char1;  // tested against 1 and 2 below
+    int bytes_per_char2 = $bytes_per_char2;  // tested against 1 and 2 below
+
+    typedef void (Assembler::*ldfp)(Register, const Address &, Assembler::Condition);           // conditional load emitter
+    typedef void (Assembler::*usubp)(Register, Register, Register, Assembler::Condition);       // conditional packed subtract emitter
+    ldfp ldf_16 = &Assembler::ldrh;
+    ldfp ldf_8  = &Assembler::ldrb;
+
+    // slow path: single char load
+    int cnt_per_char = bytes_per_char1==2 && bytes_per_char2==2 ? 2 : 1;  // count units per char: 2 only when both strings use 2-byte chars
+    ldfp lds1 = bytes_per_char1 == 2 ? ldf_16 : ldf_8;
+    ldfp lds2 = bytes_per_char2 == 2 ? ldf_16 : ldf_8;
+    usubp usub = bytes_per_char1 == 1 ? (usubp)&Assembler::usub8 : (usubp)&Assembler::usub16;  // lane width follows bytes_per_char1
+
+    assert_different_registers(str1, str2, cnt1, cnt2, tmp1, tmp2, result);
+
+    Label Llength_diff, Ldone, Lshort_loop;
+
+    BLOCK_COMMENT("string_compare {");
+
+    // for UU we count bytes (saves 1 insn), for the others we count in chars, so a 2-byte string count is halved
+    if (cnt_per_char == 1 && bytes_per_char1 == 2)
+      __ lsr(cnt1, cnt1, 1);
+    if (cnt_per_char == 1 && bytes_per_char2 == 2)
+      __ lsr(cnt2, cnt2, 1);
+
+    // Compute the minimum of the string lengths and save the difference.
+    __ subs(tmp1, cnt1, cnt2);            // tmp1 = cnt1 - cnt2, consumed at Llength_diff
+    __ mov(cnt2, cnt1, Assembler::LE); // cnt2 = min(cnt1, cnt2)
+
+    // Check if the strings start at the same location.
+    __ cmp(str1, str2);
+    __ b(Llength_diff, Assembler::EQ);
+
+    // without NEON the fast path is available only for UU and LL
+    if ((VM_Version::features() & FT_AdvSIMD) || bytes_per_char1 == bytes_per_char2) {
+      Label Lshort_string, Lnext_word, Ldifference;
+
+      // A very short string (fewer than 8+4 count units) goes to the per-char loop
+      __ cmp(cnt2, 8+4);
+      __ b(Lshort_string, Assembler::LT);
+
+      // Compare words
+      {
+        const int bits_per_char = bytes_per_char1==1 && bytes_per_char2==1 ? 8 : 16;  // 8 only for LL
+        // Check first few chars to avoid excessive processing
+        if (bytes_per_char1 == 1 && bytes_per_char2 == 1) {
+          Label Lfull_speed;
+          __ ldr(tmp2, __ post(str1, wordSize));              // first word of str1, pointer advanced
+          __ ldr(result, __ post(str2, wordSize));            // first word of str2, pointer advanced
+          (_masm.*usub)(result, tmp2, result, Assembler::AL); // result = tmp2 - result, lane by lane
+          __ tst(result, result);
+          __ b(Lfull_speed, Assembler::EQ);                   // no lane differs
+
+          __ rbit(cnt1, result);                              // rbit + clz: trailing zero bits of the difference
+          __ clz(cnt1, cnt1);
+          __ bic(cnt1, cnt1, bits_per_char-1);                // round down to a char boundary
+          __ lsr(result, result, cnt1);
+          __ lsr(tmp2, tmp2, cnt1);
+          __ ubfx(result, result, 0, bits_per_char);          // keep only the lowest char of the difference
+          __ ubfx(tmp2, tmp2, 0, bits_per_char);              // and of the str1 word
+          __ cmp(result, tmp2);
+          __ sub(result, result, 1<<bits_per_char, Assembler::HI);  // difference above a: the lane subtract wrapped, make it negative
+          __ b(Ldone);
+
+          __ bind(Lfull_speed);
+        } else {
+          (_masm.*lds1)(result, __ post(str1, bytes_per_char1), Assembler::AL);  // first char pair
+          (_masm.*lds2)(cnt1, __ post(str2, bytes_per_char2), Assembler::AL);
+          __ subs(result, result, cnt1);
+          __ b(Ldone, Assembler::NE);
+          (_masm.*lds1)(result, __ post(str1, bytes_per_char1), Assembler::AL);  // second char pair
+          (_masm.*lds2)(cnt1, __ post(str2, bytes_per_char2), Assembler::AL);
+          __ subs(result, result, cnt1);
+          __ b(Ldone, Assembler::NE);
+        }
+
+        if (VM_Version::features() & FT_AdvSIMD) {
+#define LD(expand_needed,reg,str)        \
+          if (!(expand_needed))                                  \
+            __ vld1_64((reg), __ post(str, 8), Assembler::ALIGN_STD);     \
+          else { \
+            __ vld1_32((reg), 0, __ post(str, 4), false);         \
+            __ vmovl_8u((reg), (reg)); /* kills reg+1 */        \
+          }
+          const int cnt_per_LD  = bytes_per_char1==bytes_per_char2 ? 8 : 4;      // count units consumed by one LD
+          const bool expand_needed1 = bytes_per_char1==1 && bytes_per_char2==2;  // str1 is the 1-byte string of a mixed pair
+          const bool expand_needed2 = bytes_per_char1==2 && bytes_per_char2==1;  // str2 is the 1-byte string of a mixed pair
+
+          __ sub(cnt2, cnt2, 2*cnt_per_char*(16/bits_per_char) + cnt_per_LD); // 4 chars processed above for LL, 2 for the rest of encodings
+          __ bind(Lnext_word);
+          LD(expand_needed1,ftmp1,str1);
+          LD(expand_needed2,ftmp2,str2);
+          if (bits_per_char == 8)
+            __ vsub_64_8(ftmp2, ftmp1, ftmp2);
+          else
+            __ vsub_64_16(ftmp2, ftmp1, ftmp2);
+          __ vmov_f64(result, cnt1, ftmp2);     // difference moved into the result/cnt1 pair
+          __ orrs(tmp2, result, cnt1);          // non-zero when any lane differs
+          __ b(Ldifference, Assembler::NE);
+          __ subs(cnt2, cnt2, cnt_per_LD);
+          __ b(Lnext_word, Assembler::HS);
+
+          // check the tail
+          __ adds(cnt2, cnt2, cnt_per_LD);      // undo the last subtraction: remaining count
+          __ b(Llength_diff, Assembler::EQ);
+          __ b(Lshort_loop);
+
+          __ bind(Ldifference);
+          __ vmov_f64(tmp2, cnt2, ftmp1);       // str1 data moved into the tmp2/cnt2 pair
+          __ tst(result, result);
+          __ mov(result, cnt1, Assembler::EQ);  // first half equal: use the second half of the difference
+          __ mov(tmp2, cnt2, Assembler::EQ);    // and the matching half of the str1 data
+        } else {
+          __ sub(cnt2, cnt2, 2*cnt_per_char*(16/bits_per_char)+4); // Skip 4 or 2 chars processed above. The last word is a special case
+
+          // Move both string pointers to the last word of their
+          // strings, negate the remaining count.
+          __ lea(str1, Address(str1, cnt2));
+          __ lea(str2, Address(str2, cnt2));
+          __ neg(cnt2, cnt2);
+
+          // Loop, loading words and comparing them into tmp2.
+          __ bind(Lnext_word);
+          __ ldr(tmp2, Address(str1, cnt2));
+          __ ldr(result, Address(str2, cnt2));
+          __ teq(result, tmp2);
+          __ b(Ldifference, Assembler::NE);
+          __ adds(cnt2, cnt2, wordSize); // cnt is per-byte for both UU and LL
+          __ b(Lnext_word, Assembler::LT);
+
+          // Last word.  In the case where length == 2 we compare the
+          // same word twice, but that's still faster than another
+          // conditional branch.
+
+          __ ldr(tmp2, Address(str1));
+          __ ldr(result, Address(str2));
+          __ teq(result, tmp2);
+          __ b(Llength_diff, Assembler::EQ);
+
+          // Find the first different characters in the words and
+          // compute their difference.
+          __ bind(Ldifference);
+          (_masm.*usub)(result, tmp2, result, Assembler::AL);
+        }
+
+        // now result is a-b and tmp2 is a
+        if (bits_per_char == 8) {
+          __ rbit(cnt1, result);                        // rbit + clz: trailing zero bits of the difference
+          __ clz(cnt1, cnt1);
+          __ bic(cnt1, cnt1, bits_per_char-1);          // round down to a char boundary
+          __ lsr(result, result, cnt1);
+          __ lsr(tmp2, tmp2, cnt1);
+          __ ubfx(result, result, 0, bits_per_char);
+          __ ubfx(tmp2, tmp2, 0, bits_per_char);
+        } else {
+          __ lsls(cnt1, result, 16);                    // EQ when the low halfword of the difference is zero
+          __ uxth(result, result, ror(16), Assembler::EQ);
+          __ uxth(result, result, ror(), Assembler::NE);
+          __ uxth(tmp2, tmp2, ror(16), Assembler::EQ);
+          __ uxth(tmp2, tmp2, ror(), Assembler::NE);
+        }
+        __ cmp(result, tmp2);
+        __ sub(result, result, 1<<bits_per_char, Assembler::HI);  // difference above a: the lane subtract wrapped, make it negative
+
+        __ b(Ldone);
+      }
+
+      __ bind(Lshort_string);
+    }
+
+    // Is the minimum length zero?
+    __ cbz(cnt2, Llength_diff);
+
+    __ bind(Lshort_loop);
+    (_masm.*lds1)(result, __ post(str1, bytes_per_char1), Assembler::AL);
+    (_masm.*lds2)(cnt1, __ post(str2, bytes_per_char2), Assembler::AL);
+    __ subs(result, result, cnt1);        // result = char from str1 - char from str2
+    __ b(Ldone, Assembler::NE);
+    __ subs(cnt2, cnt2, cnt_per_char);
+    __ b(Lshort_loop, Assembler::NE);
+
+    // Strings are equal up to min length.  Return the length difference.
+    __ bind(Llength_diff);
+    __ asr(result, tmp1, cnt_per_char-1); // input in bytes, result in chars, nice convention
+
+    // That's it
+    __ bind(Ldone);
+
+    BLOCK_COMMENT("} string_compare");
+  %}
+
+  enc_class enc_Array_Equals(R0RegP ary1, R1RegP ary2, iRegI cnt, iRegI tmp2, iRegI result, int elemSize, bool isArray) %{
+    Label Ldone, Lloop, Lset_result, Lshort_array, Lnext_word, Lshort_array_cont, Lone_byte;
+    MacroAssembler _masm(&cbuf);
+
+    Register   ary1 = $ary1$$Register;
+    Register   ary2 = $ary2$$Register;
+    Register   cnt =  $cnt$$Register;
+    Register   tmp2 = $tmp2$$Register;
+    Register result = $result$$Register;
+    int elemSize    = $elemSize$$constant;  // tested against 1 and 2 below
+    bool isArray    = $isArray$$constant;   // true: ary1/ary2 are array oops; false: they are compared as raw data pointers
+
+    assert_different_registers(ary1, ary2, cnt, tmp2, result);
+
+    if (isArray) {
+      int length_offset  = arrayOopDesc::length_offset_in_bytes();
+      int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);  // NOTE(review): T_CHAR is used for both element sizes -- confirm the base offset matches for byte arrays
+
+      BLOCK_COMMENT(elemSize == 2 ? "char_array_equalsUU {" : "char_array_equalsLL {");
+
+      // return true if the same array
+      __ cmpoop(ary1, ary2);
+      __ b(Lset_result, Assembler::EQ); // equal
+
+      __ ands(result, ary1, ary1);   // result = ary1, so it is already 0 when the branch below is taken
+      __ b(Ldone, Assembler::EQ);    // not equal
+
+      __ ands(result, ary2, ary2);   // result = ary2, so it is already 0 when the branch below is taken
+      __ b(Ldone, Assembler::EQ);    // not equal
+
+      //load the lengths of arrays
+      __ ldr(cnt, Address(ary1, length_offset));
+      __ ldr(tmp2, Address(ary2, length_offset));
+
+      // return false if the two arrays are not equal length
+      __ teq(cnt, tmp2);
+      __ b(Lset_result, Assembler::NE);    // not equal
+
+      __ tst(cnt, cnt);
+      __ b(Lset_result, Assembler::EQ);    // zero-length arrays are equal
+
+      // load array addresses
+      __ add(ary1, ary1, base_offset);
+      __ add(ary2, ary2, base_offset);
+    } else {
+    // Check if the strings start at the same location.
+      BLOCK_COMMENT(elemSize == 2 ? "string_equalsUU {" : "string_equalsLL {");
+
+      __ cmp(ary1, ary2);
+      __ b(Lset_result, Assembler::EQ);
+    }
+
+    __ cmp(cnt, 4*(2/elemSize));          // threshold: 4 when elemSize is 2, 8 when elemSize is 1
+    __ b(Lshort_array, Assembler::LT);
+
+    {
+      // Move both string pointers to the last word of their
+      // strings, negate the remaining count, and convert it to bytes if needed.
+      if (isArray && elemSize == 2)
+          __ lsl(cnt, cnt, 1);
+      __ sub(cnt, cnt, wordSize); // The last word is a special case
+
+      __ lea(ary1, Address(ary1, cnt));
+      __ lea(ary2, Address(ary2, cnt));
+      __ neg(cnt, cnt);
+
+      // Loop, loading words and comparing them.
+      __ bind(Lnext_word);
+      __ ldr(result, Address(ary1, cnt));
+      __ ldr(tmp2, Address(ary2, cnt));
+      __ cmp(result, tmp2);
+      __ b(Lset_result, Assembler::NE);
+      __ adds(cnt, cnt, wordSize);
+      __ b(Lnext_word, Assembler::LT);
+
+      // Last word.  In the case where length < 4 we compare the
+      // same bytes twice, but that's still faster than another
+      // conditional branch.
+      __ ldr(result, Address(ary1));
+      __ ldr(tmp2, Address(ary2));
+      __ cmp(result, tmp2);
+      __ b(Lset_result);                   // the flags from this cmp decide the result
+    }
+
+    __ bind(Lshort_array); {
+      if (!isArray) {
+        __ tst(cnt, cnt);
+        __ b(Lset_result, Assembler::EQ);  // zero count: reported as equal
+      }
+
+      if (elemSize == 1) {
+        __ subs(cnt, cnt, 1);
+        __ b(Lone_byte, Assembler::EQ);    // exactly one byte to compare
+      }
+      __ bind(Lshort_array_cont);
+
+      __ ldrh(result, __ post(ary1, 2));   // compare one halfword per iteration
+      __ ldrh(tmp2, __ post(ary2, 2));
+      __ cmp(result, tmp2);
+      __ b(Lset_result, Assembler::NE);
+      __ subs(cnt, cnt, isArray ? 2/elemSize : 2);  // step is 1 for 2-byte arrays, otherwise 2
+      __ b(Lshort_array_cont, Assembler::GT);
+    }
+
+    if (elemSize == 1) {
+      __ cmn(cnt, 1);                      // EQ when cnt is -1
+      __ b(Lset_result, Assembler::EQ);
+
+      __ bind(Lone_byte); {
+        __ ldrb(result, Address(ary1));    // compare the single remaining byte
+        __ ldrb(tmp2, Address(ary2));
+        __ cmp(result, tmp2);
+      }
+    }
+
+    __ bind(Lset_result);
+    __ mov(result, 1, Assembler::EQ);      // flags EQ on arrival: result = 1
+    __ mov(result, 0, Assembler::NE);      // flags NE on arrival: result = 0
+
+    __ bind(Ldone);
+
+    if (isArray)
+      BLOCK_COMMENT(elemSize == 2 ? "} char_array_equalsUU" : "} char_array_equalsLL");
+    else
+      BLOCK_COMMENT(elemSize == 2? "} string_equalsUU" : "} string_equalsLL");
+
+    %}
+
+  enc_class enc_Char_Array_Compress(R2RegP src, R1RegP dst, R3RegI len, R9RegI tmp1,
+                                    Q0_regD tmp2, Q1_regD tmp3, R12RegI tmp4,
+                                    R0RegI result, flagsReg ccr) %{
+    Label Ldone, Lloop1, Lset_result;
+    MacroAssembler _masm(&cbuf);
+
+    Register      src    = $src$$Register;
+    Register      dst    = $dst$$Register;
+    Register      len    = $len$$Register;
+    Register      tmp1   = $tmp1$$Register;
+    Register      result = $result$$Register;
+    // tmp2, tmp3 and tmp4 are consumed by NEON stub
+
+    BLOCK_COMMENT("char_array_compress {");
+
+    __ movs(result, len);                  // result starts as len; it is zeroed at Lset_result on failure
+    __ b(Ldone, Assembler::EQ);            // zero length: done with result 0
+
+    if (VM_Version::features() & FT_AdvSIMD) {
+      Label Lloop2;
+      __ cmp(len, 2+8+16); // neon stub consumes minimum 24 chars, and 2 more are checked inline first
+      __ b(Lloop1, Assembler::LO);
+
+      // check first 2 chars in hope they quickly give information about encoding
+      __ ldrh(tmp1, __ post(src, 2));
+      __ strb(tmp1, __ post(dst, 1));
+      __ lsrs(tmp1, tmp1, 8);                          // EQ when the char has no bits above the low byte
+      __ ldrh(tmp1, __ post(src, 2), Assembler::EQ);   // second char is only handled when the first one fit
+      __ strb(tmp1, __ post(dst, 1), Assembler::EQ);
+      __ lsrs(tmp1, tmp1, 8, Assembler::EQ);
+      __ b(Lset_result, Assembler::NE);                // a char did not fit: result becomes 0
+      __ sub(len, len, 2);
+
+      __ call(StubRoutines::aarch32::string_compress_neon());
+      __ b(Ldone, Assembler::EQ);  // NOTE(review): relies on the condition flags left by the stub -- confirm its contract
+    }
+
+    // nothing better we could do with Aarch32 basic instruction set
+    __ bind(Lloop1); {
+      __ ldrh(tmp1, __ post(src, 2));
+      __ strb(tmp1, __ post(dst, 1));
+      __ rsbs(tmp1, tmp1, 0x100); // GT good, LE bad
+      __ subs(len, len, 1, Assembler::GT);   // count down only after a good char
+      __ b(Lloop1, Assembler::GT);           // continue while the char was good and chars remain
+    }
+
+    __ cmp(len, 0);                          // NE here means the loop stopped before consuming all chars
+    __ bind(Lset_result);
+    __ mov(result, 0, Assembler::NE);
+
+    __ bind(Ldone);
+    BLOCK_COMMENT("} char_array_compress");
+    %}
+
+  enc_class enc_Byte_Array_Inflate(R0RegP src, R1RegP dst, R2RegI len,
+                                   iRegI tmp1, Q0_regD tmp2, flagsReg ccr) %{
+    Label Ldone, Lloop1, Lone_char;
+    MacroAssembler _masm(&cbuf);
+
+    Register      src = $src$$Register;
+    Register      dst = $dst$$Register;
+    Register      len = $len$$Register;
+    Register     tmp1 = $tmp1$$Register;
+    // tmp2 is consumed by NEON stub
+
+    BLOCK_COMMENT("byte_array_inflate {");
+
+    __ cbz(len, Ldone);                    // nothing to do for a zero length
+    if (VM_Version::features() & FT_AdvSIMD) {
+      Label Lskip_simd;
+
+      __ cmp(len, 16);
+      __ b(Lskip_simd, Assembler::LO);     // fewer than 16 bytes: scalar code only
+      __ call(StubRoutines::aarch32::string_inflate_neon());
+      __ b(Ldone, Assembler::EQ);  // NOTE(review): relies on the condition flags left by the stub -- confirm its contract
+      __ bind(Lskip_simd);
+    }
+
+    // nothing better we could do with Aarch32 basic instruction set
+    __ subs(len, len, 1);                  // bias the count by one so the odd trailing byte can be told apart
+    __ b(Lone_char, Assembler::EQ);        // exactly one byte left
+    __ bind(Lloop1); {
+      __ ldrb(tmp1, __ post(src, 1));      // two bytes are widened to halfwords per iteration
+      __ strh(tmp1, __ post(dst, 2));
+      __ ldrb(tmp1, __ post(src, 1));
+      __ strh(tmp1, __ post(dst, 2));
+      __ subs(len, len, 2);
+      __ b(Lloop1, Assembler::HI);
+    }
+    __ b(Ldone, Assembler::LO);            // count went below zero: no trailing byte; EQ falls through to Lone_char
+
+    __ bind(Lone_char);
+    __ ldrb(tmp1, __ post(src, 1));
+    __ strh(tmp1, __ post(dst, 2));
+
+    __ bind(Ldone);
+    BLOCK_COMMENT("} byte_array_inflate");
+  %}
+
+%}
+
+//----------FRAME--------------------------------------------------------------
+// Definition of frame structure and management information.
+//
+//  S T A C K   L A Y O U T    Allocators stack-slot number
+//                             |   (to get allocators register number
+//  G  Owned by    |        |  v    add VMRegImpl::stack0)
+//  r   CALLER     |        |
+//  o     |        +--------+      pad to even-align allocators stack-slot
+//  w     V        |  pad0  |        numbers; owned by CALLER
+//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
+//  h     ^        |   in   |  5
+//        |        |  args  |  4   Holes in incoming args owned by SELF
+//  |     |        |        |  3
+//  |     |        +--------+
+//  V     |        | old out|      Empty on Intel, window on Sparc
+//        |    old |preserve|      Must be even aligned.
+//        |     SP-+--------+----> Matcher::_old_SP, 8 (or 16 in LP64)-byte aligned
+//        |        |   in   |  3   area for Intel ret address
+//     Owned by    |preserve|      Empty on Sparc.
+//       SELF      +--------+
+//        |        |  pad2  |  2   pad to align old SP
+//        |        +--------+  1
+//        |        | locks  |  0
+//        |        +--------+----> VMRegImpl::stack0, 8 (or 16 in LP64)-byte aligned
+//        |        |  pad1  | 11   pad to align new SP
+//        |        +--------+
+//        |        |        | 10
+//        |        | spills |  9   spills
+//        V        |        |  8   (pad0 slot for callee)
+//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
+//        ^        |  out   |  7
+//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
+//     Owned by    +--------+
+//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
+//        |    new |preserve|      Must be even-aligned.
+//        |     SP-+--------+----> Matcher::_new_SP, even aligned
+//        |        |        |
+//
+// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
+//         known from SELF's arguments and the Java calling convention.
+//         Region 6-7 is determined per call site.
+// Note 2: If the calling convention leaves holes in the incoming argument
+//         area, those holes are owned by SELF.  Holes in the outgoing area
+//         are owned by the CALLEE.  Holes should not be necessary in the
+//         incoming area, as the Java calling convention is completely under
+//         the control of the AD file.  Doubles can be sorted and packed to
+//         avoid holes.  Holes in the outgoing arguments may be necessary for
+//         varargs C calling conventions.
+// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
+//         even aligned with pad0 as needed.
+//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
+//         region 6-11 is even aligned; it may be padded out more so that
+//         the region from SP to FP meets the minimum stack alignment.
+
+frame %{
+  // What direction does stack grow in (assumed to be same for native & Java)
+  stack_direction(TOWARDS_LOW);
+
+  // These two registers define part of the calling convention
+  // between compiled code and the interpreter.
+  inline_cache_reg(R_Ricklass);          // Inline Cache Register or Method* for I2C
+  interpreter_method_oop_reg(R_Rmethod); // Method Oop Register when calling interpreter
+
+  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
+  cisc_spilling_operand_name(indOffset);
+
+  // Number of stack slots consumed by a Monitor enter
+  sync_stack_slots(1 * VMRegImpl::slots_per_word);
+
+  // Compiled code's Frame Pointer
+  frame_pointer(R_R13);
+
+  // Stack alignment requirement
+  stack_alignment(StackAlignmentInBytes);
+  //  LP64: Alignment size in bytes (128-bit -> 16 bytes)
+  // !LP64: Alignment size in bytes (64-bit  ->  8 bytes)
+
+  // Number of stack slots between incoming argument block and the start of
+  // a new frame.  The PROLOG must add this many slots to the stack.  The
+  // EPILOG must remove this many slots.
+  // FP + LR
+  in_preserve_stack_slots(2 * VMRegImpl::slots_per_word);
+
+  // Number of outgoing stack slots killed above the out_preserve_stack_slots
+  // for calls to C.  Supports the var-args backing area for register parms.
+  // ADLC doesn't support parsing expressions, so I folded the math by hand.
+  varargs_C_out_slots_killed( 0);
+
+  // The after-PROLOG location of the return address.  Location of
+  // return address specifies a type (REG or STACK) and a number
+  // representing the register number (i.e. - use a register name) or
+  // stack slot.
+  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
+  // Otherwise, it is above the locks and verification slot and alignment word
+  return_addr(STACK - 1*VMRegImpl::slots_per_word +
+              align_up((Compile::current()->in_preserve_stack_slots() +
+                        Compile::current()->fixed_slots()),
+                       stack_alignment_in_slots()));
+
+  // Body of function which returns an OptoRegs array locating
+  // arguments either in registers or in stack slots for calling
+  // Java.  The work is done by SharedRuntime::java_calling_convention().
+  calling_convention %{
+    (void) SharedRuntime::java_calling_convention(sig_bt, regs, length, is_outgoing);
+
+  %}
+
+  // Body of function which returns an OptoRegs array locating
+  // arguments either in registers or in stack slots for calling
+  // C.  The work is done by SharedRuntime::c_calling_convention().
+  c_calling_convention %{
+    // This is obviously always outgoing
+    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
+  %}
+
+  // Location of compiled Java return values.
+  return_value %{
+    return c2::return_value(ideal_reg);
+  %}
+
+  // Location of C return values: c2::c_return_value() unless HARD_FLOAT_CC is defined, in which case the Java location is used.
+  c_return_value %{
+#ifndef HARD_FLOAT_CC
+    return c2::c_return_value(ideal_reg);
+#else
+    return c2::return_value(ideal_reg);
+#endif
+  %}
+
+%}
+
+//----------ATTRIBUTES---------------------------------------------------------
+//----------Instruction Attributes---------------------------------------------
+ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute
+ins_attrib ins_size(32);           // Required size attribute (in bits)
+ins_attrib ins_short_branch(0);    // Required flag: is this instruction a
+                                   // non-matching short branch variant of some
+                                   // long branch?
+
+//----------OPERANDS-----------------------------------------------------------
+// Operand definitions must precede instruction definitions for correct parsing
+// in the ADLC because operands constitute user defined types which are used in
+// instruction definitions.
+
+//----------Simple Operands----------------------------------------------------
+// Immediate Operands
+// Integer Immediate: any 32-bit value (no predicate restricts the constant)
+operand immI() %{
+  match(ConI);
+
+  op_cost(0);
+  // formats are generated automatically for constants and base registers
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: 8-bit unsigned (0..255) - for VMOV
+operand immU8() %{
+  predicate(0 <= n->get_int() && (n->get_int() <= 255));
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: 16-bit unsigned (upper 16 bits clear); matched only when FT_ARMV6T2 is among the reported features
+operand immI16() %{
+  predicate((n->get_int() >> 16) == 0 && (VM_Version::features() & FT_ARMV6T2));
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: offset for half and double word loads and stores
+operand immIHD() %{
+  predicate(is_memoryHD(n->get_int()));  // the range check is delegated to is_memoryHD()
+  match(ConI);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: offset for fp loads and stores (must pass is_memoryfp() and be a multiple of 4)
+operand immIFP() %{
+  predicate(is_memoryfp(n->get_int()) && ((n->get_int() & 3) == 0));
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Valid scale values for addressing modes and shifts: 5-bit unsigned (0..31)
+operand immU5() %{
+  predicate(0 <= n->get_int() && (n->get_int() <= 31));
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: 6-bit value with the top bit set, i.e. 32..63 (same range as immI_32_63)
+operand immU6Big() %{
+  predicate(n->get_int() >= 32 && n->get_int() <= 63);
+  match(ConI);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: the value 0
+operand immI0() %{
+  predicate(n->get_int() == 0);
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: the value 1
+operand immI_1() %{
+  predicate(n->get_int() == 1);
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: the value 2
+operand immI_2() %{
+  predicate(n->get_int() == 2);
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: the value 3
+operand immI_3() %{
+  predicate(n->get_int() == 3);
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: the value 4
+operand immI_4() %{
+  predicate(n->get_int() == 4);
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: the value 8
+operand immI_8() %{
+  predicate(n->get_int() == 8);
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Int Immediate non-negative
+operand immU31()
+%{
+  predicate(n->get_int() >= 0);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: the values 32-63
+operand immI_32_63() %{
+  predicate(n->get_int() >= 32 && n->get_int() <= 63);
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Immediates for special shifts (sign extend)
+
+// Integer Immediate: the value 16
+operand immI_16() %{
+  predicate(n->get_int() == 16);
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: the value 24
+operand immI_24() %{
+  predicate(n->get_int() == 24);
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: the value 255
+operand immI_255() %{
+  predicate( n->get_int() == 255 );
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: the value 65535
+operand immI_65535() %{
+  predicate(n->get_int() == 65535);
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediates for arithmetic instructions
+
+operand aimmI() %{
+  predicate(is_aimm(n->get_int()));
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand aimmIneg() %{
+  predicate(is_aimm(-n->get_int()));
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand aimmU31() %{
+  predicate((0 <= n->get_int()) && is_aimm(n->get_int()));
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediates for logical instructions
+
+operand limmI() %{
+  predicate(is_limmI(n->get_int()));
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand limmIlow8() %{
+  predicate(is_limmI_low(n->get_int(), 8));
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand limmU31() %{
+  predicate(0 <= n->get_int() && is_limmI(n->get_int()));
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand limmIn() %{
+  predicate(is_limmI(~n->get_int()));
+  match(ConI);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate: the value FF
+operand immL_FF() %{
+  predicate( n->get_long() == 0xFFL );
+  match(ConL);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate: the value FFFF
+operand immL_FFFF() %{
+  predicate( n->get_long() == 0xFFFFL );
+  match(ConL);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Pointer Immediate (any pointer constant)
+operand immP() %{
+  match(ConP);
+
+  op_cost(5);
+  // formats are generated automatically for constants and base registers
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immP0() %{
+  predicate(n->get_ptr() == 0);
+  match(ConP);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immP_poll() %{
+  predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page());
+  match(ConP);
+
+  // formats are generated automatically for constants and base registers
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Narrow Pointer Immediate
+operand immN()
+%{
+  match(ConN);
+
+  op_cost(10);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immNKlass()
+%{
+  match(ConNKlass);
+
+  op_cost(10);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Narrow NULL Pointer Immediate
+operand immN0()
+%{
+  predicate(n->get_narrowcon() == 0);
+  match(ConN);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immL() %{
+  match(ConL);
+  op_cost(40);
+  // formats are generated automatically for constants and base registers
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immL0() %{
+  predicate(n->get_long() == 0L);
+  match(ConL);
+  op_cost(0);
+  // formats are generated automatically for constants and base registers
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate: 16-bit unsigned (0..65535), only when FT_ARMV6T2 is available
+operand immL16() %{
+  predicate(n->get_long() >= 0 && n->get_long() < (1<<16)  && (VM_Version::features() & FT_ARMV6T2));
+  match(ConL);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate: low 32-bit mask
+operand immL_32bits() %{
+  predicate(n->get_long() == 0xFFFFFFFFL);
+  match(ConL);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Double Immediate
+operand immD() %{
+  match(ConD);
+
+  op_cost(40);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Double Immediate: +0.0d.
+operand immD0() %{
+  predicate(jlong_cast(n->getd()) == 0);
+
+  match(ConD);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand imm8D() %{
+  predicate(Assembler::operand_valid_for_double_immediate(n->getd()));
+  match(ConD);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float Immediate
+operand immF() %{
+  match(ConF);
+
+  op_cost(20);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float Immediate: +0.0f
+operand immF0() %{
+  predicate(jint_cast(n->getf()) == 0);
+  match(ConF);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float Immediate: encoded as 8 bits
+operand imm8F() %{
+  predicate(Assembler::operand_valid_for_float_immediate(n->getf()));
+  match(ConF);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Register Operands
+// Integer Register
+operand iRegI() %{
+  constraint(ALLOC_IN_RC(int_reg));
+  match(RegI);
+  match(R0RegI);
+  match(R1RegI);
+  match(R2RegI);
+  match(R3RegI);
+  match(R12RegI);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer Register
+operand iRegP() %{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(RegP);
+  match(R0RegP);
+  match(R1RegP);
+  match(R2RegP);
+  match(RExceptionRegP);
+  match(RmethodRegP); // R8
+  match(R9RegP);
+  match(RthreadRegP); // R10, TODO Oracle FIXME: move to sp_ptr_RegP?
+  match(R12RegP);
+  match(LRRegP);
+
+  match(sp_ptr_RegP);
+  match(store_ptr_RegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// GPRs + Rmethod + Rthread + SP
+operand sp_ptr_RegP() %{
+  constraint(ALLOC_IN_RC(sp_ptr_reg));
+  match(RegP);
+  match(iRegP);
+  match(SPRegP); // FIXME: check cost
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand R0RegP() %{
+  constraint(ALLOC_IN_RC(R0_regP));
+  match(iRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand R1RegP() %{
+  constraint(ALLOC_IN_RC(R1_regP));
+  match(iRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand R2RegP() %{
+  constraint(ALLOC_IN_RC(R2_regP));
+  match(iRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand RExceptionRegP() %{
+  constraint(ALLOC_IN_RC(Rexception_regP));
+  match(iRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand RthreadRegP() %{
+  constraint(ALLOC_IN_RC(Rthread_regP));
+  match(iRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand RmethodRegP() %{
+  constraint(ALLOC_IN_RC(Rmethod_regP));
+  match(iRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand IPRegP() %{
+  constraint(ALLOC_IN_RC(IP_regP));
+  match(iRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand LRRegP() %{
+  constraint(ALLOC_IN_RC(LR_regP));
+  match(iRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand R0RegI() %{
+  constraint(ALLOC_IN_RC(R0_regI));
+  match(iRegI);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand R1RegI() %{
+  constraint(ALLOC_IN_RC(R1_regI));
+  match(iRegI);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand R2RegI() %{
+  constraint(ALLOC_IN_RC(R2_regI));
+  match(iRegI);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand R3RegI() %{
+  constraint(ALLOC_IN_RC(R3_regI));
+  match(iRegI);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand R9RegI() %{
+  constraint(ALLOC_IN_RC(R9_regI));
+  match(iRegI);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand R12RegI() %{
+  constraint(ALLOC_IN_RC(R12_regI));
+  match(iRegI);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Long Register
+operand iRegL() %{
+  constraint(ALLOC_IN_RC(long_reg));
+  match(RegL);
+  match(R0R1RegL);
+  match(R2R3RegL);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand iRegLd() %{
+  constraint(ALLOC_IN_RC(long_reg_align));
+  match(iRegL); // FIXME: allows unaligned R11/R12?
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// first long arg, or return value
+operand R0R1RegL() %{
+  constraint(ALLOC_IN_RC(R0R1_regL));
+  match(iRegL);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand R2R3RegL() %{
+  constraint(ALLOC_IN_RC(R2R3_regL));
+  match(iRegL);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Condition Code Flag Register
+operand flagsReg() %{
+  constraint(ALLOC_IN_RC(int_flags));
+  match(RegFlags);
+
+  format %{ "apsr" %}
+  interface(REG_INTER);
+%}
+
+// Result of compare to 0 (TST)
+operand flagsReg_EQNELTGE() %{
+  constraint(ALLOC_IN_RC(int_flags));
+  match(RegFlags);
+
+  format %{ "apsr_EQNELTGE" %}
+  interface(REG_INTER);
+%}
+
+// Condition Code Register, unsigned comparisons.
+operand flagsRegU() %{
+  constraint(ALLOC_IN_RC(int_flags));
+  match(RegFlags);
+#ifdef TODO
+  match(RegFlagsP);
+#endif
+
+  format %{ "apsr_U" %}
+  interface(REG_INTER);
+%}
+
+// Condition Code Register, pointer comparisons.
+operand flagsRegP() %{
+  constraint(ALLOC_IN_RC(int_flags));
+  match(RegFlags);
+
+  format %{ "apsr_P" %}
+  interface(REG_INTER);
+%}
+
+// Condition Code Register, long comparisons.
+operand flagsRegL_LTGE() %{
+  constraint(ALLOC_IN_RC(int_flags));
+  match(RegFlags);
+
+  format %{ "apsr_L_LTGE" %}
+  interface(REG_INTER);
+%}
+
+operand flagsRegUL() %{
+  constraint(ALLOC_IN_RC(int_flags));
+  match(RegFlags);
+
+  format %{ "apsr_UL" %}
+  interface(REG_INTER);
+%}
+
+operand flagsRegL_EQNE() %{
+  constraint(ALLOC_IN_RC(int_flags));
+  match(RegFlags);
+
+  format %{ "apsr_L_EQNE" %}
+  interface(REG_INTER);
+%}
+
+operand flagsRegL_LEGT() %{
+  constraint(ALLOC_IN_RC(int_flags));
+  match(RegFlags);
+
+  format %{ "apsr_L_LEGT" %}
+  interface(REG_INTER);
+%}
+
+// Condition Code Register, floating comparisons, unordered same as "less".
+operand flagsRegF() %{
+  constraint(ALLOC_IN_RC(float_flags));
+  match(RegFlags);
+
+  format %{ "fpscr_F" %}
+  interface(REG_INTER);
+%}
+
+// Vectors
+operand vecD() %{
+  constraint(ALLOC_IN_RC(actual_dflt_reg));
+  match(VecD);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand vecX() %{
+  constraint(ALLOC_IN_RC(vectorx_reg));
+  match(VecX);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand regD() %{
+  constraint(ALLOC_IN_RC(actual_dflt_reg));
+  match(RegD);
+  match(regD_low);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand Q0_regD() %{
+  constraint(ALLOC_IN_RC(D0D1_regD));
+  match(RegD);
+  match(regD_low);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand Q1_regD() %{
+  constraint(ALLOC_IN_RC(D2D3_regD));
+  match(RegD);
+  match(regD_low);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand regF() %{
+  constraint(ALLOC_IN_RC(sflt_reg));
+  match(RegF);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand regD_low() %{
+  constraint(ALLOC_IN_RC(dflt_low_reg));
+  match(RegD);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Special Registers
+
+// Inline Cache Register
+operand inline_cache_regP(iRegP reg) %{
+  constraint(ALLOC_IN_RC(Ricklass_regP));
+  match(reg);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand interpreter_method_oop_regP(iRegP reg) %{
+  constraint(ALLOC_IN_RC(Rmethod_regP));
+  match(reg);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+
+//----------Complex Operands---------------------------------------------------
+// Indirect Memory Reference
+operand indirect(sp_ptr_RegP reg) %{
+  constraint(ALLOC_IN_RC(sp_ptr_reg));
+  match(reg);
+
+  op_cost(100);
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xf); // PC => no index
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+// Indirect with Offset in ]-4096, 4096[
+operand indOffset12(sp_ptr_RegP reg, immI12 offset) %{
+  constraint(ALLOC_IN_RC(sp_ptr_reg));
+  match(AddP reg offset);
+
+  op_cost(100);
+  format %{ "[$reg + $offset]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xf); // PC => no index
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+// Indirect with offset for float load/store
+operand indOffsetFP(sp_ptr_RegP reg, immIFP offset) %{
+  constraint(ALLOC_IN_RC(sp_ptr_reg));
+  match(AddP reg offset);
+
+  op_cost(100);
+  format %{ "[$reg + $offset]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xf); // PC => no index
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+// Indirect with Offset for half and double words
+operand indOffsetHD(sp_ptr_RegP reg, immIHD offset) %{
+  constraint(ALLOC_IN_RC(sp_ptr_reg));
+  match(AddP reg offset);
+
+  op_cost(100);
+  format %{ "[$reg + $offset]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xf); // PC => no index
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+// Indirect with Offset and Offset+4 in ]-1024, 1024[
+operand indOffsetFPx2(sp_ptr_RegP reg, immX10x2 offset) %{
+  constraint(ALLOC_IN_RC(sp_ptr_reg));
+  match(AddP reg offset);
+
+  op_cost(100);
+  format %{ "[$reg + $offset]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xf); // PC => no index
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+// Indirect with Offset and Offset+4 in ]-4096, 4096[
+operand indOffset12x2(sp_ptr_RegP reg, immI12x2 offset) %{
+  constraint(ALLOC_IN_RC(sp_ptr_reg));
+  match(AddP reg offset);
+
+  op_cost(100);
+  format %{ "[$reg + $offset]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xf); // PC => no index
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+// Indirect with Register Index
+operand indIndex(iRegP addr, iRegX index) %{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP addr index);
+
+  op_cost(100);
+  format %{ "[$addr + $index]" %}
+  interface(MEMORY_INTER) %{
+    base($addr);
+    index($index);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+// Indirect with Scaled Register Index: base plus (index shifted left by scale)
+operand indIndexScale(iRegP addr, iRegX index, immU5 scale) %{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP addr (LShiftX index scale));
+
+  op_cost(100);
+  format %{"[$addr + $index << $scale]" %}
+  interface(MEMORY_INTER) %{
+    base($addr);
+    index($index);
+    scale($scale);
+    disp(0x0);
+  %}
+%}
+
+// Operands for expressing Control Flow
+// NOTE:  Label is a predefined operand which should not be redefined in
+//        the AD file.  It is generically handled within the ADLC.
+
+//----------Conditional Branch Operands----------------------------------------
+// Comparison Op  - This is the operation of the comparison, and is limited to
+//                  the following set of codes:
+//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
+//
+// Other attributes of the comparison, such as unsignedness, are specified
+// by the comparison instruction that sets a condition code flags register.
+// That result is represented by a flags operand whose subtype is appropriate
+// to the unsignedness (etc.) of the comparison.
+//
+// Later, the instruction which matches both the Comparison Op (a Bool) and
+// the flags (produced by the Cmp) specifies the coding of the comparison op
+// by matching a specific subtype of Bool operand below, such as cmpOpU.
+
+operand cmpOp() %{
+  match(Bool);
+
+  format %{ "" %}
+  interface(COND_INTER) %{
+    equal(0x0);         // EQ
+    not_equal(0x1);     // NE
+    less(0xb);          // LT
+    greater_equal(0xa); // GE
+    less_equal(0xd);    // LE
+    greater(0xc);       // GT
+    overflow(0x0); // unsupported/unimplemented
+    no_overflow(0x0); // unsupported/unimplemented
+  %}
+%}
+
+// Integer comparison with 0, signed (less/greater_equal use MI/PL; LE/GT unsupported)
+operand cmpOp0() %{
+  match(Bool);
+
+  format %{ "" %}
+  interface(COND_INTER) %{
+    equal(0x0);         // EQ
+    not_equal(0x1);     // NE
+    less(0x4);          // MI
+    greater_equal(0x5); // PL
+    less_equal(0xd); // unsupported
+    greater(0xc); // unsupported
+    overflow(0x0); // unsupported/unimplemented
+    no_overflow(0x0); // unsupported/unimplemented
+  %}
+%}
+
+// Comparison Op, unsigned
+operand cmpOpU() %{
+  match(Bool);
+
+  format %{ "u" %}
+  interface(COND_INTER) %{
+    equal(0x0);         // EQ
+    not_equal(0x1);     // NE
+    less(0x3);          // LO (CC)
+    greater_equal(0x2); // HS (CS)
+    less_equal(0x9);    // LS
+    greater(0x8);       // HI
+    overflow(0x0); // unsupported/unimplemented
+    no_overflow(0x0); // unsupported/unimplemented
+  %}
+%}
+
+// Comparison Op, pointer (same as unsigned)
+operand cmpOpP() %{
+  match(Bool);
+
+  format %{ "p" %}
+  interface(COND_INTER) %{
+    equal(0x0);         // EQ
+    not_equal(0x1);     // NE
+    less(0x3);          // LO (CC)
+    greater_equal(0x2); // HS (CS)
+    less_equal(0x9);    // LS
+    greater(0x8);       // HI
+    overflow(0x0); // unsupported/unimplemented
+    no_overflow(0x0); // unsupported/unimplemented
+  %}
+%}
+
+operand cmpOpL() %{
+  match(Bool);
+
+  format %{ "L" %}
+  interface(COND_INTER) %{
+    equal(0x0);         // EQ
+    not_equal(0x1);     // NE
+    less(0xb);          // LT
+    greater_equal(0xa); // GE
+    less_equal(0xd);    // LE
+    greater(0xc);       // GT
+    overflow(0x0); // unsupported/unimplemented
+    no_overflow(0x0); // unsupported/unimplemented
+  %}
+%}
+
+operand cmpOpL_commute() %{
+  match(Bool);
+
+  format %{ "L" %}
+  interface(COND_INTER) %{
+    equal(0x0);         // EQ
+    not_equal(0x1);     // NE
+    less(0xc);          // GT (mirror of cmpOpL's LT)
+    greater_equal(0xd); // LE (mirror of cmpOpL's GE)
+    less_equal(0xa);    // GE (mirror of cmpOpL's LE)
+    greater(0xb);       // LT (mirror of cmpOpL's GT)
+    overflow(0x0); // unsupported/unimplemented
+    no_overflow(0x0); // unsupported/unimplemented
+  %}
+%}
+
+//----------OPERAND CLASSES----------------------------------------------------
+// Operand Classes are groups of operands that are used to simplify
+// instruction definitions by not requiring the AD writer to specify separate
+// instructions for every form of operand when the instruction accepts
+// multiple operand types with the same basic encoding and format.  The classic
+// case of this is memory operands.
+opclass memoryI ( indirect, indOffset12, indIndex, indIndexScale );
+opclass memoryP ( indirect, indOffset12, indIndex, indIndexScale );
+opclass memoryF ( indirect, indOffsetFP );
+opclass memoryF2 ( indirect, indOffsetFPx2 );
+opclass memoryD ( indirect, indOffsetFP );
+opclass memoryfp( indirect, indOffsetFP );
+opclass memoryB ( indirect, indIndex, indOffsetHD );
+opclass memoryS ( indirect, indIndex, indOffsetHD );
+opclass memoryL ( indirect, indIndex, indOffsetHD );
+
+opclass memoryScaledI(indIndexScale);
+opclass memoryScaledP(indIndexScale);
+
+// when ldrex/strex is used:
+opclass memoryex ( indirect );
+opclass indIndexMemory( indIndex );
+opclass memorylong ( indirect, indOffset12x2 );
+opclass memoryvld ( indirect /* , write back mode not implemented */ );
+
+//----------PIPELINE-----------------------------------------------------------
+pipeline %{
+
+//----------ATTRIBUTES---------------------------------------------------------
+attributes %{
+  fixed_size_instructions;           // Fixed size instructions
+  max_instructions_per_bundle = 4;   // Up to 4 instructions per bundle
+  instruction_unit_size = 4;         // An instruction is 4 bytes long
+  instruction_fetch_unit_size = 16;  // The processor fetches one line
+  instruction_fetch_units = 1;       // of 16 bytes
+
+  // List of nop instructions
+  nops( Nop_A0, Nop_A1, Nop_MS, Nop_FA, Nop_BR );
+%}
+
+//----------RESOURCES----------------------------------------------------------
+// Resources are the functional units available to the machine
+resources(A0, A1, MS, BR, FA, FM, IDIV, FDIV, IALU = A0 | A1);
+
+//----------PIPELINE DESCRIPTION-----------------------------------------------
+// Pipeline Description specifies the stages in the machine's pipeline
+
+pipe_desc(A, P, F, B, I, J, S, R, E, C, M, W, X, T, D);
+
+//----------PIPELINE CLASSES---------------------------------------------------
+// Pipeline Classes describe the stages in which input and output are
+// referenced by the hardware pipeline.
+
+// Integer ALU reg-reg operation
+pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+    single_instruction;
+    dst   : E(write);
+    src1  : R(read);
+    src2  : R(read);
+    IALU  : R;
+%}
+
+// Integer ALU reg-reg long operation
+pipe_class ialu_reg_reg_2(iRegL dst, iRegL src1, iRegL src2) %{
+    instruction_count(2);
+    dst   : E(write);
+    src1  : R(read);
+    src2  : R(read);
+    IALU  : R;
+    IALU  : R;
+%}
+
+// Integer ALU reg-reg long dependent operation
+pipe_class ialu_reg_reg_2_dep(iRegL dst, iRegL src1, iRegL src2, flagsReg cr) %{
+    instruction_count(1); multiple_bundles;
+    dst   : E(write);
+    src1  : R(read);
+    src2  : R(read);
+    cr    : E(write);
+    IALU  : R(2);
+%}
+
+// Integer ALU reg-imm operation
+pipe_class ialu_reg_imm(iRegI dst, iRegI src1) %{
+    single_instruction;
+    dst   : E(write);
+    src1  : R(read);
+    IALU  : R;
+%}
+
+// Integer ALU reg-reg operation with condition code
+pipe_class ialu_cc_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
+    single_instruction;
+    dst   : E(write);
+    cr    : E(write);
+    src1  : R(read);
+    src2  : R(read);
+    IALU  : R;
+%}
+
+// Integer ALU zero-reg operation
+pipe_class ialu_zero_reg(iRegI dst, immI0 zero, iRegI src2) %{
+    single_instruction;
+    dst   : E(write);
+    src2  : R(read);
+    IALU  : R;
+%}
+
+// Integer ALU zero-reg operation with condition code only
+pipe_class ialu_cconly_zero_reg(flagsReg cr, iRegI src) %{
+    single_instruction;
+    cr    : E(write);
+    src   : R(read);
+    IALU  : R;
+%}
+
+// Integer ALU reg-reg operation with condition code only
+pipe_class ialu_cconly_reg_reg(flagsReg cr, iRegI src1, iRegI src2) %{
+    single_instruction;
+    cr    : E(write);
+    src1  : R(read);
+    src2  : R(read);
+    IALU  : R;
+%}
+
+// Integer ALU reg-imm operation with condition code only
+pipe_class ialu_cconly_reg_imm(flagsReg cr, iRegI src1) %{
+    single_instruction;
+    cr    : E(write);
+    src1  : R(read);
+    IALU  : R;
+%}
+
+// Integer ALU reg-reg-zero operation with condition code only
+pipe_class ialu_cconly_reg_reg_zero(flagsReg cr, iRegI src1, iRegI src2, immI0 zero) %{
+    single_instruction;
+    cr    : E(write);
+    src1  : R(read);
+    src2  : R(read);
+    IALU  : R;
+%}
+
+// Integer ALU reg-imm-zero operation with condition code only
+pipe_class ialu_cconly_reg_imm_zero(flagsReg cr, iRegI src1, immI0 zero) %{
+    single_instruction;
+    cr    : E(write);
+    src1  : R(read);
+    IALU  : R;
+%}
+
+// Integer ALU reg-reg operation with condition code, src1 modified
+pipe_class ialu_cc_rwreg_reg(flagsReg cr, iRegI src1, iRegI src2) %{
+    single_instruction;
+    cr    : E(write);
+    src1  : E(write);
+    src1  : R(read);
+    src2  : R(read);
+    IALU  : R;
+%}
+
+pipe_class cmpL_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg cr ) %{
+    multiple_bundles;
+    dst   : E(write)+4;
+    cr    : E(write);
+    src1  : R(read);
+    src2  : R(read);
+    IALU  : R(3);
+    BR    : R(2);
+%}
+
+// Integer ALU operation
+pipe_class ialu_none(iRegI dst) %{
+    single_instruction;
+    dst   : E(write);
+    IALU  : R;
+%}
+
+// Integer ALU reg operation
+pipe_class ialu_reg(iRegI dst, iRegI src) %{
+    single_instruction; may_have_no_code;
+    dst   : E(write);
+    src   : R(read);
+    IALU  : R;
+%}
+
+// Integer ALU reg conditional operation
+// This instruction has a 1 cycle stall, and cannot execute
+// in the same cycle as the instruction setting the condition
+// code. We kludge this by pretending to read the condition code
+// 1 cycle earlier, and by marking the functional units as busy
+// for 2 cycles with the result available 1 cycle later than
+// is really the case.
+pipe_class ialu_reg_flags( iRegI op2_out, iRegI op2_in, iRegI op1, flagsReg cr ) %{
+    single_instruction;
+    op2_out : C(write);
+    op1     : R(read);
+    cr      : R(read);       // This is really E, with a 1 cycle stall
+    BR      : R(2);
+    MS      : R(2);
+%}
+
+// Integer ALU reg move, long source to int destination (may have no code)
+pipe_class ialu_move_reg_L_to_I(iRegI dst, iRegL src) %{
+    single_instruction; may_have_no_code;
+    dst   : E(write);
+    src   : R(read);
+    IALU  : R;
+%}
+pipe_class ialu_move_reg_I_to_L(iRegL dst, iRegI src) %{
+    single_instruction; may_have_no_code;
+    dst   : E(write);
+    src   : R(read);
+    IALU  : R;
+%}
+
+// Two integer ALU reg operations
+pipe_class ialu_reg_2(iRegL dst, iRegL src) %{
+    instruction_count(2);
+    dst   : E(write);
+    src   : R(read);
+    A0    : R;
+    A1    : R;
+%}
+
+// Two integer ALU reg operations
+pipe_class ialu_move_reg_L_to_L(iRegL dst, iRegL src) %{
+    instruction_count(2); may_have_no_code;
+    dst   : E(write);
+    src   : R(read);
+    A0    : R;
+    A1    : R;
+%}
+
+// Integer ALU imm operation
+pipe_class ialu_imm(iRegI dst) %{
+    single_instruction;
+    dst   : E(write);
+    IALU  : R;
+%}
+
+pipe_class ialu_imm_n(iRegI dst) %{
+    single_instruction;
+    dst   : E(write);
+    IALU  : R;
+%}
+
+// Integer ALU reg-reg with carry operation
+pipe_class ialu_reg_reg_cy(iRegI dst, iRegI src1, iRegI src2, iRegI cy) %{
+    single_instruction;
+    dst   : E(write);
+    src1  : R(read);
+    src2  : R(read);
+    IALU  : R;
+%}
+
+// Integer ALU cc operation
+pipe_class ialu_cc(iRegI dst, flagsReg cc) %{
+    single_instruction;
+    dst   : E(write);
+    cc    : R(read);
+    IALU  : R;
+%}
+
+// Integer ALU cc / second IALU operation
+pipe_class ialu_reg_ialu( iRegI dst, iRegI src ) %{
+    instruction_count(1); multiple_bundles;
+    dst   : E(write)+1;
+    src   : R(read);
+    IALU  : R;
+%}
+
+// Integer ALU cc / second IALU operation
+pipe_class ialu_reg_reg_ialu( iRegI dst, iRegI p, iRegI q ) %{
+    instruction_count(1); multiple_bundles;
+    dst   : E(write)+1;
+    p     : R(read);
+    q     : R(read);
+    IALU  : R;
+%}
+
+// Integer ALU hi-lo-reg operation
+pipe_class ialu_hi_lo_reg(iRegI dst, immI src) %{
+    instruction_count(1); multiple_bundles;
+    dst   : E(write)+1;
+    IALU  : R(2);
+%}
+
+// Long Constant
+pipe_class loadConL( iRegL dst, immL src ) %{
+    instruction_count(2); multiple_bundles;
+    dst   : E(write)+1;
+    IALU  : R(2);
+    IALU  : R(2);
+%}
+
+// Pointer Constant
+pipe_class loadConP( iRegP dst, immP src ) %{
+    instruction_count(0); multiple_bundles;
+    fixed_latency(6);
+%}
+
+// Polling Address
+pipe_class loadConP_poll( iRegP dst, immP_poll src ) %{
+    dst   : E(write);
+    IALU  : R;
+%}
+
+// Long Constant small
+pipe_class loadConLlo( iRegL dst, immL src ) %{
+    instruction_count(2);
+    dst   : E(write);
+    IALU  : R;
+    IALU  : R;
+%}
+
+// [PHH] This is wrong for 64-bit.  See LdImmF/D.
+pipe_class loadConFD(regF dst, immF src, iRegP tmp) %{
+    instruction_count(1); multiple_bundles;
+    src   : R(read);
+    dst   : M(write)+1;
+    IALU  : R;
+    MS    : E;
+%}
+
+// Integer ALU nop operation
+pipe_class ialu_nop() %{
+    single_instruction;
+    IALU  : R;
+%}
+
+// Integer ALU nop operation, A0 unit only
+pipe_class ialu_nop_A0() %{
+    single_instruction;
+    A0    : R;
+%}
+
+// Integer ALU nop operation, A1 unit only
+pipe_class ialu_nop_A1() %{
+    single_instruction;
+    A1    : R;
+%}
+
+// Integer Multiply reg-reg operation
+pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+    single_instruction;
+    dst   : E(write);
+    src1  : R(read);
+    src2  : R(read);
+    MS    : R(5);
+%}
+
+pipe_class mulL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
+    single_instruction;
+    dst   : E(write)+4;
+    src1  : R(read);
+    src2  : R(read);
+    MS    : R(6);
+%}
+
+// Integer Divide reg-reg
+pipe_class sdiv_reg_reg_IDIV(iRegI dst, iRegI src1, iRegI src2, iRegI temp, flagsReg cr) %{
+    single_instruction;
+    dst   : E(write);
+    temp  : E(write);
+    src1  : R(read);
+    src2  : R(read);
+    temp  : R(read);
+    MS    : R(10);
+%}
+
+pipe_class sdiv_reg_reg_SW(iRegI dst, iRegI src1, iRegI src2, iRegI temp1, iRegI temp2, flagsReg cr) %{
+    instruction_count(1); multiple_bundles;
+    dst   : E(write);
+    temp1 : E(write);
+    temp2 : E(write);
+    src1  : R(read);
+    src2  : R(read);
+    temp1 : R(read);
+    temp2 : R(read);
+    MS    : R(38);
+%}
+
+// Long Divide
+pipe_class divL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
+    dst  : E(write)+71;
+    src1 : R(read);
+    src2 : R(read)+1;
+    MS   : R(70);
+%}
+
+// Floating Point Add Float
+pipe_class faddF_reg_reg(regF dst, regF src1, regF src2) %{
+    single_instruction;
+    dst   : X(write);
+    src1  : E(read);
+    src2  : E(read);
+    FA    : R;
+%}
+
+// Floating Point Add Double
+pipe_class faddD_reg_reg(regD dst, regD src1, regD src2) %{
+    single_instruction;
+    dst   : X(write);
+    src1  : E(read);
+    src2  : E(read);
+    FA    : R;
+%}
+
+// Floating Point Conditional Move (float) based on integer flags
+pipe_class int_conditional_float_move (cmpOp cmp, flagsReg cr, regF dst, regF src) %{
+    single_instruction;
+    dst   : X(write);
+    src   : E(read);
+    cr    : R(read);
+    FA    : R(2);
+    BR    : R(2);
+%}
+
+// Floating Point Conditional Move (double) based on integer flags
+pipe_class int_conditional_double_move (cmpOp cmp, flagsReg cr, regD dst, regD src) %{
+    single_instruction;
+    dst   : X(write);
+    src   : E(read);
+    cr    : R(read);
+    FA    : R(2);
+    BR    : R(2);
+%}
+
+// Floating Point Multiply Float
+pipe_class fmulF_reg_reg(regF dst, regF src1, regF src2) %{
+    single_instruction;
+    dst   : X(write);
+    src1  : E(read);
+    src2  : E(read);
+    FM    : R;
+%}
+
+// Floating Point Multiply Double
+pipe_class fmulD_reg_reg(regD dst, regD src1, regD src2) %{
+    single_instruction;
+    dst   : X(write);
+    src1  : E(read);
+    src2  : E(read);
+    FM    : R;
+%}
+
+// Floating Point Divide Float
+pipe_class fdivF_reg_reg(regF dst, regF src1, regF src2) %{
+    single_instruction;
+    dst   : X(write);
+    src1  : E(read);
+    src2  : E(read);
+    FM    : R;
+    FDIV  : C(14);
+%}
+
+// Floating Point Divide Double
+pipe_class fdivD_reg_reg(regD dst, regD src1, regD src2) %{
+    single_instruction;
+    dst   : X(write);
+    src1  : E(read);
+    src2  : E(read);
+    FM    : R;
+    FDIV  : C(17);
+%}
+
+// Floating Point Move/Negate/Abs Float
+pipe_class faddF_reg(regF dst, regF src) %{
+    single_instruction;
+    dst   : W(write);
+    src   : E(read);
+    FA    : R(1);
+%}
+
+// Floating Point Move/Negate/Abs Double
+pipe_class faddD_reg(regD dst, regD src) %{
+    single_instruction;
+    dst   : W(write);
+    src   : E(read);
+    FA    : R;
+%}
+
+// Floating Point Convert F->D
+pipe_class fcvtF2D(regD dst, regF src) %{
+    single_instruction;
+    dst   : X(write);
+    src   : E(read);
+    FA    : R;
+%}
+
+// Floating Point Convert I->D.
+// The integer source is declared as a regF operand here;
+// src read at stage E, dst written at stage X, FA used at stage R.
+pipe_class fcvtI2D(regD dst, regF src) %{
+    single_instruction;
+    dst   : X(write);
+    src   : E(read);
+    FA    : R;
+%}
+
+// Floating Point Convert LHi->D.
+// Both operands are declared regD; src read at stage E, dst written at stage X,
+// FA used at stage R.
+pipe_class fcvtLHi2D(regD dst, regD src) %{
+    single_instruction;
+    dst   : X(write);
+    src   : E(read);
+    FA    : R;
+%}
+
+// Floating Point Convert L->D.
+// src (iRegL) read at stage E, dst (regD) written at stage X, FA used at stage R.
+pipe_class fcvtL2D(regD dst, iRegL src) %{
+    single_instruction;
+    dst   : X(write);
+    src   : E(read);
+    FA    : R;
+%}
+
+// Floating Point Convert L->F.
+// src (iRegL) read at stage E, dst (regF) written at stage X, FA used at stage R.
+pipe_class fcvtL2F(regF dst, iRegL src) %{
+    single_instruction;
+    dst   : X(write);
+    src   : E(read);
+    FA    : R;
+%}
+
+// Floating Point Convert D->F.
+// src read at stage E, dst written at stage X, FA used at stage R.
+// NOTE(review): dst is declared regD and src regF, which looks swapped for a
+// D->F conversion — confirm whether the operand classes matter for scheduling.
+pipe_class fcvtD2F(regD dst, regF src) %{
+    single_instruction;
+    dst   : X(write);
+    src   : E(read);
+    FA    : R;
+%}
+
+// Floating Point Convert I->L.
+// Operands are declared as FP register classes (regD dst, regF src);
+// src read at stage E, dst written at stage X, FA used at stage R.
+pipe_class fcvtI2L(regD dst, regF src) %{
+    single_instruction;
+    dst   : X(write);
+    src   : E(read);
+    FA    : R;
+%}
+
+// Floating Point Convert D->I.
+// Counted as one instruction that may span multiple bundles; the dst write at
+// stage X carries an extra "+6" latency term.
+pipe_class fcvtD2I(iRegI dst, regD src, flagsReg cr) %{
+    instruction_count(1); multiple_bundles;
+    dst   : X(write)+6;
+    src   : E(read);
+    FA    : R;
+%}
+
+// Floating Point Convert D->L.
+// Counted as one instruction that may span multiple bundles; the dst write at
+// stage X carries an extra "+6" latency term.
+pipe_class fcvtD2L(regD dst, regD src, flagsReg cr) %{
+    instruction_count(1); multiple_bundles;
+    dst   : X(write)+6;
+    src   : E(read);
+    FA    : R;
+%}
+
+// Floating Point Convert F->I.
+// Counted as one instruction that may span multiple bundles; the dst write at
+// stage X carries an extra "+6" latency term.
+pipe_class fcvtF2I(regF dst, regF src, flagsReg cr) %{
+    instruction_count(1); multiple_bundles;
+    dst   : X(write)+6;
+    src   : E(read);
+    FA    : R;
+%}
+
+// Floating Point Convert F->L.
+// Counted as one instruction that may span multiple bundles; the dst write at
+// stage X carries an extra "+6" latency term.
+pipe_class fcvtF2L(regD dst, regF src, flagsReg cr) %{
+    instruction_count(1); multiple_bundles;
+    dst   : X(write)+6;
+    src   : E(read);
+    FA    : R;
+%}
+
+// Floating Point Convert I->F.
+// Both operands are declared regF; src read at stage E, dst written at stage X,
+// FA used at stage R.
+pipe_class fcvtI2F(regF dst, regF src) %{
+    single_instruction;
+    dst   : X(write);
+    src   : E(read);
+    FA    : R;
+%}
+
+// Floating Point Compare (single precision).
+// Reads both regF sources at stage E and writes the FP flags operand (cr) at
+// stage X; the immI0 'zero' operand has no stage entry of its own.
+pipe_class faddF_fcc_reg_reg_zero(flagsRegF cr, regF src1, regF src2, immI0 zero) %{
+    single_instruction;
+    cr    : X(write);
+    src1  : E(read);
+    src2  : E(read);
+    FA    : R;
+%}
+
+// Floating Point Compare (double precision).
+// Reads both regD sources at stage E and writes the FP flags operand (cr) at
+// stage X; the immI0 'zero' operand has no stage entry of its own.
+pipe_class faddD_fcc_reg_reg_zero(flagsRegF cr, regD src1, regD src2, immI0 zero) %{
+    single_instruction;
+    cr    : X(write);
+    src1  : E(read);
+    src2  : E(read);
+    FA    : R;
+%}
+
+// Floating Add Nop: no operands, only occupies the FA resource at stage R.
+// Referenced by the Nop_FA instruct.
+pipe_class fadd_nop() %{
+    single_instruction;
+    FA  : R;
+%}
+
+// Integer Store to Memory (source in an integer register).
+// The address operand is read at stage R, the stored value at stage C;
+// MS is used at stage R.
+pipe_class istore_mem_reg(memoryI mem, iRegI src) %{
+    single_instruction;
+    mem   : R(read);
+    src   : C(read);
+    MS    : R;
+%}
+
+// Integer Store to Memory where the source is an sp_ptr_RegP operand.
+// Same stage layout as istore_mem_reg: address at R, value at C, MS at R.
+pipe_class istore_mem_spORreg(memoryI mem, sp_ptr_RegP src) %{
+    single_instruction;
+    mem   : R(read);
+    src   : C(read);
+    MS    : R;
+%}
+
+// Float Store from a register.
+// Address operand read at stage R, value read at stage C, MS used at stage R.
+pipe_class fstoreF_mem_reg(memoryF mem, RegF src) %{
+    single_instruction;
+    mem : R(read);
+    src : C(read);
+    MS  : R;
+%}
+
+// Float Store of the immF0 constant.
+// Only the address operand has a stage entry (read at R); MS used at stage R.
+pipe_class fstoreF_mem_zero(memoryF mem, immF0 src) %{
+    single_instruction;
+    mem : R(read);
+    MS  : R;
+%}
+
+// Double Store from a register.
+// Declared with instruction_count(1) rather than single_instruction;
+// address read at stage R, value read at stage C, MS used at stage R.
+pipe_class fstoreD_mem_reg(memoryD mem, RegD src) %{
+    instruction_count(1);
+    mem : R(read);
+    src : C(read);
+    MS  : R;
+%}
+
+// Double Store of the immD0 constant.
+// Only the address operand has a stage entry (read at R); MS used at stage R.
+pipe_class fstoreD_mem_zero(memoryD mem, immD0 src) %{
+    single_instruction;
+    mem : R(read);
+    MS  : R;
+%}
+
+// Integer Load (when sign bit propagation not needed).
+// Address read at stage R; the result is written at stage C, whereas
+// iload_mask_mem writes it at stage M.
+pipe_class iload_mem(iRegI dst, memoryI mem) %{
+    single_instruction;
+    mem : R(read);
+    dst : C(write);
+    MS  : R;
+%}
+
+// Integer Load (when sign bit propagation or masking is needed).
+// Address read at stage R; the result is written at stage M, whereas
+// iload_mem writes it at stage C.
+pipe_class iload_mask_mem(iRegI dst, memoryI mem) %{
+    single_instruction;
+    mem : R(read);
+    dst : M(write);
+    MS  : R;
+%}
+
+// Float Load.
+// Address read at stage R, regF result written at stage M, MS used at stage R.
+pipe_class floadF_mem(regF dst, memoryF mem) %{
+    single_instruction;
+    mem : R(read);
+    dst : M(write);
+    MS  : R;
+%}
+
+// Double Load.
+// Counted as one instruction but flagged multiple_bundles (see inline note);
+// address read at stage R, regD result written at stage M, MS used at stage R.
+pipe_class floadD_mem(regD dst, memoryD mem) %{
+    instruction_count(1); multiple_bundles; // Again, unaligned argument is only multiple case
+    mem : R(read);
+    dst : M(write);
+    MS  : R;
+%}
+
+// Memory Nop: no operands, only occupies the MS resource at stage R.
+// Referenced by the Nop_MS instruct.
+pipe_class mem_nop() %{
+    single_instruction;
+    MS  : R;
+%}
+
+// Pipeline class taking a pointer destination and an integer immediate:
+// dst written at stage E, IALU used at stage R.
+// NOTE(review): the name looks inherited from another port — confirm it is still referenced.
+pipe_class sethi(iRegP dst, immI src) %{
+    single_instruction;
+    dst  : E(write);
+    IALU : R;
+%}
+
+// Pipeline class for an access through the 'poll' pointer register:
+// the register is read at stage R and MS is used at stage R; no result operand.
+pipe_class loadPollP(iRegP poll) %{
+    single_instruction;
+    poll : R(read);
+    MS   : R;
+%}
+
+// Unconditional branch: uses BR at stage R.
+// NOTE(review): declared single_instruction_with_delay_slot — confirm this
+// attribute is intended for this target.
+pipe_class br(Universe br, label labl) %{
+    single_instruction_with_delay_slot;
+    BR  : R;
+%}
+
+// Conditional branch on flags: cr is read at stage E, BR used at stage R.
+// NOTE(review): declared single_instruction_with_delay_slot — confirm this
+// attribute is intended for this target.
+pipe_class br_cc(Universe br, cmpOp cmp, flagsReg cr, label labl) %{
+    single_instruction_with_delay_slot;
+    cr    : E(read);
+    BR    : R;
+%}
+
+// Branch on an integer register: op1 is read at stage E; both BR and MS are
+// used at stage R.
+// NOTE(review): declared single_instruction_with_delay_slot — confirm this
+// attribute is intended for this target.
+pipe_class br_reg(Universe br, cmpOp cmp, iRegI op1, label labl) %{
+    single_instruction_with_delay_slot;
+    op1 : E(read);
+    BR  : R;
+    MS  : R;
+%}
+
+// Branch Nop: no operands, only occupies the BR resource at stage R.
+// Referenced by the Nop_BR instruct.
+pipe_class br_nop() %{
+    single_instruction;
+    BR  : R;
+%}
+
+// Call counted as two instructions, allowed to span bundles and forcing
+// serialization, with a fixed latency of 100. Books BR, MS and A0 as R(1).
+pipe_class simple_call(method meth) %{
+    instruction_count(2); multiple_bundles; force_serialization;
+    fixed_latency(100);
+    BR  : R(1);
+    MS  : R(1);
+    A0  : R(1);
+%}
+
+// Call counted as one instruction, allowed to span bundles and forcing
+// serialization, with a fixed latency of 100. Books only MS as R(1).
+pipe_class compiled_call(method meth) %{
+    instruction_count(1); multiple_bundles; force_serialization;
+    fixed_latency(100);
+    MS  : R(1);
+%}
+
+// Generic call: instruction count 0, serializing, fixed latency of 100,
+// and no functional-unit reservations.
+pipe_class call(method meth) %{
+    instruction_count(0); multiple_bundles; force_serialization;
+    fixed_latency(100);
+%}
+
+// Tail call: single instruction with a fixed latency of 100; books BR and MS
+// as R(1).
+// NOTE(review): carries has_delay_slot — confirm this attribute is intended
+// for this target.
+pipe_class tail_call(Universe ignore, label labl) %{
+    single_instruction; has_delay_slot;
+    fixed_latency(100);
+    BR  : R(1);
+    MS  : R(1);
+%}
+
+// Return: single instruction; books BR and MS as R(1). Unlike tail_call it
+// declares no fixed latency.
+// NOTE(review): carries has_delay_slot — confirm this attribute is intended
+// for this target.
+pipe_class ret(Universe ignore) %{
+    single_instruction; has_delay_slot;
+    BR  : R(1);
+    MS  : R(1);
+%}
+
+// The real do-nothing guy: instruction count 0 and no resources.
+// Used by instructs with an empty encoding (e.g. membar_acquire_lock).
+pipe_class empty( ) %{
+    instruction_count(0);
+%}
+
+// Long-running memory operation: instruction count 0, serializing, fixed
+// latency of 25, MS booked as R(1). Used by the memory-barrier instructs.
+pipe_class long_memory_op() %{
+    instruction_count(0); multiple_bundles; force_serialization;
+    fixed_latency(25);
+    MS  : R(1);
+%}
+
+// Check-cast (partial subtype check).
+// Both pointer operands are read at stage R; IALU and BR are booked as R(2)
+// and MS is used at stage R. No instruction-count attribute is given.
+pipe_class partial_subtype_check_pipe(Universe ignore, iRegP array, iRegP match ) %{
+    array : R(read);
+    match  : R(read);
+    IALU   : R(2);
+    BR     : R(2);
+    MS     : R;
+%}
+
+// Convert FPU flags into +1,0,-1.
+// Reads both FP sources at stage E and writes the integer result at stage E;
+// uses FA at stage R and books MS and BR as R(2).
+pipe_class floating_cmp( iRegI dst, regF src1, regF src2 ) %{
+    src1  : E(read);
+    src2  : E(read);
+    dst   : E(write);
+    FA    : R;
+    MS    : R(2);
+    BR    : R(2);
+%}
+
+// Compare for p < q, and conditionally add y.
+// All three integer operands are read at stage E; IALU is booked as R(3).
+// The resource entry is terminated with ';' like every other entry in this
+// pipeline description.
+pipe_class cadd_cmpltmask( iRegI p, iRegI q, iRegI y ) %{
+    p     : E(read);
+    q     : E(read);
+    y     : E(read);
+    IALU  : R(3);
+%}
+
+// Perform a compare, then move conditionally.
+// Both operands are read at stage E; IALU and BR are used at stage R.
+// NOTE(review): the original header mentioned a "branch delay slot"; that
+// wording looks inherited from another port — confirm it applies here.
+pipe_class min_max( iRegI src2, iRegI srcdst ) %{
+    src2   : E(read);
+    srcdst : E(read);
+    IALU   : R;
+    BR     : R;
+%}
+
+// Define the class for the Nop node: MachNop is scheduled with the
+// ialu_nop pipeline class (declared outside this section of the file).
+define %{
+   MachNop = ialu_nop;
+%}
+
+%}
+
+//----------INSTRUCTIONS-------------------------------------------------------
+
+//------------Special Nop instructions for bundling - no match rules-----------
+// Nop using the A0 functional unit.
+// Has no match rule, format or encoding; only binds the ialu_nop_A0 pipe class.
+instruct Nop_A0() %{
+  ins_pipe(ialu_nop_A0);
+%}
+
+// Nop using the A1 functional unit.
+// Has no match rule, format or encoding; only binds the ialu_nop_A1 pipe class.
+instruct Nop_A1( ) %{
+  ins_pipe(ialu_nop_A1);
+%}
+
+// Nop using the memory functional unit.
+// Has no match rule, format or encoding; only binds the mem_nop pipe class.
+instruct Nop_MS( ) %{
+  ins_pipe(mem_nop);
+%}
+
+// Nop using the floating add functional unit.
+// Has no match rule, format or encoding; only binds the fadd_nop pipe class.
+instruct Nop_FA( ) %{
+  ins_pipe(fadd_nop);
+%}
+
+// Nop using the branch functional unit.
+// Has no match rule, format or encoding; only binds the br_nop pipe class.
+instruct Nop_BR( ) %{
+  ins_pipe(br_nop);
+%}
+
+//----------Load/Store/Move Instructions---------------------------------------
+//----------Load Instructions--------------------------------------------------
+// Load Byte (8bit signed).
+// Matches LoadB and emits a single LDRSB into $dst (declared size: 4 bytes).
+instruct loadB(iRegI dst, memoryB mem) %{
+  match(Set dst (LoadB mem));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDRSB   $dst,$mem\t! byte -> int" %}
+  ins_encode %{
+    __ ldrsb($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(iload_mask_mem);
+%}
+
+// Load Byte (8bit signed) into a Long Register.
+// Folds ConvI2L(LoadB): LDRSB fills the low word, then the high word
+// ($dst$$Register->successor()) is set to the low word shifted right
+// arithmetically by 31, i.e. the sign replicated.
+instruct loadB2L(iRegL dst, memoryB mem) %{
+  match(Set dst (ConvI2L (LoadB mem)));
+  ins_cost(MEMORY_REF_COST);
+
+  size(8);
+  format %{ "LDRSB $dst.lo,$mem\t! byte -> long\n\t"
+            "ASR   $dst.hi,$dst.lo,31" %}
+  ins_encode %{
+    __ ldrsb($dst$$Register, $mem$$Address);
+    __ mov($dst$$Register->successor(), $dst$$Register, asr(31));
+  %}
+  ins_pipe(iload_mask_mem);
+%}
+
+// Load Unsigned Byte (8bit UNsigned) into an int reg.
+// Matches LoadUB and emits a single LDRB (declared size: 4 bytes).
+instruct loadUB(iRegI dst, memoryB mem) %{
+  match(Set dst (LoadUB mem));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDRB   $dst,$mem\t! ubyte -> int" %}
+  ins_encode %{
+    __ ldrb($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Unsigned Byte (8bit UNsigned) into a Long Register.
+// Folds ConvI2L(LoadUB): LDRB fills the low word and the high word
+// ($dst$$Register->successor()) is set to 0.
+instruct loadUB2L(iRegL dst, memoryB mem) %{
+  match(Set dst (ConvI2L (LoadUB mem)));
+  ins_cost(MEMORY_REF_COST);
+
+  size(8);
+  format %{ "LDRB  $dst.lo,$mem\t! ubyte -> long\n\t"
+            "MOV   $dst.hi,0" %}
+  ins_encode %{
+    __ ldrb($dst$$Register, $mem$$Address);
+    __ mov($dst$$Register->successor(), 0);
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Unsigned Byte (8 bit UNsigned) with immediate mask into Long Register.
+// Folds ConvI2L(AndI(LoadUB, mask)) into three instructions: LDRB into the
+// low word, zero the high word, then AND the low word with
+// limmI_low($mask$$constant, 8).
+instruct loadUB2L_limmI(iRegL dst, memoryB mem, limmIlow8 mask) %{
+  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
+
+  ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST);
+  size(12);
+  format %{ "LDRB  $dst.lo,$mem\t! ubyte -> long\n\t"
+            "MOV   $dst.hi,0\n\t"
+            "AND  $dst.lo,$dst.lo,$mask" %}
+  ins_encode %{
+    __ ldrb($dst$$Register, $mem$$Address);
+    __ mov($dst$$Register->successor(), 0);
+    __ andr($dst$$Register, $dst$$Register, limmI_low($mask$$constant, 8));
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Short (16bit signed).
+// Matches LoadS and emits a single LDRSH (declared size: 4 bytes).
+
+instruct loadS(iRegI dst, memoryS mem) %{
+  match(Set dst (LoadS mem));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDRSH   $dst,$mem\t! short" %}
+  ins_encode %{
+    __ ldrsh($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(iload_mask_mem);
+%}
+
+// Load Short (16 bit signed) to Byte (8 bit signed).
+// Replaces (LoadS << 24) >> 24 with one LDRSB from the same address.
+instruct loadS2B(iRegI dst, memoryS mem, immI_24 twentyfour) %{
+  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+
+  format %{ "LDRSB   $dst,$mem\t! short -> byte" %}
+  ins_encode %{
+    // A sign-extending byte load stands in for the shift pair (assumes the
+    // low-order byte lives at $mem, i.e. little-endian data — TODO confirm).
+    __ ldrsb($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(iload_mask_mem);
+%}
+
+// Load Short (16bit signed) into a Long Register.
+// Folds ConvI2L(LoadS): LDRSH fills the low word, then the high word is set
+// to the low word shifted right arithmetically by 31.
+instruct loadS2L(iRegL dst, memoryS mem) %{
+  match(Set dst (ConvI2L (LoadS mem)));
+  ins_cost(MEMORY_REF_COST);
+
+  size(8);
+  format %{ "LDRSH $dst.lo,$mem\t! short -> long\n\t"
+            "ASR   $dst.hi,$dst.lo,31" %}
+  ins_encode %{
+    __ ldrsh($dst$$Register, $mem$$Address);
+    __ mov($dst$$Register->successor(), $dst$$Register, asr(31));
+  %}
+  ins_pipe(iload_mask_mem);
+%}
+
+// Load Unsigned Short/Char (16bit UNsigned).
+// Matches LoadUS and emits a single LDRH (declared size: 4 bytes).
+
+
+instruct loadUS(iRegI dst, memoryS mem) %{
+  match(Set dst (LoadUS mem));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDRH   $dst,$mem\t! ushort/char" %}
+  ins_encode %{
+    __ ldrh($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed).
+// Replaces (LoadUS << 24) >> 24 with one LDRSB from the same address
+// (assumes the low-order byte lives at $mem — TODO confirm endianness).
+instruct loadUS2B(iRegI dst, memoryB mem, immI_24 twentyfour) %{
+  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDRSB   $dst,$mem\t! ushort -> byte" %}
+  ins_encode %{
+    __ ldrsb($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(iload_mask_mem);
+%}
+
+// Load Unsigned Short/Char (16bit UNsigned) into a Long Register.
+// Folds ConvI2L(LoadUS): LDRH fills the low word and the high word
+// ($dst$$Register->successor()) is set to 0. The format text labels the load
+// as "ushort" to match the unsigned LDRH that is emitted.
+instruct loadUS2L(iRegL dst, memoryS mem) %{
+  match(Set dst (ConvI2L (LoadUS mem)));
+  ins_cost(MEMORY_REF_COST);
+
+  size(8);
+  format %{ "LDRH  $dst.lo,$mem\t! ushort -> long\n\t"
+            "MOV   $dst.hi, 0" %}
+  ins_encode %{
+    __ ldrh($dst$$Register, $mem$$Address);
+    __ mov($dst$$Register->successor(), 0);
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Unsigned Short/Char (16bit UNsigned) with mask 0xFF into a Long Register.
+// Folds ConvI2L(AndI(LoadUS, 0xFF)) into a byte load: LDRB fills the low word
+// and the high word is set to 0 (assumes the low-order byte lives at $mem —
+// TODO confirm endianness). The format comment now says what is loaded, in
+// line with loadI2L_immI_255.
+instruct loadUS2L_immI_255(iRegL dst, memoryB mem, immI_255 mask) %{
+  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
+  ins_cost(MEMORY_REF_COST);
+
+  size(8);
+  format %{ "LDRB  $dst.lo,$mem\t! ushort & 0xFF -> long\n\t"
+            "MOV   $dst.hi, 0" %}
+  ins_encode %{
+    __ ldrb($dst$$Register, $mem$$Address);
+    __ mov($dst$$Register->successor(), 0);
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Unsigned Short/Char (16bit UNsigned) with a immediate mask into a Long Register.
+// Folds ConvI2L(AndI(LoadUS, mask)) into three instructions: LDRH into the
+// low word, zero the high word, then AND the low word with $mask$$constant.
+instruct loadUS2L_limmI(iRegL dst, memoryS mem, limmI mask) %{
+  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
+  ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST);
+
+  size(12);
+  format %{ "LDRH   $dst,$mem\t! ushort/char & mask -> long\n\t"
+            "MOV    $dst.hi, 0\n\t"
+            "AND    $dst,$dst,$mask" %}
+  ins_encode %{
+    __ ldrh($dst$$Register, $mem$$Address);
+    __ mov($dst$$Register->successor(), 0);
+    __ andr($dst$$Register, $dst$$Register, $mask$$constant);
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Integer.
+// Matches LoadI and emits a single LDR (declared size: 4 bytes).
+
+instruct loadI(iRegI dst, memoryI mem) %{
+  match(Set dst (LoadI mem));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "ldr $dst,$mem\t! int" %}
+  ins_encode %{
+    __ ldr($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Integer to Byte (8 bit signed).
+// Replaces (LoadI << 24) >> 24 with one LDRSB from the same address
+// (assumes the low-order byte lives at $mem — TODO confirm endianness).
+instruct loadI2B(iRegI dst, memoryS mem, immI_24 twentyfour) %{
+  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+
+  format %{ "LDRSB   $dst,$mem\t! int -> byte" %}
+  ins_encode %{
+    __ ldrsb($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(iload_mask_mem);
+%}
+
+// Load Integer to Unsigned Byte (8 bit UNsigned).
+// Replaces AndI(LoadI, 0xFF) with one LDRB from the same address
+// (assumes the low-order byte lives at $mem — TODO confirm endianness).
+instruct loadI2UB(iRegI dst, memoryB mem, immI_255 mask) %{
+  match(Set dst (AndI (LoadI mem) mask));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+
+  format %{ "LDRB   $dst,$mem\t! int -> ubyte" %}
+  ins_encode %{
+    __ ldrb($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(iload_mask_mem);
+%}
+
+// Load Integer to Short (16 bit signed).
+// Replaces (LoadI << 16) >> 16 with one LDRSH from the same address
+// (assumes the low-order halfword lives at $mem — TODO confirm endianness).
+instruct loadI2S(iRegI dst, memoryS mem, immI_16 sixteen) %{
+  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDRSH   $dst,$mem\t! int -> short" %}
+  ins_encode %{
+    __ ldrsh($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(iload_mask_mem);
+%}
+
+// Load Integer to Unsigned Short (16 bit UNsigned).
+// Replaces AndI(LoadI, 0xFFFF) with one LDRH from the same address
+// (assumes the low-order halfword lives at $mem — TODO confirm endianness).
+instruct loadI2US(iRegI dst, memoryS mem, immI_65535 mask) %{
+  match(Set dst (AndI (LoadI mem) mask));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDRH   $dst,$mem\t! int -> ushort/char" %}
+  ins_encode %{
+    __ ldrh($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(iload_mask_mem);
+%}
+
+// Load Integer into a Long Register.
+// Folds ConvI2L(LoadI): LDR fills the low word, then the high word is set to
+// the low word shifted right arithmetically by 31.
+instruct loadI2L(iRegL dst, memoryI mem) %{
+  match(Set dst (ConvI2L (LoadI mem)));
+  ins_cost(MEMORY_REF_COST);
+
+  size(8);
+  format %{ "LDR   $dst.lo,$mem\t! int -> long\n\t"
+            "ASR   $dst.hi,$dst.lo,31\t! int->long" %}
+  ins_encode %{
+    __ ldr($dst$$Register, $mem$$Address);
+    __ mov($dst$$Register->successor(), $dst$$Register, asr(31));
+  %}
+  ins_pipe(iload_mask_mem);
+%}
+
+// Load Integer with mask 0xFF into a Long Register.
+// Folds ConvI2L(AndI(LoadI, 0xFF)) into a byte load: LDRB fills the low word
+// and the high word is set to 0 (assumes the low-order byte lives at $mem —
+// TODO confirm endianness).
+instruct loadI2L_immI_255(iRegL dst, memoryB mem, immI_255 mask) %{
+  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
+  ins_cost(MEMORY_REF_COST);
+
+  size(8);
+  format %{ "LDRB   $dst.lo,$mem\t! int & 0xFF -> long\n\t"
+            "MOV    $dst.hi, 0" %}
+  ins_encode %{
+    __ ldrb($dst$$Register, $mem$$Address);
+    __ mov($dst$$Register->successor(), 0);
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Integer with mask 0xFFFF into a Long Register.
+// Folds ConvI2L(AndI(LoadI, 0xFFFF)) into a halfword load: LDRH fills the low
+// word and the high word is set to 0 (assumes the low-order halfword lives at
+// $mem — TODO confirm endianness).
+instruct loadI2L_immI_65535(iRegL dst, memoryS mem, immI_65535 mask) %{
+  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
+  ins_cost(MEMORY_REF_COST);
+
+  size(8);
+  format %{ "LDRH   $dst,$mem\t! int & 0xFFFF -> long\n\t"
+            "MOV    $dst.hi, 0" %}
+  ins_encode %{
+    __ ldrh($dst$$Register, $mem$$Address);
+    __ mov($dst$$Register->successor(), 0);
+  %}
+  ins_pipe(iload_mask_mem);
+%}
+
+// Load Integer with a 31-bit immediate mask into a Long Register.
+// Folds ConvI2L(AndI(LoadI, mask)) into three instructions: LDR into the low
+// word, zero the high word, then AND the low word with $mask$$constant passed
+// directly as an immediate.
+instruct loadI2L_limmU31(iRegL dst, memoryI mem, limmU31 mask) %{
+  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
+  ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST);
+
+  size(12);
+  format %{ "LDR   $dst.lo,$mem\t! int -> long\n\t"
+            "MOV    $dst.hi, 0\n\t"
+            "AND   $dst,$dst,$mask" %}
+
+  ins_encode %{
+    __ ldr($dst$$Register, $mem$$Address);
+    __ mov($dst$$Register->successor(), 0);
+    __ andr($dst$$Register, $dst$$Register, $mask$$constant);
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Integer with a 31-bit mask into a Long Register.
+// Folds ConvI2L(AndI(LoadI, mask)) for masks that are first materialised in
+// $tmp: LDR into the low word, zero the high word, move the mask into $tmp,
+// then AND the low word with $tmp. Both dst and tmp are declared TEMP.
+// The format text mirrors the emitted operand order (destination first).
+// FIXME: use iRegI mask, remove tmp?
+instruct loadI2L_immU31(iRegL dst, memoryI mem, immU31 mask, iRegI tmp) %{
+  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
+  effect(TEMP dst, TEMP tmp);
+
+  ins_cost(MEMORY_REF_COST + 4*DEFAULT_COST);
+  size(20);
+  format %{ "LDR      $dst.lo,$mem\t! int & 31-bit mask -> long\n\t"
+            "MOV      $dst.hi, 0\n\t"
+            "MOV_SLOW $tmp,$mask\n\t"
+            "AND      $dst.lo,$dst.lo,$tmp" %}
+  ins_encode %{
+    __ ldr($dst$$Register, $mem$$Address);
+    __ mov($dst$$Register->successor(), 0);
+    __ mov($tmp$$Register, $mask$$constant);
+    __ andr($dst$$Register, $dst$$Register, $tmp$$Register);
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Unsigned Integer into a Long Register.
+// Folds AndL(ConvI2L(LoadI), immL_32bits): LDR fills the low word and the
+// high word is set to 0.
+instruct loadUI2L(iRegL dst, memoryI mem, immL_32bits mask) %{
+  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
+  ins_cost(MEMORY_REF_COST);
+
+  size(8);
+  format %{ "LDR   $dst.lo,$mem\t! uint -> long\n\t"
+            "MOV   $dst.hi,0" %}
+  ins_encode %{
+    __ ldr($dst$$Register, $mem$$Address);
+    __ mov($dst$$Register->successor(), 0);
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Long.
+// Single-instruction form: applies only when the LoadL node does not require
+// atomic access, uses an iRegLd destination (declared TEMP) and emits one LDRD.
+
+instruct loadL(iRegLd dst, memoryL mem ) %{
+  predicate(!((LoadLNode*)n)->require_atomic_access());
+  match(Set dst (LoadL mem));
+  effect(TEMP dst);
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "ldrd  $dst,$mem\t! long" %}
+  ins_encode %{
+    __ ldrd($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Long as two word loads (non-atomic LoadL only); costed one
+// DEFAULT_COST above the LDRD form.
+instruct loadL_2instr(iRegL dst, memorylong mem ) %{
+  predicate(!((LoadLNode*)n)->require_atomic_access());
+  match(Set dst (LoadL mem));
+  ins_cost(MEMORY_REF_COST + DEFAULT_COST);
+
+  size(8);
+  format %{ "LDR    $dst.lo,$mem \t! long order of instrs reversed if $dst.lo == base($mem)\n\t"
+            "LDR    $dst.hi,$mem+4 or $mem" %}
+  ins_encode %{
+    // Low word at disp, high word at disp + 4.
+    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
+    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
+
+    // If the low destination register is also the base register, load the
+    // high word first so the base is still intact for the second load.
+    if ($dst$$Register == reg_to_register_object($mem$$base)) {
+      __ ldr($dst$$Register->successor(), Amemhi);
+      __ ldr($dst$$Register, Amemlo);
+    } else {
+      __ ldr($dst$$Register, Amemlo);
+      __ ldr($dst$$Register->successor(), Amemhi);
+    }
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Long when the LoadL node requires atomic access and the address is a
+// plain register ('indirect' operand). Delegates to the macro assembler's
+// atomic_ldrd with the low register, the next register ($dst$$reg + 1) and
+// the base register; the format string names the instruction as LDREXD.
+instruct loadL_volatile(iRegL dst, indirect mem ) %{
+  predicate(((LoadLNode*)n)->require_atomic_access());
+  match(Set dst (LoadL mem));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDREXD    $dst,$mem\t! long" %}
+  ins_encode %{
+    __ atomic_ldrd($dst$$Register, reg_to_register_object($dst$$reg + 1), reg_to_register_object($mem$$base));
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Long when the LoadL node requires atomic access, going through the FP
+// register file: a 64-bit VLDR into f14 followed by a VMOV of f14 into the
+// destination register pair.
+// The two format lines are separated by "\n\t" so they print as two
+// instructions, and the stray "\n't" suffix is removed.
+// NOTE(review): f14 is written without a declared effect — presumably it is
+// reserved as a scratch register; confirm against the register definitions.
+instruct loadL_volatile_fp(iRegL dst, memoryD mem ) %{
+  predicate(((LoadLNode*)n)->require_atomic_access());
+  match(Set dst (LoadL mem));
+  ins_cost(MEMORY_REF_COST);
+
+  size(8);
+  format %{ "FLDD      S14, $mem\n\t"
+            "FMRRD    $dst, S14\t! long" %}
+  ins_encode %{
+    __ vldr_f64(f14, $mem$$Address);
+    __ vmov_f64($dst$$Register, $dst$$Register->successor(), f14);
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Long - UNaligned: matches LoadL_unaligned and always uses two word
+// loads.
+instruct loadL_unaligned(iRegL dst, memorylong mem ) %{
+  match(Set dst (LoadL_unaligned mem));
+  ins_cost(MEMORY_REF_COST);
+
+  size(8);
+  format %{ "LDR    $dst.lo,$mem\t! long order of instrs reversed if $dst.lo == base($mem)\n\t"
+            "LDR    $dst.hi,$mem+4" %}
+  ins_encode %{
+    // Low word at disp, high word at disp + 4.
+    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
+    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
+
+    // If the low destination register is also the base register, load the
+    // high word first so the base is still intact for the second load.
+    if ($dst$$Register == reg_to_register_object($mem$$base)) {
+      __ ldr($dst$$Register->successor(), Amemhi);
+      __ ldr($dst$$Register, Amemlo);
+    } else {
+      __ ldr($dst$$Register, Amemlo);
+      __ ldr($dst$$Register->successor(), Amemhi);
+    }
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Range.
+// Matches LoadRange and emits a plain LDR (declared size: 4 bytes).
+instruct loadRange(iRegI dst, memoryI mem) %{
+  match(Set dst (LoadRange mem));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDR_u32 $dst,$mem\t! range" %}
+  ins_encode %{
+    __ ldr($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Pointer.
+// Matches LoadP and emits a single LDR (declared size: 4 bytes).
+
+instruct loadP(iRegP dst, memoryP mem) %{
+  match(Set dst (LoadP mem));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+
+  format %{ "LDR   $dst,$mem\t! ptr" %}
+  ins_encode %{
+    __ ldr($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Klass Pointer.
+// Matches LoadKlass and emits a single LDR (declared size: 4 bytes); the
+// address operand is declared as memoryI.
+instruct loadKlass(iRegP dst, memoryI mem) %{
+  match(Set dst (LoadKlass mem));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+
+  format %{ "LDR   $dst,$mem\t! klass ptr" %}
+  ins_encode %{
+    __ ldr($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Double.
+// Matches LoadD and emits one 64-bit VLDR (declared size: 4 bytes).
+instruct loadD(regD dst, memoryD mem) %{
+  match(Set dst (LoadD mem));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  // FIXME: needs to be atomic, but  ARMv7 A.R.M. guarantees
+  // only LDREXD and STREXD are 64-bit single-copy atomic
+  format %{ "FLDD   $dst,$mem" %}
+  ins_encode %{
+    __ vldr_f64($dst$$FloatRegister, $mem$$Address);
+  %}
+  ins_pipe(floadD_mem);
+%}
+
+// Load Double - UNaligned.
+// Matches LoadD_unaligned and loads the two 32-bit halves separately: the
+// word at disp into $dst and the word at disp + 4 into its SINGLE successor.
+instruct loadD_unaligned(regD_low dst, memoryF2 mem ) %{
+  match(Set dst (LoadD_unaligned mem));
+  ins_cost(MEMORY_REF_COST*2+DEFAULT_COST);
+  size(8);
+  format %{ "FLDS    $dst.lo,$mem\t! misaligned double\n"
+          "\tFLDS    $dst.hi,$mem+4\t!" %}
+  ins_encode %{
+    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
+    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
+      __ vldr_f32($dst$$FloatRegister, Amemlo);
+      __ vldr_f32($dst$$FloatRegister->successor(FloatRegisterImpl::SINGLE), Amemhi);
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Load Float.
+// Matches LoadF and emits one 32-bit VLDR (declared size: 4 bytes).
+instruct loadF(regF dst, memoryF mem) %{
+  match(Set dst (LoadF mem));
+
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "FLDS    $dst,$mem" %}
+  ins_encode %{
+    __ vldr_f32($dst$$FloatRegister, $mem$$Address);
+  %}
+  ins_pipe(floadF_mem);
+%}
+
+// Load Constant.
+// General int constant: no size() is declared, and the cost (1.5 x
+// DEFAULT_COST) is set explicitly, unlike the single-instruction forms
+// (loadConIMov, loadConIMovn, loadConI16) that follow.
+instruct loadConI( iRegI dst, immI src ) %{
+  match(Set dst src);
+  ins_cost(DEFAULT_COST * 3/2);
+  format %{ "MOV_SLOW    $dst, $src" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src$$constant);
+  %}
+  ins_pipe(ialu_hi_lo_reg);
+%}
+
+// Load an int constant of operand class immIMov with a single MOV
+// (declared size: 4 bytes).
+instruct loadConIMov( iRegI dst, immIMov src ) %{
+  match(Set dst src);
+  size(4);
+  format %{ "MOV    $dst, $src" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src$$constant);
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+// Load an int constant of operand class immIRotn with a single MVN: the
+// bitwise complement of the constant is passed to mvn_i
+// (declared size: 4 bytes).
+instruct loadConIMovn( iRegI dst, immIRotn src ) %{
+  match(Set dst src);
+  size(4);
+  format %{ "MVN    $dst, ~$src" %}
+  ins_encode %{
+    __ mvn_i($dst$$Register, ~$src$$constant);
+  %}
+  ins_pipe(ialu_imm_n);
+%}
+
+// Load an int constant of operand class immI16 with a single MOVW
+// (declared size: 4 bytes).
+instruct loadConI16( iRegI dst, immI16 src ) %{
+  match(Set dst src);
+  size(4);
+  format %{ "MOVW    $dst, $src" %}
+  ins_encode %{
+    __ movw_i($dst$$Register, $src$$constant);
+  %}
+  ins_pipe(ialu_imm_n);
+%}
+
+// Load a pointer constant. The emitted sequence depends on the relocation
+// type of the constant operand: oops go through movoop, metadata through
+// mov_metadata, and anything else through a plain mov.
+instruct loadConP(iRegP dst, immP src) %{
+  match(Set dst src);
+  ins_cost(DEFAULT_COST * 3/2);
+  format %{ "MOV_SLOW    $dst,$src\t!ptr" %}
+  ins_encode %{
+    // _opnds[1] is the constant operand (src).
+    relocInfo::relocType constant_reloc = _opnds[1]->constant_reloc();
+    intptr_t val = $src$$constant;
+    if (constant_reloc == relocInfo::oop_type) {
+      __ movoop($dst$$Register, (jobject)val, true);
+    } else if (constant_reloc == relocInfo::metadata_type) {
+      __ mov_metadata($dst$$Register, (Metadata*)val);
+    } else {
+      __ mov($dst$$Register, val);
+    }
+  %}
+  ins_pipe(loadConP);
+%}
+
+
+// Load a pointer constant of operand class immP_poll with a plain mov of the
+// raw constant; costed at DEFAULT_COST, below the generic loadConP.
+instruct loadConP_poll(iRegP dst, immP_poll src) %{
+  match(Set dst src);
+  ins_cost(DEFAULT_COST);
+  format %{ "MOV_SLOW    $dst,$src\t!ptr" %}
+  ins_encode %{
+      __ mov($dst$$Register, $src$$constant);
+  %}
+  ins_pipe(loadConP_poll);
+%}
+
+// Load a long constant as two independent 32-bit moves: the low 32 bits go
+// into register $dst$$reg and the high 32 bits (unsigned shift of the
+// constant) into register $dst$$reg + 1. No size() is declared.
+instruct loadConL(iRegL dst, immL src) %{
+  match(Set dst src);
+  ins_cost(DEFAULT_COST * 4);
+  format %{ "MOV_SLOW   $dst.lo, $src & 0x0FFFFFFFFL \t! long\n\t"
+            "MOV_SLOW   $dst.hi, $src >> 32" %}
+  ins_encode %{
+    __ mov(reg_to_register_object($dst$$reg), $src$$constant & 0x0FFFFFFFFL);
+    __ mov(reg_to_register_object($dst$$reg + 1), ((julong)($src$$constant)) >> 32);
+  %}
+  ins_pipe(loadConL);
+%}
+
+// Load a long constant of operand class immL16 with two MOVW instructions:
+// the constant into the low word and 0 into the high word (register
+// $dst$$reg + 1). The format text ends after the second instruction; the
+// dangling "\n\t" continuation has been dropped.
+instruct loadConL16( iRegL dst, immL16 src ) %{
+  match(Set dst src);
+  ins_cost(DEFAULT_COST * 2);
+
+  size(8);
+  format %{ "MOVW    $dst.lo, $src \n\t"
+            "MOVW    $dst.hi, 0" %}
+  ins_encode %{
+    __ movw_i($dst$$Register, $src$$constant);
+    __ movw_i(reg_to_register_object($dst$$reg + 1), 0);
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+// Load a float constant of operand class imm8F with a single immediate
+// vmov_f32 (declared size: 4 bytes).
+instruct loadConF_imm8(regF dst, imm8F src) %{
+  match(Set dst src);
+  ins_cost(DEFAULT_COST);
+  size(4);
+
+  format %{ "FCONSTS      $dst, $src"%}
+
+  ins_encode %{
+    __ vmov_f32($dst$$FloatRegister, $src$$constant);
+  %}
+  ins_pipe(loadConFD); // FIXME
+%}
+
+// Load an arbitrary float constant via an integer temp: the float's bit
+// pattern is moved into $tmp and then transferred to the FP register.
+// Declared size is 12 bytes (3 instructions).
+instruct loadConF(regF dst, immF src, iRegI tmp) %{
+  match(Set dst src);
+  ins_cost(DEFAULT_COST * 2);
+  effect(TEMP tmp);
+  size(3*4);
+
+  format %{ "MOV_SLOW  $tmp, $src\n\t"
+            "FMSR      $dst, $tmp"%}
+
+  ins_encode %{
+    // FIXME revisit once 6961697 is in
+    // Reinterpret the float constant as its 32-bit integer representation.
+    union {
+      jfloat f;
+      int i;
+    } v;
+    v.f = $src$$constant;
+    __ mov($tmp$$Register, v.i);
+    __ vmov_f32($dst$$FloatRegister, $tmp$$Register);
+  %}
+  ins_pipe(loadConFD); // FIXME
+%}
+
+// Load a double constant of operand class imm8D with a single immediate
+// vmov_f64 (declared size: 4 bytes).
+instruct loadConD_imm8(regD dst, imm8D src) %{
+  match(Set dst src);
+  ins_cost(DEFAULT_COST);
+  size(4);
+
+  format %{ "FCONSTD      $dst, $src"%}
+
+  ins_encode %{
+    __ vmov_f64($dst$$FloatRegister, $src$$constant);
+  %}
+  ins_pipe(loadConFD); // FIXME
+%}
+
+// Load an arbitrary double constant from the constant table.
+// When the table offset does not pass is_memoryD(), the full address is
+// first computed into $tmp and the load then uses offset 0. No size() is
+// declared because the sequence length varies.
+instruct loadConD(regD dst, immD src, iRegP tmp) %{
+  match(Set dst src);
+  effect(TEMP tmp);
+  ins_cost(MEMORY_REF_COST);
+  format %{ "FLDD  $dst, [$constanttablebase + $constantoffset]\t! load from constant table: double=$src" %}
+
+  ins_encode %{
+    Register r = $constanttablebase;
+    int offset  = $constantoffset($src);
+    if (!is_memoryD(offset)) {                // can't use a predicate
+                                              // in load constant instructs
+      __ add($tmp$$Register, r, offset);
+      r = $tmp$$Register;
+      offset = 0;
+    }
+    __ vldr_f64($dst$$FloatRegister, Address(r, offset));
+  %}
+  ins_pipe(loadConFD);
+%}
+
+// Prefetch instructions.
+// Must be safe to execute with invalid address (cannot fault).
+
+// Allocation prefetch when VM_Version::features() has FT_MP_EXT set:
+// emits PLDW (declared size: 4 bytes).
+instruct prefetchAlloc_mp( memoryP mem ) %{
+  predicate(VM_Version::features() & FT_MP_EXT);
+  match( PrefetchAllocation mem );
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+
+  format %{ "PLDW $mem\t! Prefetch allocation" %}
+  ins_encode %{
+    __ pldw($mem$$Address);
+  %}
+  ins_pipe(iload_mem);
+%}
+
+// Allocation prefetch when VM_Version::features() does not have FT_MP_EXT
+// set: emits PLD instead of PLDW (declared size: 4 bytes).
+instruct prefetchAlloc_sp( memoryP mem ) %{
+  predicate(!(VM_Version::features() & FT_MP_EXT));
+  match( PrefetchAllocation mem );
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+
+  format %{ "PLD $mem\t! Prefetch allocation" %}
+  ins_encode %{
+    __ pld($mem$$Address);
+  %}
+  ins_pipe(iload_mem);
+%}
+
+//----------Store Instructions-------------------------------------------------
+// Store Byte.
+// Matches StoreB and emits a single STRB (declared size: 4 bytes).
+instruct storeB(memoryB mem, store_RegI src) %{
+  match(Set mem (StoreB mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "STRB    $src,$mem\t! byte" %}
+  ins_encode %{
+    __ strb($src$$Register, $mem$$Address);
+  %}
+  ins_pipe(istore_mem_reg);
+%}
+
+// Store card-mark byte.
+// Matches StoreCM and emits the same single STRB as storeB.
+instruct storeCM(memoryB mem, store_RegI src) %{
+  match(Set mem (StoreCM mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "STRB    $src,$mem\t! CMS card-mark byte" %}
+  ins_encode %{
+    __ strb($src$$Register, $mem$$Address);
+  %}
+  ins_pipe(istore_mem_reg);
+%}
+
+// Store Char/Short.
+// Matches StoreC and emits a single STRH (declared size: 4 bytes).
+
+instruct storeC(memoryS mem, store_RegI src) %{
+  match(Set mem (StoreC mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "STRH    $src,$mem\t! short" %}
+  ins_encode %{
+    __ strh($src$$Register, $mem$$Address);
+  %}
+  ins_pipe(istore_mem_reg);
+%}
+
+// Store Integer.
+// Matches StoreI and emits a single STR (declared size: 4 bytes).
+
+instruct storeI(memoryI mem, store_RegI src) %{
+  match(Set mem (StoreI mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "str $src,$mem" %}
+  ins_encode %{
+    __ str($src$$Register, $mem$$Address);
+  %}
+  ins_pipe(istore_mem_reg);
+%}
+
+// Store Long.
+// Single-instruction form: applies only when the StoreL node does not
+// require atomic access, takes a store_RegLd source and emits one STRD.
+// The format text is a single line; the dangling "\n\t" continuation has
+// been dropped.
+
+instruct storeL(memoryL mem, store_RegLd src) %{
+  predicate(!((StoreLNode*)n)->require_atomic_access());
+  match(Set mem (StoreL mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "strd  $src,$mem\t! long" %}
+
+  ins_encode %{
+    __ strd($src$$Register, $mem$$Address);
+  %}
+  ins_pipe(istore_mem_reg);
+%}
+
+// Store Long as two word stores (non-atomic StoreL only); costed one
+// DEFAULT_COST above the STRD form.
+instruct storeL_2instr(memorylong mem, iRegL src) %{
+  predicate(!((StoreLNode*)n)->require_atomic_access());
+  match(Set mem (StoreL mem src));
+  ins_cost(MEMORY_REF_COST + DEFAULT_COST);
+
+  size(8);
+  format %{ "STR    $src.lo,$mem\t! long\n\t"
+            "STR    $src.hi,$mem+4" %}
+
+  ins_encode %{
+    // Low word goes to disp, high word (successor register) to disp + 4.
+    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
+    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
+    __ str($src$$Register, Amemlo);
+    __ str($src$$Register->successor(), Amemhi);
+  %}
+  ins_pipe(istore_mem_reg);
+%}
+
+// Store Long when the StoreL node requires atomic access and the address is
+// a plain register ('indirect' operand). Emits one STMIA of the register
+// pair ($src and register $src$$reg + 1) to the base register, without
+// write-back. See the FIXME below regarding atomicity.
+instruct storeL_volatile(indirect mem, iRegL src) %{
+  predicate(((StoreLNode*)n)->require_atomic_access());
+  match(Set mem (StoreL mem src));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "STMIA    $src,$mem\t! long" %}
+  ins_encode %{
+    // FIXME: why is stmia considered atomic?  Should be strexd
+    // TODO: need 3 temp registers to use atomic_strd
+    __ stmia(reg_to_register_object($mem$$base), RegSet::of($src$$Register, reg_to_register_object($src$$reg + 1)).bits(), /*wb*/false);
+  %}
+  ins_pipe(istore_mem_reg);
+%}
+
+// Store Long when the StoreL node requires atomic access, going through the
+// FP register file: the register pair is moved into f14, which is then
+// stored with one 64-bit VSTR.
+// NOTE(review): f14 is written without a declared effect — presumably it is
+// reserved as a scratch register; confirm against the register definitions.
+instruct storeL_volatile_fp(memoryD mem, iRegL src) %{
+  predicate(((StoreLNode*)n)->require_atomic_access());
+  match(Set mem (StoreL mem src));
+  ins_cost(MEMORY_REF_COST);
+  size(8);
+  format %{ "FMDRR    S14, $src\t! long \n\t"
+            "FSTD     S14, $mem" %}
+  ins_encode %{
+    __ vmov_f64(f14, $src$$Register, $src$$Register->successor());
+    __ vstr_f64(f14, $mem$$Address);
+  %}
+  ins_pipe(istore_mem_reg);
+%}
+
+// Store Pointer.
+// Matches StoreP and emits a single STR (declared size: 4 bytes); scheduled
+// with the istore_mem_spORreg pipe class.
+
+instruct storeP(memoryP mem, store_ptr_RegP src) %{
+  match(Set mem (StoreP mem src));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+
+  format %{ "STR    $src,$mem\t! ptr" %}
+  ins_encode %{
+    __ str($src$$Register, $mem$$Address);
+  %}
+  ins_pipe(istore_mem_spORreg);
+%}
+
+// Store Double.
+// Matches StoreD and emits one 64-bit VSTR (declared size: 4 bytes).
+
+instruct storeD(memoryD mem, regD src) %{
+  match(Set mem (StoreD mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  // FIXME: needs to be atomic, but  ARMv7 A.R.M. guarantees
+  // only LDREXD and STREXD are 64-bit single-copy atomic
+  format %{ "FSTD   $src,$mem" %}
+  ins_encode %{
+    __ vstr_f64($src$$FloatRegister, $mem$$Address);
+  %}
+  ins_pipe(fstoreD_mem_reg);
+%}
+
+// Store Float
+
+instruct storeF( memoryF mem, regF src) %{
+  match(Set mem (StoreF mem src));
+  ins_cost(MEMORY_REF_COST);
+  // Float store: one 32-bit FP store (vstr_f32) of the source register.
+  size(4);
+  format %{ "FSTS    $src,$mem" %}
+  ins_encode %{
+    __ vstr_f32($src$$FloatRegister, $mem$$Address);
+  %}
+  ins_pipe(fstoreF_mem_reg);
+%}
+
+//----------MemBar Instructions-----------------------------------------------
+// Memory barrier flavors
+
+// TODO: take advantage of Aarch64 load-acquire, store-release, etc
+// pattern-match out unnecessary membars
+instruct membar_storestore() %{
+  match(MemBarStoreStore);
+  ins_cost(4*MEMORY_REF_COST);
+  // StoreStore barrier: emits one membar with the StoreStore mask.
+  size(4);
+  format %{ "MEMBAR-storestore" %}
+  ins_encode %{
+    __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore));
+  %}
+  ins_pipe(long_memory_op);
+%}
+
+instruct membar_acquire() %{
+  match(MemBarAcquire);
+  match(LoadFence);
+  ins_cost(4*MEMORY_REF_COST);
+  // Acquire barrier for both MemBarAcquire and LoadFence: one membar with LoadLoad|LoadStore.
+  size(4);
+  format %{ "MEMBAR-acquire" %}
+  ins_encode %{
+    __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadLoad | MacroAssembler::LoadStore));
+  %}
+  ins_pipe(long_memory_op);
+%}
+
+instruct membar_acquire_lock() %{
+  match(MemBarAcquireLock);
+  ins_cost(0);
+  // Emits nothing: per the format text, the CAS in the preceding FastLock already provides the ordering.
+  size(0);
+  format %{ "!MEMBAR-acquire (CAS in prior FastLock so empty encoding)" %}
+  ins_encode( );
+  ins_pipe(empty);
+%}
+
+instruct membar_release() %{
+  match(MemBarRelease);
+  match(StoreFence);
+  ins_cost(4*MEMORY_REF_COST);
+  // Release barrier for both MemBarRelease and StoreFence: one membar with StoreStore|LoadStore.
+  size(4);
+  format %{ "MEMBAR-release" %}
+  ins_encode %{
+    __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore | MacroAssembler::LoadStore));
+  %}
+  ins_pipe(long_memory_op);
+%}
+
+instruct membar_release_lock() %{
+  match(MemBarReleaseLock);
+  ins_cost(0);
+  // Emits nothing: per the format text, the CAS in the following FastUnlock already provides the ordering.
+  size(0);
+  format %{ "!MEMBAR-release (CAS in succeeding FastUnlock so empty encoding)" %}
+  ins_encode( );
+  ins_pipe(empty);
+%}
+
+instruct membar_volatile() %{
+  match(MemBarVolatile);
+  ins_cost(4*MEMORY_REF_COST);
+  // Volatile barrier: one membar with the StoreLoad mask, spelled like the other membar stanzas.
+  size(4);
+  format %{ "MEMBAR-volatile" %}
+  ins_encode %{
+    __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad));
+  %}
+  ins_pipe(long_memory_op);
+%}
+
+instruct unnecessary_membar_volatile() %{
+  match(MemBarVolatile);
+  predicate(Matcher::post_store_load_barrier(n));
+  ins_cost(0);
+  // Elides a MemBarVolatile when Matcher::post_store_load_barrier(n) holds: zero cost, zero size, empty encoding.
+  size(0);
+  format %{ "!MEMBAR-volatile (unnecessary so empty encoding)" %}
+  ins_encode( );
+  ins_pipe(empty);
+%}
+
+//----------Register Move Instructions-----------------------------------------
+// instruct roundDouble_nop(regD dst) %{
+//   match(Set dst (RoundDouble dst));
+//   ins_pipe(empty);
+// %}
+
+
+// instruct roundFloat_nop(regF dst) %{
+//   match(Set dst (RoundFloat dst));
+//   ins_pipe(empty);
+// %}
+
+
+// Cast Index to Pointer for unsafe natives
+instruct castX2P(iRegX src, iRegP dst) %{
+  // Reinterpret an iRegX value as a pointer; a MOV is emitted only when the two registers differ.
+  match(Set dst (CastX2P src));
+  format %{ "MOV    $dst,$src\t! IntX->Ptr if $dst != $src" %}
+  ins_encode %{
+    const Register to = $dst$$Register;
+    const Register from = $src$$Register;
+    if (to != from) { __ mov(to, from); }
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Cast Pointer to Index for unsafe natives
+instruct castP2X(iRegP src, iRegX dst) %{
+  // Reinterpret a pointer as an iRegX value; a MOV is emitted only when the two registers differ.
+  match(Set dst (CastP2X src));
+  format %{ "MOV    $dst,$src\t! Ptr->IntX if $dst != $src" %}
+  ins_encode %{
+    const Register to = $dst$$Register;
+    const Register from = $src$$Register;
+    if (to != from) { __ mov(to, from); }
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+//----------Conditional Move---------------------------------------------------
+// Conditional move
+instruct cmovIP_reg(cmpOpP cmp, flagsRegP pcc, iRegI dst, iRegI src) %{  // CMoveI keyed on a cmpOpP/flagsRegP condition, register source
+  match(Set dst (CMoveI (Binary cmp pcc) (Binary dst src)));  // dst is both the kept value and the result
+  ins_cost(150);
+  size(4);
+  format %{ "MOV$cmp  $dst,$src\t! int" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));  // one MOV predicated on the operand's condition code
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovIP_immMov(cmpOpP cmp, flagsRegP pcc, iRegI dst, immIMov src) %{  // CMoveI keyed on a cmpOpP/flagsRegP condition, immIMov constant source
+  match(Set dst (CMoveI (Binary cmp pcc) (Binary dst src)));
+  ins_cost(140);  // below the register-source form's 150
+  size(4);
+  format %{ "MOV$cmp  $dst,$src" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));  // one MOV of the constant, predicated on the condition code
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+instruct cmovIP_imm16(cmpOpP cmp, flagsRegP pcc, iRegI dst, immI16 src) %{  // CMoveI keyed on a cmpOpP/flagsRegP condition, immI16 constant source
+  match(Set dst (CMoveI (Binary cmp pcc) (Binary dst src)));
+  ins_cost(140);  // below the register-source form's 150
+  size(4);
+  format %{ "MOVw$cmp  $dst,$src" %}
+  ins_encode %{
+    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));  // one movw_i of the constant, predicated on the condition code
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+instruct cmovI_reg(cmpOp cmp, flagsReg icc, iRegI dst, iRegI src) %{  // CMoveI keyed on a cmpOp/flagsReg condition, register source
+  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));  // dst is both the kept value and the result
+  ins_cost(150);
+  size(4);
+  format %{ "MOV$cmp  $dst,$src" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));  // one MOV predicated on the operand's condition code
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovI_immMov(cmpOp cmp, flagsReg icc, iRegI dst, immIMov src) %{  // CMoveI keyed on a cmpOp/flagsReg condition, immIMov constant source
+  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
+  ins_cost(140);  // below the register-source form's 150
+  size(4);
+  format %{ "MOV$cmp  $dst,$src" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));  // one MOV of the constant, predicated on the condition code
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+instruct cmovII_imm16(cmpOp cmp, flagsReg icc, iRegI dst, immI16 src) %{  // CMoveI keyed on a cmpOp/flagsReg condition, immI16 constant source
+  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
+  ins_cost(140);  // below the register-source form's 150
+  size(4);
+  format %{ "MOVw$cmp  $dst,$src" %}
+  ins_encode %{
+    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));  // one movw_i of the constant, predicated on the condition code
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+instruct cmovII_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, iRegI src) %{  // CMoveI on flagsReg_EQNELTGE flags, register source
+  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);  // accepted only when the Bool test is eq, ne, lt or ge
+  ins_cost(150);
+  size(4);
+  format %{ "MOV$cmp  $dst,$src" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));  // one MOV predicated on the operand's condition code
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovII_immMov_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, immIMov src) %{  // CMoveI on flagsReg_EQNELTGE flags, immIMov constant source
+  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);  // accepted only when the Bool test is eq, ne, lt or ge
+  ins_cost(140);
+  size(4);
+  format %{ "MOV$cmp  $dst,$src" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));  // one MOV of the constant, predicated on the condition code
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+instruct cmovII_imm16_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, immI16 src) %{  // CMoveI on flagsReg_EQNELTGE flags, immI16 constant source
+  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);  // accepted only when the Bool test is eq, ne, lt or ge
+  ins_cost(140);
+  size(4);
+  format %{ "MOVW$cmp  $dst,$src" %}
+  ins_encode %{
+    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));  // one movw_i of the constant, predicated on the condition code
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+instruct cmovIIu_reg(cmpOpU cmp, flagsRegU icc, iRegI dst, iRegI src) %{  // CMoveI keyed on a cmpOpU/flagsRegU condition, register source
+  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));  // dst is both the kept value and the result
+  ins_cost(150);
+  size(4);
+  format %{ "MOV$cmp  $dst,$src" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));  // one MOV predicated on the operand's condition code
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovIIu_immMov(cmpOpU cmp, flagsRegU icc, iRegI dst, immIMov src) %{  // CMoveI keyed on a cmpOpU/flagsRegU condition, immIMov constant source
+  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
+  ins_cost(140);  // below the register-source form's 150
+  size(4);
+  format %{ "MOV$cmp  $dst,$src" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));  // one MOV of the constant, predicated on the condition code
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+instruct cmovIIu_imm16(cmpOpU cmp, flagsRegU icc, iRegI dst, immI16 src) %{  // CMoveI keyed on a cmpOpU/flagsRegU condition, immI16 constant source
+  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
+  ins_cost(140);  // below the register-source form's 150
+  size(4);
+  format %{ "MOVW$cmp  $dst,$src" %}
+  ins_encode %{
+    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));  // one movw_i of the constant, predicated on the condition code
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+// Conditional move
+instruct cmovPP_reg(cmpOpP cmp, flagsRegP pcc, iRegP dst, iRegP src) %{  // CMoveP keyed on a cmpOpP/flagsRegP condition, register source
+  match(Set dst (CMoveP (Binary cmp pcc) (Binary dst src)));  // dst is both the kept value and the result
+  ins_cost(150);
+  size(4);
+  format %{ "MOV$cmp  $dst,$src" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));  // one MOV predicated on the operand's condition code
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovPP_imm(cmpOpP cmp, flagsRegP pcc, iRegP dst, immP0 src) %{  // CMoveP keyed on a cmpOpP/flagsRegP condition, immP0 constant source
+  match(Set dst (CMoveP (Binary cmp pcc) (Binary dst src)));
+  ins_cost(140);  // below the register-source form's 150
+  size(4);
+  format %{ "MOV$cmp  $dst,$src" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));  // one MOV of the constant, predicated on the condition code
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+// This instruction also works with CmpN so we don't need cmovPN_reg.
+instruct cmovPI_reg(cmpOp cmp, flagsReg icc, iRegP dst, iRegP src) %{
+  match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));  // pointer cmove on a cmpOp/flagsReg condition; dst keeps its value otherwise
+  ins_cost(150);
+  size(4);
+  format %{ "MOV$cmp  $dst,$src\t! ptr" %}
+  ins_encode %{
+    const Assembler::Condition cond = (Assembler::Condition)($cmp$$cmpcode);
+    __ mov($dst$$Register, $src$$Register, cond);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovPI_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegP dst, iRegP src) %{
+  match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));  // pointer cmove on flagsReg_EQNELTGE flags, register source
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);  // only eq, ne, lt and ge tests qualify
+  ins_cost(150);
+  size(4);
+  format %{ "MOV$cmp  $dst,$src\t! ptr" %}
+  ins_encode %{
+    const Assembler::Condition cond = (Assembler::Condition)($cmp$$cmpcode);
+    __ mov($dst$$Register, $src$$Register, cond);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovPIu_reg(cmpOpU cmp, flagsRegU icc, iRegP dst, iRegP src) %{
+  match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));  // pointer cmove on a cmpOpU/flagsRegU condition; dst keeps its value otherwise
+  ins_cost(150);
+  size(4);
+  format %{ "MOV$cmp  $dst,$src\t! ptr" %}
+  ins_encode %{
+    const Assembler::Condition cond = (Assembler::Condition)($cmp$$cmpcode);
+    __ mov($dst$$Register, $src$$Register, cond);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovPI_imm(cmpOp cmp, flagsReg icc, iRegP dst, immP0 src) %{
+  match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));  // pointer cmove of an immP0 constant on a cmpOp/flagsReg condition
+  ins_cost(140);
+  size(4);
+  format %{ "MOV$cmp  $dst,$src\t! ptr" %}
+  ins_encode %{
+    const Assembler::Condition cond = (Assembler::Condition)($cmp$$cmpcode);
+    __ mov($dst$$Register, $src$$constant, cond);
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+instruct cmovPI_imm_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegP dst, immP0 src) %{
+  match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));  // pointer cmove of an immP0 constant on flagsReg_EQNELTGE flags
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);  // only eq, ne, lt and ge tests qualify
+  ins_cost(140);
+  size(4);
+  format %{ "MOV$cmp  $dst,$src\t! ptr" %}
+  ins_encode %{
+    const Assembler::Condition cond = (Assembler::Condition)($cmp$$cmpcode);
+    __ mov($dst$$Register, $src$$constant, cond);
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+instruct cmovPIu_imm(cmpOpU cmp, flagsRegU icc, iRegP dst, immP0 src) %{
+  match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));  // pointer cmove of an immP0 constant on a cmpOpU/flagsRegU condition
+  ins_cost(140);
+  size(4);
+  format %{ "MOV$cmp  $dst,$src\t! ptr" %}
+  ins_encode %{
+    const Assembler::Condition cond = (Assembler::Condition)($cmp$$cmpcode);
+    __ mov($dst$$Register, $src$$constant, cond);
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+// Conditional move
+instruct cmovFP_reg(cmpOpP cmp, flagsRegP pcc, regF dst, regF src) %{  // CMoveF keyed on a cmpOpP/flagsRegP condition
+  match(Set dst (CMoveF (Binary cmp pcc) (Binary dst src)));  // dst is both the kept value and the result
+  ins_cost(150);
+  size(4);
+  format %{ "FCPYS$cmp $dst,$src" %}
+  ins_encode %{
+    __ vmov_f32($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode));  // one single-precision register copy, predicated on the condition code
+  %}
+  ins_pipe(int_conditional_float_move);
+%}
+
+instruct cmovFI_reg(cmpOp cmp, flagsReg icc, regF dst, regF src) %{
+  match(Set dst (CMoveF (Binary cmp icc) (Binary dst src)));  // float cmove on a cmpOp/flagsReg condition; dst keeps its value otherwise
+  ins_cost(150);
+  size(4);
+  format %{ "FCPYS$cmp $dst,$src" %}
+  ins_encode %{
+    const Assembler::Condition cond = (Assembler::Condition)($cmp$$cmpcode);
+    __ vmov_f32($dst$$FloatRegister, $src$$FloatRegister, cond);
+  %}
+  ins_pipe(int_conditional_float_move);
+%}
+
+instruct cmovFI_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, regF dst, regF src) %{
+  match(Set dst (CMoveF (Binary cmp icc) (Binary dst src)));  // float cmove on flagsReg_EQNELTGE flags
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);  // only eq, ne, lt and ge tests qualify
+  ins_cost(150);
+  size(4);
+  format %{ "FCPYS$cmp $dst,$src" %}
+  ins_encode %{
+    const Assembler::Condition cond = (Assembler::Condition)($cmp$$cmpcode);
+    __ vmov_f32($dst$$FloatRegister, $src$$FloatRegister, cond);
+  %}
+  ins_pipe(int_conditional_float_move);
+%}
+
+instruct cmovFIu_reg(cmpOpU cmp, flagsRegU icc, regF dst, regF src) %{
+  match(Set dst (CMoveF (Binary cmp icc) (Binary dst src)));  // float cmove on a cmpOpU/flagsRegU condition; dst keeps its value otherwise
+  ins_cost(150);
+  size(4);
+  format %{ "FCPYS$cmp $dst,$src" %}
+  ins_encode %{
+    const Assembler::Condition cond = (Assembler::Condition)($cmp$$cmpcode);
+    __ vmov_f32($dst$$FloatRegister, $src$$FloatRegister, cond);
+  %}
+  ins_pipe(int_conditional_float_move);
+%}
+
+// Conditional move
+instruct cmovDP_reg(cmpOpP cmp, flagsRegP pcc, regD dst, regD src) %{  // CMoveD keyed on a cmpOpP/flagsRegP condition
+  match(Set dst (CMoveD (Binary cmp pcc) (Binary dst src)));  // dst is both the kept value and the result
+  ins_cost(150);
+  size(4);
+  format %{ "FCPYD$cmp $dst,$src" %}
+  ins_encode %{
+    __ vmov_f64($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode));  // one double-precision register copy, predicated on the condition code
+  %}
+  ins_pipe(int_conditional_double_move);
+%}
+
+instruct cmovDI_reg(cmpOp cmp, flagsReg icc, regD dst, regD src) %{
+  match(Set dst (CMoveD (Binary cmp icc) (Binary dst src)));  // double cmove on a cmpOp/flagsReg condition; dst keeps its value otherwise
+  ins_cost(150);
+  size(4);
+  format %{ "FCPYD$cmp $dst,$src" %}
+  ins_encode %{
+    const Assembler::Condition cond = (Assembler::Condition)($cmp$$cmpcode);
+    __ vmov_f64($dst$$FloatRegister, $src$$FloatRegister, cond);
+  %}
+  ins_pipe(int_conditional_double_move);
+%}
+
+instruct cmovDI_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, regD dst, regD src) %{
+  match(Set dst (CMoveD (Binary cmp icc) (Binary dst src)));  // double cmove on flagsReg_EQNELTGE flags; only eq, ne, lt and ge tests qualify
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
+  ins_cost(150);
+  size(4);
+  format %{ "FCPYD$cmp $dst,$src" %}
+  ins_encode %{
+    const Assembler::Condition cond = (Assembler::Condition)($cmp$$cmpcode);
+    __ vmov_f64($dst$$FloatRegister, $src$$FloatRegister, cond);
+  %}
+  ins_pipe(int_conditional_double_move);
+%}
+
+instruct cmovDIu_reg(cmpOpU cmp, flagsRegU icc, regD dst, regD src) %{
+  match(Set dst (CMoveD (Binary cmp icc) (Binary dst src)));  // double cmove on a cmpOpU/flagsRegU condition; dst keeps its value otherwise
+  ins_cost(150);
+  size(4);
+  format %{ "FCPYD$cmp $dst,$src" %}
+  ins_encode %{
+    const Assembler::Condition cond = (Assembler::Condition)($cmp$$cmpcode);
+    __ vmov_f64($dst$$FloatRegister, $src$$FloatRegister, cond);
+  %}
+  ins_pipe(int_conditional_double_move);
+%}
+
+// Conditional move
+instruct cmovLP_reg(cmpOpP cmp, flagsRegP pcc, iRegL dst, iRegL src) %{
+  match(Set dst (CMoveL (Binary cmp pcc) (Binary dst src)));  // 64-bit cmove on a cmpOpP/flagsRegP condition
+  ins_cost(150);
+  size(8);  // two predicated moves: low word, then high word
+  format %{ "MOV$cmp  $dst.lo,$src.lo\t! long\n\t"
+            "MOV$cmp  $dst.hi,$src.hi" %}
+  ins_encode %{
+    const Assembler::Condition cond = (Assembler::Condition)($cmp$$cmpcode);
+    __ mov($dst$$Register, $src$$Register, cond);
+    __ mov($dst$$Register->successor(), $src$$Register->successor(), cond);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// TODO: try immLRot2 instead, so that (0, $con$$constant) becomes
+// (hi($con$$constant), lo($con$$constant))
+instruct cmovLP_immRot(cmpOpP cmp, flagsRegP pcc, iRegL dst, immLlowRot src) %{
+  match(Set dst (CMoveL (Binary cmp pcc) (Binary dst src)));  // 64-bit cmove of an immLlowRot constant on a cmpOpP/flagsRegP condition
+  ins_cost(140);
+  size(8);  // two predicated moves: constant into the low word, 0 into the high word
+  format %{ "MOV$cmp  $dst.lo,$src\t! long\n\t"
+            "MOV$cmp  $dst.hi,0" %}
+  ins_encode %{
+    const Assembler::Condition cond = (Assembler::Condition)($cmp$$cmpcode);
+    __ mov($dst$$Register, (long)$src$$constant, cond);
+    __ mov($dst$$Register->successor(), 0, cond);
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+instruct cmovLP_imm16(cmpOpP cmp, flagsRegP pcc, iRegL dst, immL16 src) %{
+  match(Set dst (CMoveL (Binary cmp pcc) (Binary dst src)));  // 64-bit cmove of an immL16 constant on a cmpOpP/flagsRegP condition
+  ins_cost(140);
+  size(8);  // two predicated instructions: movw_i for the low word, mov of 0 for the high word
+  format %{ "MOVW$cmp  $dst.lo,$src\t! long\n\t"
+            "MOV$cmp  $dst.hi,0" %}
+  ins_encode %{
+    const Assembler::Condition cond = (Assembler::Condition)($cmp$$cmpcode);
+    __ movw_i($dst$$Register, $src$$constant, cond);  // low word; the format text now names this instruction correctly
+    __ mov($dst$$Register->successor(), 0, cond);
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+instruct cmovLI_reg(cmpOp cmp, flagsReg icc, iRegL dst, iRegL src) %{
+  match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));  // 64-bit cmove on a cmpOp/flagsReg condition
+  ins_cost(150);
+  size(8);  // two predicated moves: low word, then high word
+  format %{ "MOV$cmp  $dst.lo,$src.lo\t! long\n\t"
+            "MOV$cmp  $dst.hi,$src.hi" %}
+  ins_encode %{
+    const Assembler::Condition cond = (Assembler::Condition)($cmp$$cmpcode);
+    __ mov($dst$$Register, $src$$Register, cond);
+    __ mov($dst$$Register->successor(), $src$$Register->successor(), cond);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovLI_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegL dst, iRegL src) %{
+  match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));  // 64-bit cmove on flagsReg_EQNELTGE flags, register source
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);  // only eq, ne, lt and ge tests qualify
+  ins_cost(150);
+  size(8);  // two predicated moves: low word, then high word
+  format %{ "MOV$cmp  $dst.lo,$src.lo\t! long\n\t"
+            "MOV$cmp  $dst.hi,$src.hi" %}
+  ins_encode %{
+    const Assembler::Condition cond = (Assembler::Condition)($cmp$$cmpcode);
+    __ mov($dst$$Register, $src$$Register, cond);
+    __ mov($dst$$Register->successor(), $src$$Register->successor(), cond);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// TODO: try immLRot2 instead, so that (0, $con$$constant) becomes
+// (hi($con$$constant), lo($con$$constant))
+instruct cmovLI_immRot(cmpOp cmp, flagsReg icc, iRegL dst, immLlowRot src) %{
+  match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));  // 64-bit cmove of an immLlowRot constant on a cmpOp/flagsReg condition
+  ins_cost(140);
+  size(8);  // two predicated moves: constant into the low word, 0 into the high word
+  format %{ "MOV$cmp  $dst.lo,$src\t! long\n\t"
+            "MOV$cmp  $dst.hi,0" %}
+  ins_encode %{
+    const Assembler::Condition cond = (Assembler::Condition)($cmp$$cmpcode);
+    __ mov($dst$$Register, $src$$constant, cond);  // NOTE(review): cmovLP_immRot casts this constant to long; this stanza does not -- confirm intended
+    __ mov($dst$$Register->successor(), 0, cond);
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+// TODO: try immLRot2 instead, so that (0, $con$$constant) becomes
+// (hi($con$$constant), lo($con$$constant))
+instruct cmovLI_immRot_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegL dst, immLlowRot src) %{
+  match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));  // 64-bit cmove of an immLlowRot constant on flagsReg_EQNELTGE flags
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);  // only eq, ne, lt and ge tests qualify
+  ins_cost(140);
+  size(8);  // two predicated moves: constant into the low word, 0 into the high word
+  format %{ "MOV$cmp  $dst.lo,$src\t! long\n\t"
+            "MOV$cmp  $dst.hi,0" %}
+  ins_encode %{
+    const Assembler::Condition cond = (Assembler::Condition)($cmp$$cmpcode);
+    __ mov($dst$$Register, $src$$constant, cond);  // NOTE(review): cmovLP_immRot casts this constant to long; this stanza does not -- confirm intended
+    __ mov($dst$$Register->successor(), 0, cond);
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+instruct cmovLI_imm16(cmpOp cmp, flagsReg icc, iRegL dst, immL16 src) %{
+  match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));  // 64-bit cmove of an immL16 constant on a cmpOp/flagsReg condition
+  ins_cost(140);
+  size(8);  // two predicated movw_i instructions: constant into the low word, 0 into the high word
+  format %{ "MOVW$cmp  $dst.lo,$src\t! long\n\t"
+            "MOVW$cmp  $dst.hi,0" %}
+  ins_encode %{
+    const Assembler::Condition cond = (Assembler::Condition)($cmp$$cmpcode);
+    __ movw_i($dst$$Register, $src$$constant, cond);  // the format text now names movw_i for both words, matching what is emitted
+    __ movw_i($dst$$Register->successor(), 0, cond);
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+instruct cmovLI_imm16_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegL dst, immL16 src) %{
+  match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));  // 64-bit cmove of an immL16 constant on flagsReg_EQNELTGE flags
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);  // only eq, ne, lt and ge tests qualify
+  ins_cost(140);
+  size(8);  // two predicated movw_i instructions: constant into the low word, 0 into the high word
+  format %{ "MOVW$cmp  $dst.lo,$src\t! long\n\t"
+            "MOVW$cmp  $dst.hi,0" %}
+  ins_encode %{
+    const Assembler::Condition cond = (Assembler::Condition)($cmp$$cmpcode);
+    __ movw_i($dst$$Register, $src$$constant, cond);  // the format text now names movw_i for both words, matching what is emitted
+    __ movw_i($dst$$Register->successor(), 0, cond);
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+instruct cmovLIu_reg(cmpOpU cmp, flagsRegU icc, iRegL dst, iRegL src) %{
+  match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));  // 64-bit cmove on a cmpOpU/flagsRegU condition
+  ins_cost(150);
+  size(8);  // two predicated moves: low word, then high word
+  format %{ "MOV$cmp  $dst.lo,$src.lo\t! long\n\t"
+            "MOV$cmp  $dst.hi,$src.hi" %}
+  ins_encode %{
+    const Assembler::Condition cond = (Assembler::Condition)($cmp$$cmpcode);
+    __ mov($dst$$Register, $src$$Register, cond);
+    __ mov($dst$$Register->successor(), $src$$Register->successor(), cond);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+
+//----------OS and Locking Instructions----------------------------------------
+
+// This name is KNOWN by the ADLC and cannot be changed.
+// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
+// for this guy.
+instruct tlsLoadP(RthreadRegP dst) %{
+  match(Set dst (ThreadLocal));
+  // ThreadLocal needs no code: dst is constrained to RthreadRegP, where (per the format text) the TLS value already is.
+  size(0);
+  ins_cost(0);
+  format %{ "! TLS is in $dst" %}
+  ins_encode( /*empty encoding*/ );
+  ins_pipe(ialu_none);
+%}
+
+instruct checkCastPP( iRegP dst ) %{
+  match(Set dst (CheckCastPP dst));
+  // CheckCastPP is matched in place on dst and emits no code (size 0, empty encoding).
+  size(0);
+  format %{ "! checkcastPP of $dst" %}
+  ins_encode( /*empty encoding*/ );
+  ins_pipe(empty);
+%}
+
+
+instruct castPP( iRegP dst ) %{  // CastPP is matched in place on dst and emits no code
+  match(Set dst (CastPP dst));
+  format %{ "! castPP of $dst" %}
+  ins_encode( /*empty encoding*/ );  // NOTE(review): unlike checkCastPP, no explicit size(0) is given here
+  ins_pipe(empty);
+%}
+
+instruct castII( iRegI dst ) %{  // CastII is matched in place on dst and emits no code
+  match(Set dst (CastII dst));
+  format %{ "! castII of $dst" %}
+  ins_encode( /*empty encoding*/ );
+  ins_cost(0);  // free for the matcher
+  ins_pipe(empty);
+%}
+
+//----------Arithmetic Instructions--------------------------------------------
+// Addition Instructions
+// Register Addition
+instruct addI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+  match(Set dst (AddI src1 src2));
+  // Int add of two registers: one ADD.
+  size(4);
+  format %{ "add_32 $dst,$src1,$src2\t! int" %}
+  ins_encode %{
+    __ add($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct addshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  match(Set dst (AddI (LShiftI src1 src2) src3));
+  // Fuses (src1 << src2) + src3 into one ADD whose second operand is src1 left-shifted by register src2.
+  size(4);
+  format %{ "add_32 $dst,$src3,$src1<<$src2\t! int" %}
+  ins_encode %{
+    __ add($dst$$Register, $src3$$Register, $src1$$Register, lsl($src2$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct addshlI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
+  match(Set dst (AddI (LShiftI src1 src2) src3));
+  // Fuses (src1 << src2) + src3 into one ADD whose second operand is src1 left-shifted by the immU5 constant.
+  size(4);
+  format %{ "add_32 $dst,$src3,$src1<<$src2\t! int" %}
+  ins_encode %{
+    __ add($dst$$Register, $src3$$Register, $src1$$Register, lsl($src2$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct addsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  match(Set dst (AddI (RShiftI src1 src2) src3));
+  // Fuses (src1 >> src2) + src3 into one ADD whose second operand is src1 arithmetically right-shifted by register src2.
+  size(4);
+  format %{ "add_32 $dst,$src3,$src1>>$src2\t! int" %}
+  ins_encode %{
+    __ add($dst$$Register, $src3$$Register, $src1$$Register, asr($src2$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct addsarI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
+  match(Set dst (AddI (RShiftI src1 src2) src3));
+  // Fuses (src1 >> src2) + src3 into one ADD whose second operand is src1 arithmetically right-shifted by the immU5 constant.
+  size(4);
+  format %{ "add_32 $dst,$src3,$src1>>$src2\t! int" %}
+  ins_encode %{
+    __ add($dst$$Register, $src3$$Register, $src1$$Register, asr($src2$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct addshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  match(Set dst (AddI (URShiftI src1 src2) src3));
+  // Fuses (src1 >>> src2) + src3 into one ADD whose second operand is src1 logically right-shifted by register src2.
+  size(4);
+  format %{ "add_32 $dst,$src3,$src1>>>$src2\t! int" %}
+  ins_encode %{
+    __ add($dst$$Register, $src3$$Register, $src1$$Register, lsr($src2$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct addshrI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
+  match(Set dst (AddI (URShiftI src1 src2) src3));
+  // Fuses (src1 >>> src2) + src3 into one ADD whose second operand is src1 logically right-shifted by the immU5 constant.
+  size(4);
+  format %{ "add_32 $dst,$src3,$src1>>>$src2\t! int" %}
+  ins_encode %{
+    __ add($dst$$Register, $src3$$Register, $src1$$Register, lsr($src2$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Addition
+instruct addI_reg_aimmI(iRegI dst, iRegI src1, aimmI src2) %{
+  match(Set dst (AddI src1 src2));
+  // Int add of a register and an aimmI constant: one ADD with an immediate operand.
+  size(4);
+  format %{ "add_32 $dst,$src1,$src2\t! int" %}
+  ins_encode %{
+    __ add($dst$$Register, $src1$$Register, $src2$$constant);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Pointer Register Addition
+instruct addP_reg_reg(iRegP dst, iRegP src1, iRegX src2) %{
+  match(Set dst (AddP src1 src2));
+  // Pointer plus iRegX offset: one ADD of two registers.
+  size(4);
+  format %{ "ADD    $dst,$src1,$src2\t! ptr" %}
+  ins_encode %{
+    __ add($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// shifted iRegX operand
+operand shiftedX(iRegX src2, shimmX src3) %{
+//constraint(ALLOC_IN_RC(sp_ptr_reg));
+  match(LShiftX src2 src3);
+  // Matches (src2 << src3) and exposes it through the memory interface: register in base, shift amount in scale.
+  op_cost(1);
+  format %{ "$src2 << $src3" %}
+  interface(MEMORY_INTER) %{
+    base($src2);
+    index(0xff);  // presumably the "no index register" marker -- confirm against the other memory operands
+    scale($src3);
+    disp(0x0);
+  %}
+%}
+
+instruct addshlP_reg_reg_imm(iRegP dst, iRegP src1, shiftedX src2) %{
+  // Pointer plus left-shifted offset: the shiftedX operand carries the offset register as its base and the shift as its scale.
+  match(Set dst (AddP src1 src2));
+  ins_cost(DEFAULT_COST * 3/2);
+  size(4);
+  format %{ "ADD    $dst,$src1,$src2\t! ptr" %}
+  ins_encode %{
+    // Fold the shift into the second operand of a single ADD.
+    __ add($dst$$Register, $src1$$Register, reg_to_register_object($src2$$base), lsl($src2$$scale));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Pointer Immediate Addition
+instruct addP_reg_aimmX(iRegP dst, iRegP src1, aimmX src2) %{
+  match(Set dst (AddP src1 src2));
+  // Pointer plus aimmX constant: one ADD with an immediate operand.
+  size(4);
+  format %{ "ADD    $dst,$src1,$src2\t! ptr" %}
+  ins_encode %{
+    __ add($dst$$Register, $src1$$Register, $src2$$constant);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Long Addition
+instruct addL_reg_reg(iRegL dst, iRegL src1, iRegL src2, flagsReg ccr) %{  // 64-bit add of two register pairs
+  match(Set dst (AddL src1 src2));
+  effect(KILL ccr);  // the flags register is declared clobbered
+  ins_cost(DEFAULT_COST*2);
+  size(8);  // two instructions
+  format %{ "ADDS    $dst.lo,$src1.lo,$src2.lo\t! long\n\t"
+            "ADC     $dst.hi,$src1.hi,$src2.hi" %}
+  ins_encode %{
+    __ adds($dst$$Register, $src1$$Register, $src2$$Register);  // low words; sets the carry consumed below
+    __ adc($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor());  // high words plus carry
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// TODO: try immLRot2 instead, so that (0, $con$$constant) becomes
+// (hi($con$$constant), lo($con$$constant))
+instruct addL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con, flagsReg ccr) %{  // 64-bit add of a register pair and an immLlowRot constant
+  match(Set dst (AddL src1 con));
+  effect(KILL ccr);  // the flags register is declared clobbered
+  size(8);  // two instructions
+  format %{ "ADDS    $dst.lo,$src1.lo,$con\t! long\n\t"
+            "ADC     $dst.hi,$src1.hi,0" %}
+  ins_encode %{
+    __ adds($dst$$Register, $src1$$Register, (long)$con$$constant);  // low word plus the constant; sets the carry consumed below
+    __ adc($dst$$Register->successor(), $src1$$Register->successor(), 0);  // high word plus carry only
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+//----------Conditional_store--------------------------------------------------
+// Conditional-store of the updated heap-top.
+// Used during allocation of the shared heap.
+// Sets flags (EQ) on success.
+
+// TODO: optimize out barriers with AArch64 load-acquire/store-release
+// LoadP-locked.
+instruct loadPLocked(iRegP dst, memoryex mem) %{  // LoadPLocked: one exclusive load (LDREX) of the pointer at mem
+  match(Set dst (LoadPLocked mem));
+  size(4);
+  format %{ "LDREX  $dst,$mem" %}
+  ins_encode %{
+    __ ldrex($dst$$Register,$mem$$Address);
+  %}
+  ins_pipe(iload_mem);
+%}
+
+instruct storePConditional( memoryex heap_top_ptr, iRegP oldval, iRegP newval, iRegI tmp, flagsRegP pcc ) %{  // exclusive store of newval; the outcome is reported in the flags
+  predicate(_kids[1]->_kids[0]->_leaf->Opcode() == Op_LoadPLocked); // only works in conjunction with a LoadPLocked node
+  match(Set pcc (StorePConditional heap_top_ptr (Binary oldval newval)));
+  effect( TEMP tmp );  // tmp receives the STREX status
+  size(8);  // two instructions
+  format %{ "STREX  $tmp,$newval,$heap_top_ptr\n\t"
+            "CMP    $tmp, 0" %}
+  ins_encode %{
+    __ strex($tmp$$Register, $newval$$Register, $heap_top_ptr$$Address);  // oldval is not compared here; the pairing LoadPLocked is required by the predicate
+    __ cmp($tmp$$Register, 0);  // EQ when the status is 0
+  %}
+  ins_pipe( long_memory_op );
+%}
+
+// Conditional-store of an intx value.
+instruct storeXConditional( memoryex mem, iRegX oldval, iRegX newval, iRegX tmp, flagsReg icc ) %{  // compare-and-store loop for StoreIConditional; the outcome is reported in the flags
+  match(Set icc (StoreIConditional mem (Binary oldval newval)));
+  effect( TEMP tmp );
+  size(28);  // seven instructions
+  format %{ "loop: \n\t"
+            "LDREX    $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem], DOESN'T set $newval=[$mem] in any case\n\t"
+            "EORS     $tmp,$tmp, $oldval\n\t"
+            "STREX.eq $tmp, $newval, $mem\n\t"
+            "CMP.eq   $tmp, 1 \n\t"
+            "B.eq     loop \n\t"
+            "TEQ      $tmp, 0\n\t"
+            "membar   LoadStore|LoadLoad" %}
+  ins_encode %{
+    Label loop;
+    __ bind(loop);
+    __ ldrex($tmp$$Register, $mem$$Address);  // exclusive load of the current value
+    __ eors($tmp$$Register, $tmp$$Register, $oldval$$Register);  // zero, and EQ, only when the current value equals oldval
+    __ strex($tmp$$Register, $newval$$Register, $mem$$Address, Assembler::EQ);  // attempted only on a match; tmp becomes the store status
+    __ cmp($tmp$$Register, 1, Assembler::EQ);  // status 1 means the exclusive store did not succeed
+    __ b(loop, Assembler::EQ);  // retry in that case
+    __ teq($tmp$$Register, 0);  // final flags: EQ when tmp is 0 (stored), NE when the values differed
+    // used by biased locking only. Requires a membar.
+    __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadStore | MacroAssembler::LoadLoad));
+  %}
+  ins_pipe( long_memory_op );
+%}
+
+// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
+
+instruct compareAndSwapL_bool(memoryex mem, iRegL oldval, iRegLd newval, iRegI res, iRegLd tmp, flagsReg ccr ) %{  // 64-bit CAS loop; res is 1 on success, 0 on mismatch
+  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
+  effect( KILL ccr, TEMP tmp);
+  size(32);  // eight instructions
+  format %{ "loop: \n\t"
+            "LDREXD   $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t"
+            "CMP      $tmp.lo, $oldval.lo\n\t"
+            "CMP.eq   $tmp.hi, $oldval.hi\n\t"
+            "STREXD.eq $tmp, $newval, $mem\n\t"
+            "MOV.ne   $tmp, 0 \n\t"
+            "EORS.eq  $tmp,$tmp, 1 \n\t"
+            "B.eq     loop \n\t"
+            "MOV      $res, $tmp" %}
+  ins_encode %{
+    Label loop;
+    __ bind(loop);
+    __ ldrexd($tmp$$Register, $mem$$Address);  // exclusive load of the current 64-bit value into the tmp pair
+    __ cmp($tmp$$Register, $oldval$$Register);  // compare low words
+    __ cmp($tmp$$Register->successor(), $oldval$$Register->successor(), Assembler::EQ);  // then high words, only if the low words matched
+    __ strexd($tmp$$Register, $newval$$Register, $mem$$Address, Assembler::EQ);  // attempted only on a full match; tmp becomes the store status
+    __ mov($tmp$$Register, 0, Assembler::NE);  // mismatch: result 0
+    __ eors($tmp$$Register, $tmp$$Register, 1, Assembler::EQ);  // match: invert the status, so 1 means stored and 0 (EQ) means retry
+    __ b(loop, Assembler::EQ);  // retry when the exclusive store did not succeed
+    __ mov($res$$Register, $tmp$$Register);
+  %}
+  ins_pipe( long_memory_op );
+%}
+
+
+instruct compareAndSwapI_bool(memoryex mem, iRegI oldval, iRegI newval, iRegI res, iRegI tmp, flagsReg ccr ) %{  // 32-bit CAS loop; res is 1 on success, 0 on mismatch
+  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
+  effect( KILL ccr, TEMP tmp);
+  size(28);  // seven instructions
+  format %{ "loop: \n\t"
+            "LDREX    $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t"
+            "CMP      $tmp, $oldval\n\t"
+            "STREX.eq $tmp, $newval, $mem\n\t"
+            "MOV.ne   $tmp, 0 \n\t"
+            "EORS.eq  $tmp,$tmp, 1 \n\t"
+            "B.eq     loop \n\t"
+            "MOV      $res, $tmp" %}
+  // The format text prints EORS, the instruction that eors emits and that compareAndSwapP_bool already shows.
+  ins_encode %{
+    Label loop;
+    __ bind(loop);
+    __ ldrex($tmp$$Register,$mem$$Address);  // exclusive load of the current value
+    __ cmp($tmp$$Register, $oldval$$Register);
+    __ strex($tmp$$Register, $newval$$Register, $mem$$Address, Assembler::EQ);  // attempted only on a match; tmp becomes the store status
+    __ mov($tmp$$Register, 0, Assembler::NE);  // mismatch: result 0
+    __ eors($tmp$$Register, $tmp$$Register, 1, Assembler::EQ);  // match: invert the status, so 1 means stored and 0 (EQ) means retry
+    __ b(loop, Assembler::EQ);  // retry when the exclusive store did not succeed
+    __ mov($res$$Register, $tmp$$Register);
+  %}
+  ins_pipe( long_memory_op );
+%}
+
+// Pointer compare-and-swap that yields a boolean in $res.
+// Emits the same LDREX/STREX retry loop as the int variant; $tmp holds the
+// loaded value, then the STREX status, then the boolean copied to $res.
+instruct compareAndSwapP_bool(memoryex mem, iRegP oldval, iRegP newval, iRegI res, iRegI tmp, flagsReg ccr ) %{
+  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+  effect( KILL ccr, TEMP tmp);
+  size(28);
+  format %{ "loop: \n\t"
+            "LDREX    $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t"
+            "CMP      $tmp, $oldval\n\t"
+            "STREX.eq $tmp, $newval, $mem\n\t"
+            "MOV.ne   $tmp, 0 \n\t"
+            "EORS.eq  $tmp,$tmp, 1 \n\t"
+            "B.eq     loop \n\t"
+            "MOV      $res, $tmp" %}
+
+  ins_encode %{
+    Label loop;
+    __ bind(loop);
+    __ ldrex($tmp$$Register,$mem$$Address);
+    __ cmp($tmp$$Register, $oldval$$Register);
+    // Values equal: attempt the store; the status is written to $tmp.
+    __ strex($tmp$$Register, $newval$$Register, $mem$$Address, Assembler::EQ);
+    // Values differ: the boolean result is 0.
+    __ mov($tmp$$Register, 0, Assembler::NE);
+    // Flip bit 0 of the status and set flags: a zero result (EQ) retries,
+    // a non-zero result falls through and becomes the boolean.
+    __ eors($tmp$$Register, $tmp$$Register, 1, Assembler::EQ);
+    __ b(loop, Assembler::EQ);
+    __ mov($res$$Register, $tmp$$Register);
+  %}
+  ins_pipe( long_memory_op );
+%}
+
+// Atomic add of an arithmetic immediate to the int at $mem, selected only when
+// the GetAndAddI result is unused (see predicate); the Set target is a dummy.
+// $tmp1 carries the loaded/updated value, $tmp2 the STREX status.
+instruct xaddI_aimmI_no_res(memoryex mem, aimmI add, Universe dummy, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{
+  predicate(n->as_LoadStore()->result_not_used());
+  match(Set dummy (GetAndAddI mem add));
+  effect(KILL ccr, TEMP tmp1, TEMP tmp2);
+  size(20);
+  format %{ "loop: \n\t"
+            "LDREX    $tmp1, $mem\n\t"
+            "ADD      $tmp1, $tmp1, $add\n\t"
+            "STREX    $tmp2, $tmp1, $mem\n\t"
+            "CMP      $tmp2, 0 \n\t"
+            "B.ne     loop \n\t" %}
+
+  ins_encode %{
+    Label loop;
+    __ bind(loop);
+    __ ldrex($tmp1$$Register,$mem$$Address);
+    __ add($tmp1$$Register, $tmp1$$Register, $add$$constant);
+    __ strex($tmp2$$Register, $tmp1$$Register, $mem$$Address);
+    // Retry the whole load/add/store while the STREX status is non-zero.
+    __ cmp($tmp2$$Register, 0);
+    __ b(loop, Assembler::NE);
+  %}
+  ins_pipe( long_memory_op );
+%}
+
+// Atomic add of a register to the int at $mem, selected only when the
+// GetAndAddI result is unused (see predicate); the Set target is a dummy.
+// $tmp1 carries the loaded/updated value, $tmp2 the STREX status.
+instruct xaddI_reg_no_res(memoryex mem, iRegI add, Universe dummy, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{
+  predicate(n->as_LoadStore()->result_not_used());
+  match(Set dummy (GetAndAddI mem add));
+  effect(KILL ccr, TEMP tmp1, TEMP tmp2);
+  size(20);
+  format %{ "loop: \n\t"
+            "LDREX    $tmp1, $mem\n\t"
+            "ADD      $tmp1, $tmp1, $add\n\t"
+            "STREX    $tmp2, $tmp1, $mem\n\t"
+            "CMP      $tmp2, 0 \n\t"
+            "B.ne     loop \n\t" %}
+
+  ins_encode %{
+    Label loop;
+    __ bind(loop);
+    __ ldrex($tmp1$$Register,$mem$$Address);
+    __ add($tmp1$$Register, $tmp1$$Register, $add$$Register);
+    __ strex($tmp2$$Register, $tmp1$$Register, $mem$$Address);
+    // Retry the whole load/add/store while the STREX status is non-zero.
+    __ cmp($tmp2$$Register, 0);
+    __ b(loop, Assembler::NE);
+  %}
+  ins_pipe( long_memory_op );
+%}
+
+// Atomic fetch-and-add of an arithmetic immediate on the int at $mem.
+// $res receives the value loaded before the add; $tmp1 holds the sum to be
+// stored and $tmp2 the STREX status. $res is also declared TEMP.
+instruct xaddI_aimmI(memoryex mem, aimmI add, iRegI res, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{
+  match(Set res (GetAndAddI mem add));
+  effect(KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res);
+  size(20);
+  format %{ "loop: \n\t"
+            "LDREX    $res, $mem\n\t"
+            "ADD      $tmp1, $res, $add\n\t"
+            "STREX    $tmp2, $tmp1, $mem\n\t"
+            "CMP      $tmp2, 0 \n\t"
+            "B.ne     loop \n\t" %}
+
+  ins_encode %{
+    Label loop;
+    __ bind(loop);
+    __ ldrex($res$$Register,$mem$$Address);
+    __ add($tmp1$$Register, $res$$Register, $add$$constant);
+    __ strex($tmp2$$Register, $tmp1$$Register, $mem$$Address);
+    // Retry the whole load/add/store while the STREX status is non-zero.
+    __ cmp($tmp2$$Register, 0);
+    __ b(loop, Assembler::NE);
+  %}
+  ins_pipe( long_memory_op );
+%}
+
+// Atomic fetch-and-add of a register on the int at $mem.
+// $res receives the value loaded before the add; $tmp1 holds the sum to be
+// stored and $tmp2 the STREX status. $res is also declared TEMP.
+instruct xaddI_reg(memoryex mem, iRegI add, iRegI res, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{
+  match(Set res (GetAndAddI mem add));
+  effect(KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res);
+  size(20);
+  format %{ "loop: \n\t"
+            "LDREX    $res, $mem\n\t"
+            "ADD      $tmp1, $res, $add\n\t"
+            "STREX    $tmp2, $tmp1, $mem\n\t"
+            "CMP      $tmp2, 0 \n\t"
+            "B.ne     loop \n\t" %}
+
+  ins_encode %{
+    Label loop;
+    __ bind(loop);
+    __ ldrex($res$$Register,$mem$$Address);
+    __ add($tmp1$$Register, $res$$Register, $add$$Register);
+    __ strex($tmp2$$Register, $tmp1$$Register, $mem$$Address);
+    // Retry the whole load/add/store while the STREX status is non-zero.
+    __ cmp($tmp2$$Register, 0);
+    __ b(loop, Assembler::NE);
+  %}
+  ins_pipe( long_memory_op );
+%}
+
+// Atomic 64-bit add of a register pair to the long at $mem, selected only when
+// the GetAndAddL result is unused (see predicate); the Set target is a dummy.
+// $tmp1 (register pair) carries the loaded/updated value, $tmp2 the STREXD status.
+instruct xaddL_reg_no_res(memoryex mem, iRegL add, Universe dummy, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{
+  predicate(n->as_LoadStore()->result_not_used());
+  match(Set dummy (GetAndAddL mem add));
+  effect( KILL ccr, TEMP tmp1, TEMP tmp2);
+  size(24);
+  format %{ "loop: \n\t"
+            "LDREXD   $tmp1, $mem\n\t"
+            "ADDS     $tmp1.lo, $tmp1.lo, $add.lo\n\t"
+            "ADC      $tmp1.hi, $tmp1.hi, $add.hi\n\t"
+            "STREXD   $tmp2, $tmp1, $mem\n\t"
+            "CMP      $tmp2, 0 \n\t"
+            "B.ne     loop \n\t" %}
+
+  ins_encode %{
+    Label loop;
+    __ bind(loop);
+    __ ldrexd($tmp1$$Register, $mem$$Address);
+    // Low words with ADDS, high words with ADC so the carry propagates.
+    __ adds($tmp1$$Register, $tmp1$$Register, $add$$Register);
+    __ adc($tmp1$$Register->successor(), $tmp1$$Register->successor(), $add$$Register->successor());
+    __ strexd($tmp2$$Register, $tmp1$$Register, $mem$$Address);
+    // Retry while the STREXD status is non-zero.
+    __ cmp($tmp2$$Register, 0);
+    __ b(loop, Assembler::NE);
+  %}
+  ins_pipe( long_memory_op );
+%}
+
+// TODO: try immLRot2 instead; the (0, $con$$constant) operand pair would
+// then become (hi($con$$constant), lo($con$$constant)).
+// Atomic 64-bit add of an immLlowRot constant to the long at $mem, selected
+// only when the GetAndAddL result is unused (see predicate).
+// The constant is added to the low word; the high word only absorbs the carry.
+instruct xaddL_immRot_no_res(memoryex mem, immLlowRot add, Universe dummy, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{
+  predicate(n->as_LoadStore()->result_not_used());
+  match(Set dummy (GetAndAddL mem add));
+  effect( KILL ccr, TEMP tmp1, TEMP tmp2);
+  size(24);
+  format %{ "loop: \n\t"
+            "LDREXD   $tmp1, $mem\n\t"
+            "ADDS     $tmp1.lo, $tmp1.lo, $add\n\t"
+            "ADC      $tmp1.hi, $tmp1.hi, 0\n\t"
+            "STREXD   $tmp2, $tmp1, $mem\n\t"
+            "CMP      $tmp2, 0 \n\t"
+            "B.ne     loop \n\t" %}
+
+  ins_encode %{
+    Label loop;
+    __ bind(loop);
+    __ ldrexd($tmp1$$Register, $mem$$Address);
+    // Add the constant to the low word, then fold the carry into the high word.
+    __ adds($tmp1$$Register, $tmp1$$Register, (long)$add$$constant);
+    __ adc($tmp1$$Register->successor(), $tmp1$$Register->successor(), 0);
+    __ strexd($tmp2$$Register, $tmp1$$Register, $mem$$Address);
+    // Retry while the STREXD status is non-zero.
+    __ cmp($tmp2$$Register, 0);
+    __ b(loop, Assembler::NE);
+  %}
+  ins_pipe( long_memory_op );
+%}
+
+// Atomic 64-bit fetch-and-add of a register pair on the long at $mem.
+// $res receives the value loaded before the add; $tmp1 (register pair) holds
+// the sum to be stored and $tmp2 the STREXD status.
+instruct xaddL_reg(memoryex mem, iRegL add, iRegLd res, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{
+  match(Set res (GetAndAddL mem add));
+  effect( KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res);
+  size(24);
+  format %{ "loop: \n\t"
+            "LDREXD   $res, $mem\n\t"
+            "ADDS     $tmp1.lo, $res.lo, $add.lo\n\t"
+            "ADC      $tmp1.hi, $res.hi, $add.hi\n\t"
+            "STREXD   $tmp2, $tmp1, $mem\n\t"
+            "CMP      $tmp2, 0 \n\t"
+            "B.ne     loop \n\t" %}
+
+  ins_encode %{
+    Label loop;
+    __ bind(loop);
+    __ ldrexd($res$$Register, $mem$$Address);
+    // Low words with ADDS, high words with ADC so the carry propagates.
+    __ adds($tmp1$$Register, $res$$Register, $add$$Register);
+    __ adc($tmp1$$Register->successor(), $res$$Register->successor(), $add$$Register->successor());
+    __ strexd($tmp2$$Register, $tmp1$$Register, $mem$$Address);
+    // Retry while the STREXD status is non-zero.
+    __ cmp($tmp2$$Register, 0);
+    __ b(loop, Assembler::NE);
+  %}
+  ins_pipe( long_memory_op );
+%}
+
+// TODO: try immLRot2 instead; the (0, $con$$constant) operand pair would
+// then become (hi($con$$constant), lo($con$$constant)).
+// Atomic 64-bit fetch-and-add of an immLlowRot constant on the long at $mem.
+// $res receives the value loaded before the add. The constant is added to the
+// low word; the high word only absorbs the carry.
+instruct xaddL_immRot(memoryex mem, immLlowRot add, iRegLd res, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{
+  match(Set res (GetAndAddL mem add));
+  effect( KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res);
+  size(24);
+  format %{ "loop: \n\t"
+            "LDREXD   $res, $mem\n\t"
+            "ADDS     $tmp1.lo, $res.lo, $add\n\t"
+            "ADC      $tmp1.hi, $res.hi, 0\n\t"
+            "STREXD   $tmp2, $tmp1, $mem\n\t"
+            "CMP      $tmp2, 0 \n\t"
+            "B.ne     loop \n\t" %}
+
+  ins_encode %{
+    Label loop;
+    __ bind(loop);
+    __ ldrexd($res$$Register, $mem$$Address);
+    // Add the constant to the low word, then fold the carry into the high word.
+    __ adds($tmp1$$Register, $res$$Register, (long)$add$$constant);
+    __ adc($tmp1$$Register->successor(), $res$$Register->successor(), 0);
+    __ strexd($tmp2$$Register, $tmp1$$Register, $mem$$Address);
+    // Retry while the STREXD status is non-zero.
+    __ cmp($tmp2$$Register, 0);
+    __ b(loop, Assembler::NE);
+  %}
+  ins_pipe( long_memory_op );
+%}
+
+// Atomic exchange of the int at $mem with $newval (GetAndSetI).
+// $res receives the previously stored value; $tmp holds the STREX status.
+instruct xchgI(memoryex mem, iRegI newval, iRegI res, iRegI tmp, flagsReg ccr) %{
+  match(Set res (GetAndSetI mem newval));
+  effect(KILL ccr, TEMP tmp, TEMP res);
+  size(16);
+  format %{ "loop: \n\t"
+            "LDREX    $res, $mem\n\t"
+            "STREX    $tmp, $newval, $mem\n\t"
+            "CMP      $tmp, 0 \n\t"
+            "B.ne     loop \n\t" %}
+
+  ins_encode %{
+    Label loop;
+    __ bind(loop);
+    __ ldrex($res$$Register,$mem$$Address);
+    __ strex($tmp$$Register, $newval$$Register, $mem$$Address);
+    // Retry while the STREX status is non-zero.
+    __ cmp($tmp$$Register, 0);
+    __ b(loop, Assembler::NE);
+  %}
+  ins_pipe( long_memory_op );
+%}
+
+// Atomic exchange of the long at $mem with $newval (GetAndSetL).
+// $res (register pair) receives the previously stored value; $tmp holds the
+// STREXD status.
+instruct xchgL(memoryex mem, iRegLd newval, iRegLd res, iRegI tmp, flagsReg ccr) %{
+  match(Set res (GetAndSetL mem newval));
+  effect( KILL ccr, TEMP tmp, TEMP res);
+  size(16);
+  format %{ "loop: \n\t"
+            "LDREXD   $res, $mem\n\t"
+            "STREXD   $tmp, $newval, $mem\n\t"
+            "CMP      $tmp, 0 \n\t"
+            "B.ne     loop \n\t" %}
+
+  ins_encode %{
+    Label loop;
+    __ bind(loop);
+    __ ldrexd($res$$Register, $mem$$Address);
+    __ strexd($tmp$$Register, $newval$$Register, $mem$$Address);
+    // Retry while the STREXD status is non-zero.
+    __ cmp($tmp$$Register, 0);
+    __ b(loop, Assembler::NE);
+  %}
+  ins_pipe( long_memory_op );
+%}
+
+// Atomic exchange of the pointer at $mem with $newval (GetAndSetP).
+// $res receives the previously stored value; $tmp holds the STREX status.
+instruct xchgP(memoryex mem, iRegP newval, iRegP res, iRegI tmp, flagsReg ccr) %{
+  match(Set res (GetAndSetP mem newval));
+  effect(KILL ccr, TEMP tmp, TEMP res);
+  size(16);
+  format %{ "loop: \n\t"
+            "LDREX    $res, $mem\n\t"
+            "STREX    $tmp, $newval, $mem\n\t"
+            "CMP      $tmp, 0 \n\t"
+            "B.ne     loop \n\t" %}
+
+  ins_encode %{
+    Label loop;
+    __ bind(loop);
+    __ ldrex($res$$Register,$mem$$Address);
+    __ strex($tmp$$Register, $newval$$Register, $mem$$Address);
+    // Retry while the STREX status is non-zero.
+    __ cmp($tmp$$Register, 0);
+    __ b(loop, Assembler::NE);
+  %}
+  ins_pipe( long_memory_op );
+%}
+
+//---------------------
+// Subtraction Instructions
+// Register Subtraction
+// Int subtraction: dst = src1 - src2, one SUB instruction.
+instruct subI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+  match(Set dst (SubI src1 src2));
+
+  size(4);
+  format %{ "sub_32 $dst,$src1,$src2\t! int" %}
+  ins_encode %{
+    __ sub($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Fused subtract with left shift: dst = src1 - (src2 << src3), with the shift
+// amount taken from a register and folded into the SUB's shifted operand.
+instruct subshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  match(Set dst (SubI src1 (LShiftI src2 src3)));
+
+  size(4);
+  format %{ "SUB    $dst,$src1,$src2<<$src3" %}
+  ins_encode %{
+    __ sub($dst$$Register, $src1$$Register, $src2$$Register, lsl($src3$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Fused subtract with left shift: dst = src1 - (src2 << src3), with an immU5
+// constant shift amount folded into the SUB's shifted operand.
+instruct subshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+  match(Set dst (SubI src1 (LShiftI src2 src3)));
+
+  size(4);
+  format %{ "sub_32 $dst,$src1,$src2<<$src3\t! int" %}
+  ins_encode %{
+    __ sub($dst$$Register, $src1$$Register, $src2$$Register, lsl($src3$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Fused subtract with arithmetic right shift: dst = src1 - (src2 >> src3),
+// with the shift amount taken from a register.
+instruct subsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  match(Set dst (SubI src1 (RShiftI src2 src3)));
+
+  size(4);
+  format %{ "SUB    $dst,$src1,$src2>>$src3" %}
+  ins_encode %{
+    __ sub($dst$$Register, $src1$$Register, $src2$$Register, asr($src3$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Fused subtract with arithmetic right shift: dst = src1 - (src2 >> src3),
+// with an immU5 constant shift amount.
+instruct subsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+  match(Set dst (SubI src1 (RShiftI src2 src3)));
+
+  size(4);
+  format %{ "sub_32 $dst,$src1,$src2>>$src3\t! int" %}
+  ins_encode %{
+    __ sub($dst$$Register, $src1$$Register, $src2$$Register, asr($src3$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Fused subtract with logical right shift: dst = src1 - (src2 >>> src3),
+// with the shift amount taken from a register.
+instruct subshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  match(Set dst (SubI src1 (URShiftI src2 src3)));
+
+  size(4);
+  format %{ "SUB    $dst,$src1,$src2>>>$src3" %}
+  ins_encode %{
+    __ sub($dst$$Register, $src1$$Register, $src2$$Register, lsr($src3$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Fused subtract with logical right shift: dst = src1 - (src2 >>> src3),
+// with an immU5 constant shift amount.
+instruct subshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+  match(Set dst (SubI src1 (URShiftI src2 src3)));
+
+  size(4);
+  format %{ "sub_32 $dst,$src1,$src2>>>$src3\t! int" %}
+  ins_encode %{
+    __ sub($dst$$Register, $src1$$Register, $src2$$Register, lsr($src3$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Reverse subtract with left shift: dst = (src1 << src2) - src3, with the
+// shift amount in a register. RSB is used because the shifted value is the
+// minuend.
+instruct rsbshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  match(Set dst (SubI (LShiftI src1 src2) src3));
+
+  size(4);
+  format %{ "RSB    $dst,$src3,$src1<<$src2" %}
+  ins_encode %{
+    __ rsb($dst$$Register, $src3$$Register, $src1$$Register, lsl($src2$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Reverse subtract with left shift: dst = (src1 << src2) - src3, with an
+// immU5 constant shift amount.
+instruct rsbshlI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
+  match(Set dst (SubI (LShiftI src1 src2) src3));
+
+  size(4);
+  format %{ "RSB    $dst,$src3,$src1<<$src2" %}
+  ins_encode %{
+    __ rsb($dst$$Register, $src3$$Register, $src1$$Register, lsl($src2$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Reverse subtract with arithmetic right shift: dst = (src1 >> src2) - src3,
+// with the shift amount in a register.
+instruct rsbsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  match(Set dst (SubI (RShiftI src1 src2) src3));
+
+  size(4);
+  format %{ "RSB    $dst,$src3,$src1>>$src2" %}
+  ins_encode %{
+    __ rsb($dst$$Register, $src3$$Register, $src1$$Register, asr($src2$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Reverse subtract with arithmetic right shift: dst = (src1 >> src2) - src3,
+// with an immU5 constant shift amount.
+instruct rsbsarI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
+  match(Set dst (SubI (RShiftI src1 src2) src3));
+
+  size(4);
+  format %{ "RSB    $dst,$src3,$src1>>$src2" %}
+  ins_encode %{
+    __ rsb($dst$$Register, $src3$$Register, $src1$$Register, asr($src2$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Reverse subtract with logical right shift: dst = (src1 >>> src2) - src3,
+// with the shift amount in a register.
+instruct rsbshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  match(Set dst (SubI (URShiftI src1 src2) src3));
+
+  size(4);
+  format %{ "RSB    $dst,$src3,$src1>>>$src2" %}
+  ins_encode %{
+    __ rsb($dst$$Register, $src3$$Register, $src1$$Register, lsr($src2$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Reverse subtract with logical right shift: dst = (src1 >>> src2) - src3,
+// with an immU5 constant shift amount.
+instruct rsbshrI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
+  match(Set dst (SubI (URShiftI src1 src2) src3));
+
+  size(4);
+  format %{ "RSB    $dst,$src3,$src1>>>$src2" %}
+  ins_encode %{
+    __ rsb($dst$$Register, $src3$$Register, $src1$$Register, lsr($src2$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Subtraction
+// Int subtraction of an arithmetic immediate: dst = src1 - src2.
+instruct subI_reg_aimmI(iRegI dst, iRegI src1, aimmI src2) %{
+  match(Set dst (SubI src1 src2));
+
+  size(4);
+  format %{ "sub_32 $dst,$src1,$src2\t! int" %}
+  ins_encode %{
+    __ sub($dst$$Register, $src1$$Register, $src2$$constant);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Matches an int ADD whose constant is an aimmIneg operand and emits it as a
+// SUB of the negated constant: dst = src1 - (-src2).
+instruct subI_reg_immRotneg(iRegI dst, iRegI src1, aimmIneg src2) %{
+  match(Set dst (AddI src1 src2));
+
+  size(4);
+  format %{ "sub_32 $dst,$src1,-($src2)\t! int" %}
+  ins_encode %{
+    __ sub($dst$$Register, $src1$$Register, -$src2$$constant);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+instruct subI_immRot_reg(iRegI dst, immIRot src1, iRegI src2) %{
+  match(Set dst (SubI src1 src2));
+
+  size(4);
+  format %{ "RSB    $dst,$src2,src1" %}
+  ins_encode %{
+    __ rsb($dst$$Register, $src2$$Register, $src1$$constant);
+  %}
+  ins_pipe(ialu_zero_reg);
+%}
+
+// Register Subtraction
+// Long subtraction: dst = src1 - src2 on register pairs.
+// The condition flags are killed because the borrow travels through them.
+instruct subL_reg_reg(iRegL dst, iRegL src1, iRegL src2, flagsReg icc ) %{
+  match(Set dst (SubL src1 src2));
+  effect (KILL icc);
+
+  size(8);
+  format %{ "SUBS   $dst.lo,$src1.lo,$src2.lo\t! long\n\t"
+            "SBC    $dst.hi,$src1.hi,$src2.hi" %}
+  ins_encode %{
+    // Low words with SUBS (sets flags), high words with SBC.
+    __ subs($dst$$Register, $src1$$Register, $src2$$Register);
+    __ sbc($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor());
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Subtraction
+// TODO: try immLRot2 instead; the (0, $con$$constant) operand pair would
+// then become (hi($con$$constant), lo($con$$constant)).
+// Long subtraction of an immLlowRot constant: dst = src1 - con.
+// The constant is subtracted from the low word; the high word only absorbs
+// the borrow. The condition flags are killed because the borrow travels
+// through them.
+instruct subL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con, flagsReg icc) %{
+  match(Set dst (SubL src1 con));
+  effect (KILL icc);
+
+  size(8);
+  format %{ "SUBS   $dst.lo,$src1.lo,$con\t! long\n\t"
+            "SBC    $dst.hi,$src1.hi,0" %}
+  ins_encode %{
+    // Low word with SUBS (sets flags), then SBC folds the borrow into the high word.
+    __ subs($dst$$Register, $src1$$Register, (long)$con$$constant);
+    __ sbc($dst$$Register->successor(), $src1$$Register->successor(), 0);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Long negation
+// Long negation: matches (SubL zero src2) with an immL0 zero operand and
+// computes dst = 0 - src2 with a reverse-subtract pair. Flags are killed.
+instruct negL_reg_reg(iRegL dst, immL0 zero, iRegL src2, flagsReg icc) %{
+  match(Set dst (SubL zero src2));
+  effect (KILL icc);
+
+  size(8);
+  format %{ "RSBS   $dst.lo,$src2.lo,0\t! long\n\t"
+            "RSC    $dst.hi,$src2.hi,0" %}
+  ins_encode %{
+    // Low word with RSBS (sets flags), high word with RSC.
+    __ rsbs($dst$$Register, $src2$$Register, 0);
+    __ rsc($dst$$Register->successor(), $src2$$Register->successor(), 0);
+  %}
+  ins_pipe(ialu_zero_reg);
+%}
+
+// Multiplication Instructions
+// Integer Multiplication
+// Register Multiplication
+// Int multiplication: dst = src1 * src2, one MUL instruction.
+instruct mulI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+  match(Set dst (MulI src1 src2));
+
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "mul_32 $dst,$src1,$src2" %}
+  ins_encode %{
+    __ mul($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(imul_reg_reg);
+%}
+
+// Step 1 of the mulL_reg_reg expansion (no match rule of its own):
+// dst.hi = src1.lo * src2.hi. Defines dst; dst.lo is not written here.
+instruct mulL_lo1_hi2(iRegL dst, iRegL src1, iRegL src2) %{
+  effect(DEF dst, USE src1, USE src2);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "MUL  $dst.hi,$src1.lo,$src2.hi\t! long" %}
+  ins_encode %{
+    __ mul($dst$$Register->successor(), $src1$$Register, $src2$$Register->successor());
+  %}
+  ins_pipe(imul_reg_reg);
+%}
+
+// Step 2 of the mulL_reg_reg expansion (no match rule of its own):
+// dst.hi = src1.hi * src2.lo + dst.hi, then dst.lo is cleared so the
+// following step can accumulate into the full pair.
+instruct mulL_hi1_lo2(iRegL dst, iRegL src1, iRegL src2) %{
+  effect(USE_DEF dst, USE src1, USE src2);
+  ins_cost(DEFAULT_COST*3/2);
+  size(8);
+  format %{ "MLA  $dst.hi,$src1.hi,$src2.lo,$dst.hi\t! long\n\t"
+            "MOV  $dst.lo, 0"%}
+  ins_encode %{
+    __ mla($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register, $dst$$Register->successor());
+    __ mov($dst$$Register, 0);
+  %}
+  ins_pipe(imul_reg_reg);
+%}
+
+// Step 3 of the mulL_reg_reg expansion (no match rule of its own):
+// UMLAL on (dst.lo, dst.hi) with src1.lo and src2.lo, i.e. the product of the
+// two low words is accumulated into the dst pair prepared by the earlier steps.
+instruct mulL_lo1_lo2(iRegL dst, iRegL src1, iRegL src2) %{
+  effect(USE_DEF dst, USE src1, USE src2);
+  ins_cost(DEFAULT_COST*3/2);
+  size(4);
+  format %{ "UMLAL  $dst.lo,$dst.hi,$src1,$src2\t! long" %}
+  ins_encode %{
+    __ umlal($dst$$Register, $dst$$Register->successor(), $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(imul_reg_reg);
+%}
+
+// Long multiplication: dst = src1 * src2, expanded into three helper
+// instructions executed in this order:
+//   mulL_lo1_hi2: dst.hi  = src1.lo * src2.hi
+//   mulL_hi1_lo2: dst.hi += src1.hi * src2.lo; dst.lo = 0
+//   mulL_lo1_lo2: dst    += src1.lo * src2.lo (UMLAL)
+instruct mulL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
+  match(Set dst (MulL src1 src2));
+  ins_cost(DEFAULT_COST*8/2);
+
+  expand %{
+    mulL_lo1_hi2(dst, src1, src2);
+    mulL_hi1_lo2(dst, src1, src2);
+    mulL_lo1_lo2(dst, src1, src2);
+  %}
+%}
+
+// Int multiply-accumulate: dst = src1 * src2 + srcA, one MLA instruction.
+instruct mla_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI srcA) %{
+  match(Set dst (AddI (MulI src1 src2) srcA));
+
+  ins_cost(DEFAULT_COST*3/2);
+  size(4);
+  format %{ "MLA $dst,$src1,$src2,$srcA" %}
+  ins_encode %{
+    __ mla($dst$$Register, $src1$$Register, $src2$$Register, $srcA$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Int multiply-subtract: dst = srcA - src1 * src2, one MLS instruction.
+instruct mls_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI srcA) %{
+  match(Set dst (SubI srcA (MulI src1 src2)));
+
+  ins_cost(DEFAULT_COST*3/2);
+  size(4);
+  format %{ "MLS $dst,$src1,$src2,$srcA" %}
+  ins_encode %{
+    __ mls($dst$$Register, $src1$$Register, $src2$$Register, $srcA$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Widening multiply-accumulate: dst = (long)src1 * (long)src2 + dst.
+// dst appears on both sides of the match rule, so it is both the accumulator
+// input and the result; emitted as one SMLAL on the dst pair.
+instruct smlal_reg_reg_reg(iRegL dst, iRegI src1, iRegI src2) %{
+  match(Set dst (AddL (MulL (ConvI2L src1) (ConvI2L src2)) dst));
+
+  ins_cost(DEFAULT_COST*3/2);
+  size(4);
+  format %{ "SMLAL $dst.lo,$dst.hi,$src1,$src2" %}
+  ins_encode %{
+    __ smlal($dst$$Register, $dst$$Register->successor(), $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Widening multiply: dst = (long)src1 * (long)src2 for two int inputs,
+// emitted as one SMULL writing the dst pair.
+instruct smull_reg_reg_reg(iRegL dst, iRegI src1, iRegI src2) %{
+  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));
+
+  ins_cost(DEFAULT_COST*3/2);
+  size(4);
+  format %{ "SMULL $dst.lo,$dst.hi,$src1,$src2" %}
+  ins_encode %{
+    __ smull($dst$$Register, $dst$$Register->successor(), $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Integer Division
+// Register Division
+// Int division using the hardware SDIV instruction; selected only when
+// VM_Version::features() reports FT_HW_DIVIDE.
+instruct divI_reg_reg_IDIV(iRegI dst, iRegI src1, iRegI src2) %{
+  match(Set dst (DivI src1 src2));
+  predicate(VM_Version::features() & FT_HW_DIVIDE);
+  ins_cost(2*DEFAULT_COST);
+
+  format %{ "SDIV   $dst,$src1,$src2"%}
+  ins_encode %{
+    __ sdiv($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(sdiv_reg_reg_IDIV);
+%}
+
+// Int division without hardware divide (FT_HW_DIVIDE absent): calls the
+// StubRoutines::aarch32::idiv_entry() stub. Operands are pinned to fixed
+// register classes (dst R0RegI, src1 R1RegI, src2 R2RegI); both sources are
+// USE_KILL, and the R9/R12 temps, LR and the flags are declared clobbered.
+instruct divI_reg_reg_SW(R0RegI dst, R1RegI src1, R2RegI src2, R9RegI temp1, R12RegI temp2, LRRegP lr, flagsReg ccr) %{
+  match(Set dst (DivI src1 src2));
+  predicate(!(VM_Version::features() & FT_HW_DIVIDE));
+  effect( KILL ccr, TEMP temp1, TEMP temp2, USE_KILL src1,USE_KILL src2, KILL lr);
+  ins_cost((2+71)*DEFAULT_COST);
+
+  format %{ "DIV   $dst,$src1,$src2 ! call to StubRoutines::aarch32::idiv_entry()" %}
+  ins_encode %{
+    __ call(StubRoutines::aarch32::idiv_entry(), relocInfo::runtime_call_type);
+  %}
+  ins_pipe(sdiv_reg_reg_SW);
+%}
+
+// Register Long Division
+// Long division implemented as a runtime call to SharedRuntime::ldiv.
+// Operands are pinned to fixed register pairs: src1 in R2R3RegL, src2 and the
+// result in R0R1RegL. effect(CALL) marks the instruction as a call.
+instruct divL_reg_reg(R0R1RegL dst, R2R3RegL src1, R0R1RegL src2) %{
+  match(Set dst (DivL src1 src2));
+  effect(CALL);
+  ins_cost(DEFAULT_COST*71);
+  format %{ "DIVL  $src1,$src2,$dst\t! long ! call to SharedRuntime::ldiv" %}
+  ins_encode %{
+    address target = CAST_FROM_FN_PTR(address, SharedRuntime::ldiv);
+    __ call(target, relocInfo::runtime_call_type);
+  %}
+  ins_pipe(divL_reg_reg);
+%}
+
+// Integer Remainder
+// Register Remainder
+// Int remainder using hardware divide; selected only when
+// VM_Version::features() reports FT_HW_DIVIDE.
+// Computes temp = src1 / src2, then dst = src1 - temp * src2.
+instruct modI_reg_reg_IDIV(iRegI dst, iRegI src1, iRegI src2, iRegI temp) %{
+  match(Set dst (ModI src1 src2));
+  predicate(VM_Version::features() & FT_HW_DIVIDE);
+  effect( TEMP temp);
+
+  format %{ "SDIV   $temp,$src1,$src2\n\t"
+            "MLS    $dst, $temp, $src2, $src1"%}
+  ins_encode %{
+    // Quotient into temp; src1 and src2 must stay intact for the MLS below.
+    __ sdiv($temp$$Register, $src1$$Register, $src2$$Register);
+    __ mls($dst$$Register, $temp$$Register, $src2$$Register, $src1$$Register);
+  %}
+  ins_pipe(sdiv_reg_reg_IDIV);
+%}
+
+// Int remainder without hardware divide (FT_HW_DIVIDE absent): calls the
+// StubRoutines::aarch32::irem_entry() stub. Operands are pinned to fixed
+// register classes (dst R0RegI, src1 R1RegI, src2 R2RegI); both sources are
+// USE_KILL, and the R9/R12 temps, LR and the flags are declared clobbered.
+instruct modI_reg_reg_SW(R0RegI dst, R1RegI src1, R2RegI src2, R9RegI temp1, R12RegI temp2, LRRegP lr, flagsReg ccr ) %{
+  match(Set dst (ModI src1 src2));
+  predicate(!(VM_Version::features() & FT_HW_DIVIDE));
+  effect( KILL ccr, TEMP temp1, TEMP temp2, KILL lr, USE_KILL src1, USE_KILL src2);
+
+  format %{ "MODI   $dst,$src1,$src2\t ! call to StubRoutines::aarch32::irem_entry" %}
+  ins_encode %{
+    __ call(StubRoutines::aarch32::irem_entry(), relocInfo::runtime_call_type);
+  %}
+  ins_pipe(sdiv_reg_reg_SW);
+%}
+
+// Register Long Remainder
+// Long remainder implemented as a runtime call to SharedRuntime::lrem.
+// Operands are pinned to fixed register pairs: src1 in R2R3RegL, src2 and the
+// result in R0R1RegL. effect(CALL) marks the instruction as a call.
+// NOTE(review): the cost is MEMORY_REF_COST (flagged FIXME) while the
+// sibling divL_reg_reg uses DEFAULT_COST*71 - confirm which is intended.
+instruct modL_reg_reg(R0R1RegL dst, R2R3RegL src1, R0R1RegL src2) %{
+  match(Set dst (ModL src1 src2));
+  effect(CALL);
+  ins_cost(MEMORY_REF_COST); // FIXME
+  format %{ "modL    $dst,$src1,$src2\t ! call to SharedRuntime::lrem" %}
+  ins_encode %{
+    address target = CAST_FROM_FN_PTR(address, SharedRuntime::lrem);
+    __ call(target, relocInfo::runtime_call_type);
+  %}
+  ins_pipe(divL_reg_reg);
+%}
+
+// Integer Shift Instructions
+
+// Register Shift Left
+// Int shift left by a register amount: dst = src1 << src2, emitted as a MOV
+// with an LSL-by-register shifted operand.
+instruct shlI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+  match(Set dst (LShiftI src1 src2));
+
+  size(4);
+  format %{ "LSL  $dst,$src1,$src2 \n\t" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src1$$Register, lsl($src2$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Register Shift Left Immediate
+// Int shift left by an immU5 constant: dst = src1 << src2.
+instruct shlI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{
+  match(Set dst (LShiftI src1 src2));
+
+  size(4);
+  format %{ "LSL    $dst,$src1,$src2\t! int" %}
+  ins_encode %{
+    __ lsl($dst$$Register, $src1$$Register, $src2$$constant);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Step 2 of the shlL_reg_reg expansion (no match rule of its own):
+// dst.hi |= src1.hi << src2, merged into the dst.hi value produced by
+// shlL_reg_reg_overlap.
+instruct shlL_reg_reg_merge_hi(iRegL dst, iRegL src1, iRegI src2) %{
+  effect(USE_DEF dst, USE src1, USE src2);
+  size(4);
+  format %{"OR  $dst.hi,$dst.hi,($src1.hi << $src2)"  %}
+  ins_encode %{
+    __ orr($dst$$Register->successor(), $dst$$Register->successor(), $src1$$Register->successor(), lsl($src2$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Step 3 of the shlL_reg_reg expansion (no match rule of its own):
+// dst.lo = src1.lo << src2. Runs last, after src1.lo has been consumed by the
+// overlap step.
+instruct shlL_reg_reg_merge_lo(iRegL dst, iRegL src1, iRegI src2) %{
+  effect(USE_DEF dst, USE src1, USE src2);
+  size(4);
+  format %{ "LSL  $dst.lo,$src1.lo,$src2 \n\t" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src1$$Register, lsl($src2$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Step 1 of the shlL_reg_reg expansion (no match rule of its own).
+// Computes into dst.hi the bits of src1.lo that cross into the high word:
+//   src2 >= 32 (PL after SUBS): dst.hi = src1.lo << (src2 - 32)
+//   src2 <  32 (MI after SUBS): dst.hi = src1.lo >>> (32 - src2)
+// dst.hi doubles as the scratch register for the adjusted shift amount.
+instruct shlL_reg_reg_overlap(iRegL dst, iRegL src1, iRegI src2, flagsReg ccr) %{
+  effect(DEF dst, USE src1, USE src2, KILL ccr);
+  size(16);
+  format %{ "SUBS  $dst.hi,$src2,32 \n\t"
+            "LSLpl $dst.hi,$src1.lo,$dst.hi \n\t"
+            "RSBmi $dst.hi,$dst.hi,0 \n\t"
+            "LSRmi $dst.hi,$src1.lo,$dst.hi" %}
+
+  ins_encode %{
+    // $src1$$Register and $dst$$Register->successor() can't be the same
+    __ subs($dst$$Register->successor(), $src2$$Register, 32);
+    __ mov($dst$$Register->successor(), $src1$$Register, lsl($dst$$Register->successor()), Assembler::PL);
+    // Negative difference: negate it to get 32 - src2 and shift right instead.
+    __ rsb($dst$$Register->successor(), $dst$$Register->successor(), 0, Assembler::MI);
+    __ mov($dst$$Register->successor(), $src1$$Register, lsr($dst$$Register->successor()), Assembler::MI);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Long shift left by a register amount: dst = src1 << src2, expanded into
+// three helper instructions executed in this order:
+//   shlL_reg_reg_overlap:  dst.hi  = bits of src1.lo that move into the high word
+//   shlL_reg_reg_merge_hi: dst.hi |= src1.hi << src2
+//   shlL_reg_reg_merge_lo: dst.lo  = src1.lo << src2
+instruct shlL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{
+  match(Set dst (LShiftL src1 src2));
+
+  expand %{
+    flagsReg ccr;
+    shlL_reg_reg_overlap(dst, src1, src2, ccr);
+    shlL_reg_reg_merge_hi(dst, src1, src2);
+    shlL_reg_reg_merge_lo(dst, src1, src2);
+  %}
+%}
+
+// Register Shift Left Immediate
+// Long shift left by an immU6Big constant (presumably 32..63 - operand
+// definition not visible here): the high word comes entirely from src1.lo and
+// the low word becomes 0.
+instruct shlL_reg_imm6(iRegL dst, iRegL src1, immU6Big src2) %{
+  match(Set dst (LShiftL src1 src2));
+
+  size(8);
+  format %{ "LSL   $dst.hi,$src1.lo,$src2-32\t! or mov if $src2==32\n\t"
+            "MOV   $dst.lo, 0" %}
+  ins_encode %{
+    // dst.hi must be written before dst.lo is cleared, since dst.lo may be src1.lo.
+    if ($src2$$constant == 32) {
+      __ mov($dst$$Register->successor(), $src1$$Register);
+    } else {
+      __ mov($dst$$Register->successor(), $src1$$Register, lsl($src2$$constant-32));
+    }
+    __ mov($dst$$Register, 0);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Long shift left by an immU5 constant:
+//   dst.hi = (src1.hi << src2) | (src1.lo >>> (32 - src2))
+//   dst.lo =  src1.lo << src2
+// NOTE(review): for $src2 == 0 the ORR shift amount is 32 - confirm that
+// lsr(32) is accepted by the assembler or that a zero shift never reaches
+// this rule.
+instruct shlL_reg_imm5(iRegL dst, iRegL src1, immU5 src2) %{
+  match(Set dst (LShiftL src1 src2));
+
+  size(12);
+  format %{ "LSL   $dst.hi,$src1.hi,$src2\n\t"
+            "OR    $dst.hi, $dst.hi, $src1.lo >> 32-$src2\n\t"
+            "LSL   $dst.lo,$src1.lo,$src2" %}
+  ins_encode %{
+    // The order of the following 3 instructions matters: src1.lo and
+    // dst.hi can't overlap but src.hi and dst.hi can.
+    __ mov($dst$$Register->successor(), $src1$$Register->successor(), lsl($src2$$constant));
+    __ orr($dst$$Register->successor(), $dst$$Register->successor(), $src1$$Register, lsr(32-$src2$$constant));
+    __ mov($dst$$Register, $src1$$Register, lsl($src2$$constant));
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Register Arithmetic Shift Right
+// Int arithmetic shift right by a register amount: dst = src1 >> src2,
+// emitted as a MOV with an ASR-by-register shifted operand.
+instruct sarI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+  match(Set dst (RShiftI src1 src2));
+  size(4);
+  format %{ "ASR    $dst,$src1,$src2\t! int" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src1$$Register, asr($src2$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Register Arithmetic Shift Right Immediate
+// Int arithmetic shift right by an immU5 constant: dst = src1 >> src2.
+instruct sarI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{
+  match(Set dst (RShiftI src1 src2));
+
+  size(4);
+  format %{ "ASR    $dst,$src1,$src2" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src1$$Register, asr($src2$$constant));
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Register Shift Right Arithmetic Long
+// Step 2 of the sarL_reg_reg expansion (no match rule of its own):
+// dst.lo |= src1.lo >>> src2. The low word is shifted logically (LSR); only
+// the high word gets the arithmetic shift.
+instruct sarL_reg_reg_merge_lo(iRegL dst, iRegL src1, iRegI src2) %{
+  effect(USE_DEF dst, USE src1, USE src2);
+  size(4);
+  format %{ "OR  $dst.lo,$dst.lo,($src1.lo >>> $src2)"  %}
+  ins_encode %{
+    __ orr($dst$$Register, $dst$$Register, $src1$$Register, lsr($src2$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Step 3 of the sarL_reg_reg expansion (no match rule of its own):
+// dst.hi = src1.hi >> src2 (arithmetic). Runs last, after src1.hi has been
+// consumed by the overlap step.
+instruct sarL_reg_reg_merge_hi(iRegL dst, iRegL src1, iRegI src2) %{
+  effect(USE_DEF dst, USE src1, USE src2);
+  size(4);
+  format %{ "ASR  $dst.hi,$src1.hi,$src2 \n\t" %}
+  ins_encode %{
+    __ mov($dst$$Register->successor(), $src1$$Register->successor(), asr($src2$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Step 1 of the sarL_reg_reg expansion (no match rule of its own).
+// Computes into dst.lo the bits of src1.hi that cross into the low word:
+//   src2 >= 32 (PL after SUBS): dst.lo = src1.hi >> (src2 - 32)   (arithmetic)
+//   src2 <  32 (MI after SUBS): dst.lo = src1.hi << (32 - src2)
+// dst.lo doubles as the scratch register for the adjusted shift amount.
+instruct sarL_reg_reg_overlap(iRegL dst, iRegL src1, iRegI src2, flagsReg ccr) %{
+  effect(DEF dst, USE src1, USE src2, KILL ccr);
+  size(16);
+  format %{ "SUBS  $dst.lo,$src2,32 \n\t"
+            "ASRpl $dst.lo,$src1.hi,$dst.lo \n\t"
+            "RSBmi $dst.lo,$dst.lo,0 \n\t"
+            "LSLmi $dst.lo,$src1.hi,$dst.lo" %}
+
+  ins_encode %{
+    // $src1$$Register->successor() and $dst$$Register can't be the same
+    __ subs($dst$$Register, $src2$$Register, 32);
+    __ mov($dst$$Register, $src1$$Register->successor(), asr($dst$$Register), Assembler::PL);
+    // Negative difference: negate it to get 32 - src2 and shift left instead.
+    __ rsb($dst$$Register, $dst$$Register, 0, Assembler::MI);
+    __ mov($dst$$Register, $src1$$Register->successor(), lsl($dst$$Register), Assembler::MI);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Long arithmetic shift right by a register amount: dst = src1 >> src2,
+// expanded into three helper instructions executed in this order:
+//   sarL_reg_reg_overlap:  dst.lo  = bits of src1.hi that move into the low word
+//   sarL_reg_reg_merge_lo: dst.lo |= src1.lo >>> src2
+//   sarL_reg_reg_merge_hi: dst.hi  = src1.hi >> src2
+instruct sarL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{
+  match(Set dst (RShiftL src1 src2));
+
+  expand %{
+    flagsReg ccr;
+    sarL_reg_reg_overlap(dst, src1, src2, ccr);
+    sarL_reg_reg_merge_lo(dst, src1, src2);
+    sarL_reg_reg_merge_hi(dst, src1, src2);
+  %}
+%}
+
+// Register Arithmetic Shift Right Immediate (long)
+// Long arithmetic shift right by an immU6Big constant (presumably 32..63 -
+// operand definition not visible here): the low word comes entirely from
+// src1.hi, and the high word is src1.hi shifted arithmetically by 32.
+instruct sarL_reg_imm6(iRegL dst, iRegL src1, immU6Big src2) %{
+  match(Set dst (RShiftL src1 src2));
+
+  size(8);
+  format %{ "ASR   $dst.lo,$src1.hi,$src2-32\t! or mov if $src2==32\n\t"
+            "ASR   $dst.hi,$src1.hi, 32" %}
+  ins_encode %{
+    // dst.lo must be written before dst.hi, since dst.hi may be src1.hi.
+    if ($src2$$constant == 32) {
+      __ mov($dst$$Register, $src1$$Register->successor());
+    } else {
+      __ mov($dst$$Register, $src1$$Register->successor(), asr($src2$$constant-32));
+    }
+    // The high word is always shifted by the fixed amount 32, not by $src2.
+    __ mov($dst$$Register->successor(), $src1$$Register->successor(), asr(32));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Long arithmetic shift right by an immU5 constant:
+//   dst.lo = (src1.lo >>> src2) | (src1.hi << (32 - src2))
+//   dst.hi =  src1.hi >> src2   (arithmetic)
+// NOTE(review): for $src2 == 0 the ORR shift amount is 32 - confirm that
+// lsl(32) is accepted by the assembler or that a zero shift never reaches
+// this rule.
+instruct sarL_reg_imm5(iRegL dst, iRegL src1, immU5 src2) %{
+  match(Set dst (RShiftL src1 src2));
+  size(12);
+  format %{ "LSR   $dst.lo,$src1.lo,$src2\n\t"
+            "OR    $dst.lo, $dst.lo, $src1.hi << 32-$src2\n\t"
+            "ASR   $dst.hi,$src1.hi,$src2" %}
+  ins_encode %{
+    // The order of the following 3 instructions matters: dst.lo is written
+    // first while src1.hi is still needed, so this relies on src1.hi and
+    // dst.lo not overlapping (the same constraint sarL_reg_reg_overlap
+    // states); dst.hi, which may be src1.hi, is written last.
+    __ mov($dst$$Register, $src1$$Register, lsr($src2$$constant));
+    __ orr($dst$$Register, $dst$$Register, $src1$$Register->successor(), lsl(32-$src2$$constant));
+    __ mov($dst$$Register->successor(), $src1$$Register->successor(), asr($src2$$constant));
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Register Shift Right
+// Int logical shift right by a register amount: dst = src1 >>> src2,
+// emitted as a MOV with an LSR-by-register shifted operand.
+instruct shrI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+  match(Set dst (URShiftI src1 src2));
+  size(4);
+  format %{ "LSR    $dst,$src1,$src2\t! int" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src1$$Register, lsr($src2$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Register Shift Right Immediate
+// Int logical shift right by an immU5 constant: dst = src1 >>> src2.
+instruct shrI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{
+  match(Set dst (URShiftI src1 src2));
+
+  size(4);
+  format %{ "LSR    $dst,$src1,$src2" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src1$$Register, lsr($src2$$constant));
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Register Shift Right
+instruct shrL_reg_reg_merge_lo(iRegL dst, iRegL src1, iRegI src2) %{
+  // Helper without a match rule (used by the shrL_reg_reg expansion): ORs
+  // (src1.lo >>> src2) into the low word of dst, which is both read and
+  // written (USE_DEF).
+  effect(USE_DEF dst, USE src1, USE src2);
+  format %{ "OR   $dst.lo,$dst,($src1.lo >>> $src2)"  %}
+  size(4);
+  ins_encode %{
+    Register Rlo = $dst$$Register;
+    __ orr(Rlo, Rlo, $src1$$Register, lsr($src2$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct shrL_reg_reg_merge_hi(iRegL dst, iRegL src1, iRegI src2) %{
+  // Helper without a match rule (used by the shrL_reg_reg expansion): writes
+  // dst.hi = src1.hi >>> src2 with a single MOV/LSR-by-register.
+  effect(USE_DEF dst, USE src1, USE src2);
+  size(4);
+  // Single-line listing: no trailing line continuation after the only
+  // instruction.
+  format %{ "LSR  $dst.hi,$src1.hi,$src2" %}
+  ins_encode %{
+    __ mov($dst$$Register->successor(), $src1$$Register->successor(), lsr($src2$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct shrL_reg_reg_overlap(iRegL dst, iRegL src1, iRegI src2, flagsReg ccr) %{
+  // Helper without a match rule (first step of the shrL_reg_reg expansion).
+  // It computes into dst.lo the bits that src1.hi contributes to the low
+  // result word, and clobbers the condition flags (KILL ccr).
+  effect(DEF dst, USE src1, USE src2, KILL ccr);
+  size(16);
+  format %{ "SUBS  $dst,$src2,32 \n\t"
+            "LSRpl $dst,$src1.hi,$dst \n\t"
+            "RSBmi $dst,$dst,0 \n\t"
+            "LSLmi $dst,$src1.hi,$dst" %}
+
+  ins_encode %{
+    // $src1$$Register->successor() and $dst$$Register can't be the same
+    // dst.lo = src2 - 32, setting the flags used by the conditional
+    // instructions below.
+    __ subs($dst$$Register, $src2$$Register, 32);
+    // Non-negative difference (PL): dst.lo = src1.hi >>> (src2 - 32).
+    __ mov($dst$$Register, $src1$$Register->successor(), lsr($dst$$Register), Assembler::PL);
+    // Negative difference (MI): negate it to get 32 - src2 ...
+    __ rsb($dst$$Register, $dst$$Register, 0, Assembler::MI);
+    // ... and set dst.lo = src1.hi << (32 - src2).
+    __ mov($dst$$Register, $src1$$Register->successor(), lsl($dst$$Register), Assembler::MI);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct shrL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{
+  // Long logical (unsigned) right shift (URShiftL) by a register count. The
+  // rule emits no code itself: it expands into shrL_reg_reg_overlap,
+  // shrL_reg_reg_merge_lo and shrL_reg_reg_merge_hi, in that order, with a
+  // flags operand created for the overlap helper.
+  match(Set dst (URShiftL src1 src2));
+
+  expand %{
+    flagsReg ccr;
+    shrL_reg_reg_overlap(dst, src1, src2, ccr);
+    shrL_reg_reg_merge_lo(dst, src1, src2);
+    shrL_reg_reg_merge_hi(dst, src1, src2);
+  %}
+%}
+
+// Register Shift Right Immediate
+instruct shrL_reg_imm6(iRegL dst, iRegL src1, immU6Big src2) %{
+  // Long logical right shift (URShiftL) by a constant. Only src1.hi is read:
+  // dst.lo = src1.hi >>> ($src2 - 32), or a plain copy when $src2 == 32, and
+  // dst.hi is cleared.
+  // NOTE(review): presumably immU6Big limits $src2 to 32..63 -- confirm.
+  match(Set dst (URShiftL src1 src2));
+
+  format %{ "LSR   $dst.lo,$src1.hi,$src2-32\t! or mov if $src2==32\n\t"
+            "MOV   $dst.hi, 0" %}
+  size(8);
+  ins_encode %{
+    Register Rdst_lo = $dst$$Register;
+    Register Rsrc_hi = $src1$$Register->successor();
+    if ($src2$$constant != 32) {
+      __ mov(Rdst_lo, Rsrc_hi, lsr($src2$$constant-32));
+    } else {
+      __ mov(Rdst_lo, Rsrc_hi);
+    }
+    __ mov(Rdst_lo->successor(), 0);
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+instruct shrL_reg_imm5(iRegL dst, iRegL src1, immU5 src2) %{
+  // Long logical right shift (URShiftL) by a constant, in three steps:
+  // dst.lo = src1.lo >>> $src2, then src1.hi << (32 - $src2) is ORed into
+  // dst.lo, and finally dst.hi = src1.hi >>> $src2.
+  // NOTE(review): a constant of 0 would turn the second step into lsl(32) --
+  // confirm that immU5 / the matcher never delivers 0 here.
+  match(Set dst (URShiftL src1 src2));
+
+  size(12);
+  format %{ "LSR   $dst.lo,$src1.lo,$src2\n\t"
+            "OR    $dst.lo, $dst.lo, $src1.hi << 32-$src2\n\t"
+            "LSR   $dst.hi,$src1.hi,$src2" %}
+  ins_encode %{
+    // The order of the following 3 instructions matters: src1.lo and
+    // dst.hi can't overlap but src.hi and dst.hi can.
+    __ mov($dst$$Register, $src1$$Register, lsr($src2$$constant));
+    __ orr($dst$$Register, $dst$$Register, $src1$$Register->successor(), lsl(32-$src2$$constant));
+    // dst.hi is written last because src1.hi is still needed by the OR above.
+    __ mov($dst$$Register->successor(), $src1$$Register->successor(), lsr($src2$$constant));
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+
+instruct shrP_reg_imm5(iRegX dst, iRegP src1, immU5 src2) %{
+  // Logical right shift of a pointer that was cast to an integer (CastP2X),
+  // by a constant count; the cast itself needs no instruction.
+  match(Set dst (URShiftI (CastP2X src1) src2));
+  format %{ "LSR    $dst,$src1,$src2\t! Cast ptr $src1 to int and shift" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rptr = $src1$$Register;
+    __ lsr(Rd, Rptr, $src2$$constant);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Overcomplicated unsigned math
+instruct umull_lreg32_lreg32(iRegL dst, iRegL src1, iRegL src2) %{
+  // Long multiply restricted by the predicate to the case where both inputs
+  // are AndL nodes whose constant mask has no bits set above bit 31. A single
+  // UMULL of the two low words then yields the 64-bit product in dst.lo/dst.hi.
+  match(Set dst (MulL src1 src2));
+  predicate(n->in(1)->Opcode() == Op_AndL && (((unsigned long long)n->in(1)->in(2)->find_long_con(-1))>>32)==0 &&
+            n->in(2)->Opcode() == Op_AndL && (((unsigned long long)n->in(2)->in(2)->find_long_con(-1))>>32)==0);
+
+  size(4);
+  ins_cost(DEFAULT_COST*3/2);
+  format %{ "UMULL $dst.lo,$dst.hi,$src1.lo,$src2.lo" %}
+  ins_encode %{
+    Register Rlo = $dst$$Register;
+    Register Rhi = Rlo->successor();
+    __ umull(Rlo, Rhi, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(imul_reg_reg);
+%}
+
+instruct umlal_reg32_reg32(iRegL dst, iRegL src1, iRegL src2) %{
+  // Multiply-accumulate into dst: dst += src1 * src2, emitted as one UMLAL on
+  // the low words of src1 and src2.
+  match(Set dst (AddL dst (MulL src1 src2)));
+  // The MulL may be either input of the AddL, so the predicate first picks
+  // the side that holds it. Both MulL inputs must then be AndL nodes whose
+  // mask constant has no bits set in the upper 32 bits.
+  // NOTE(review): find_long_con(-1) presumably yields -1 for a non-constant
+  // mask, which makes the ">>32 == 0" test fail -- confirm.
+  predicate(
+    n->in(2)->Opcode() == Op_MulL ?
+    n->in(2)->in(1)->Opcode() == Op_AndL && (((unsigned long long)n->in(2)->in(1)->in(2)->find_long_con(-1))>>32)==0 &&
+    n->in(2)->in(2)->Opcode() == Op_AndL && (((unsigned long long)n->in(2)->in(2)->in(2)->find_long_con(-1))>>32)==0 :
+    n->in(1)->in(1)->Opcode() == Op_AndL && (((unsigned long long)n->in(1)->in(1)->in(2)->find_long_con(-1))>>32)==0 &&
+    n->in(1)->in(2)->Opcode() == Op_AndL && (((unsigned long long)n->in(1)->in(2)->in(2)->find_long_con(-1))>>32)==0
+    );
+
+  ins_cost(DEFAULT_COST*3/2);
+  size(4);
+  format %{ "UMLAL $dst.lo,$dst.hi,$src1.lo,$src2.lo" %}
+  ins_encode %{
+    __ umlal($dst$$Register, $dst$$Register->successor(), $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+//----------Floating Point Arithmetic Instructions-----------------------------
+
+//  Add float single precision
+instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
+  // Single-precision add, dst = src1 + src2, emitted via vadd_f32.
+  match(Set dst (AddF src1 src2));
+
+  format %{ "FADDS  $dst,$src1,$src2" %}
+  size(4);
+  ins_encode %{
+    FloatRegister Fd = $dst$$FloatRegister;
+    FloatRegister Fn = $src1$$FloatRegister;
+    FloatRegister Fm = $src2$$FloatRegister;
+    __ vadd_f32(Fd, Fn, Fm);
+  %}
+
+  ins_pipe(faddF_reg_reg);
+%}
+
+//  Add float double precision
+instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
+  // Double-precision add, dst = src1 + src2, emitted via vadd_f64.
+  match(Set dst (AddD src1 src2));
+
+  format %{ "FADDD  $dst,$src1,$src2" %}
+  size(4);
+  ins_encode %{
+    FloatRegister Dd = $dst$$FloatRegister;
+    FloatRegister Dn = $src1$$FloatRegister;
+    FloatRegister Dm = $src2$$FloatRegister;
+    __ vadd_f64(Dd, Dn, Dm);
+  %}
+
+  ins_pipe(faddD_reg_reg);
+%}
+
+//  Sub float single precision
+instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
+  // Single-precision subtract, dst = src1 - src2, emitted via vsub_f32.
+  match(Set dst (SubF src1 src2));
+
+  format %{ "FSUBS  $dst,$src1,$src2" %}
+  size(4);
+  ins_encode %{
+    FloatRegister Fd = $dst$$FloatRegister;
+    FloatRegister Fn = $src1$$FloatRegister;
+    FloatRegister Fm = $src2$$FloatRegister;
+    __ vsub_f32(Fd, Fn, Fm);
+  %}
+  ins_pipe(faddF_reg_reg);
+%}
+
+//  Sub float double precision
+instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
+  // Double-precision subtract, dst = src1 - src2, emitted via vsub_f64.
+  match(Set dst (SubD src1 src2));
+
+  format %{ "FSUBD  $dst,$src1,$src2" %}
+  size(4);
+  ins_encode %{
+    FloatRegister Dd = $dst$$FloatRegister;
+    FloatRegister Dn = $src1$$FloatRegister;
+    FloatRegister Dm = $src2$$FloatRegister;
+    __ vsub_f64(Dd, Dn, Dm);
+  %}
+  ins_pipe(faddD_reg_reg);
+%}
+
+//  Mul float single precision
+instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
+  // Single-precision multiply, dst = src1 * src2, emitted via vmul_f32.
+  match(Set dst (MulF src1 src2));
+
+  format %{ "FMULS  $dst,$src1,$src2" %}
+  size(4);
+  ins_encode %{
+    FloatRegister Fd = $dst$$FloatRegister;
+    FloatRegister Fn = $src1$$FloatRegister;
+    FloatRegister Fm = $src2$$FloatRegister;
+    __ vmul_f32(Fd, Fn, Fm);
+  %}
+
+  ins_pipe(fmulF_reg_reg);
+%}
+
+//  Mul float double precision
+instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
+  // Double-precision multiply, dst = src1 * src2, emitted via vmul_f64.
+  match(Set dst (MulD src1 src2));
+
+  format %{ "FMULD  $dst,$src1,$src2" %}
+  size(4);
+  ins_encode %{
+    FloatRegister Dd = $dst$$FloatRegister;
+    FloatRegister Dn = $src1$$FloatRegister;
+    FloatRegister Dm = $src2$$FloatRegister;
+    __ vmul_f64(Dd, Dn, Dm);
+  %}
+
+  ins_pipe(fmulD_reg_reg);
+%}
+
+//  Div float single precision
+instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
+  // Single-precision divide, dst = src1 / src2, emitted via vdiv_f32.
+  match(Set dst (DivF src1 src2));
+
+  format %{ "FDIVS  $dst,$src1,$src2" %}
+  size(4);
+  ins_encode %{
+    FloatRegister Fd = $dst$$FloatRegister;
+    FloatRegister Fn = $src1$$FloatRegister;
+    FloatRegister Fm = $src2$$FloatRegister;
+    __ vdiv_f32(Fd, Fn, Fm);
+  %}
+
+  ins_pipe(fdivF_reg_reg);
+%}
+
+//  Div float double precision
+instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
+  // Double-precision divide, dst = src1 / src2, emitted via vdiv_f64.
+  match(Set dst (DivD src1 src2));
+
+  format %{ "FDIVD  $dst,$src1,$src2" %}
+  size(4);
+  ins_encode %{
+    FloatRegister Dd = $dst$$FloatRegister;
+    FloatRegister Dn = $src1$$FloatRegister;
+    FloatRegister Dm = $src2$$FloatRegister;
+    __ vdiv_f64(Dd, Dn, Dm);
+  %}
+
+  ins_pipe(fdivD_reg_reg);
+%}
+
+//  Absolute float double precision
+instruct absD_reg(regD dst, regD src) %{
+  // Double-precision absolute value, emitted via vabs_f64.
+  match(Set dst (AbsD src));
+
+  format %{ "FABSd  $dst,$src" %}
+  size(4);
+  ins_encode %{
+    FloatRegister Dd = $dst$$FloatRegister;
+    FloatRegister Dm = $src$$FloatRegister;
+    __ vabs_f64(Dd, Dm);
+  %}
+  ins_pipe(faddD_reg);
+%}
+
+//  Absolute float single precision
+instruct absF_reg(regF dst, regF src) %{
+  // Single-precision absolute value, emitted via vabs_f32.
+  match(Set dst (AbsF src));
+
+  // Declares the fixed 4-byte encoding explicitly, as the sibling rules
+  // absD_reg and negF_reg do for their single-instruction encodings.
+  size(4);
+  format %{ "FABSs  $dst,$src" %}
+  ins_encode %{
+    __ vabs_f32($dst$$FloatRegister, $src$$FloatRegister);
+  %}
+  ins_pipe(faddF_reg);
+%}
+
+instruct negF_reg(regF dst, regF src) %{
+  // Single-precision negate, emitted via vneg_f32.
+  match(Set dst (NegF src));
+
+  format %{ "FNEGs  $dst,$src" %}
+  size(4);
+  ins_encode %{
+    FloatRegister Fd = $dst$$FloatRegister;
+    FloatRegister Fm = $src$$FloatRegister;
+    __ vneg_f32(Fd, Fm);
+  %}
+  ins_pipe(faddF_reg);
+%}
+
+instruct negD_reg(regD dst, regD src) %{
+  // Double-precision negate, emitted via vneg_f64.
+  match(Set dst (NegD src));
+
+  // Declares the fixed 4-byte encoding explicitly, as the sibling rules
+  // negF_reg and absD_reg do for their single-instruction encodings.
+  size(4);
+  format %{ "FNEGd  $dst,$src" %}
+  ins_encode %{
+    __ vneg_f64($dst$$FloatRegister, $src$$FloatRegister);
+  %}
+  ins_pipe(faddD_reg);
+%}
+
+//  Sqrt float single precision
+instruct sqrtF_reg_reg(regF dst, regF src) %{
+  // Matches the float -> double -> sqrt -> float chain
+  // (ConvD2F (SqrtD (ConvF2D src))) and replaces it with one
+  // single-precision square root via vsqrt_f32.
+  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
+
+  format %{ "FSQRTS $dst,$src" %}
+  size(4);
+  ins_encode %{
+    FloatRegister Fd = $dst$$FloatRegister;
+    FloatRegister Fm = $src$$FloatRegister;
+    __ vsqrt_f32(Fd, Fm);
+  %}
+  ins_pipe(fdivF_reg_reg);
+%}
+
+//  Sqrt float double precision
+instruct sqrtD_reg_reg(regD dst, regD src) %{
+  // Double-precision square root, emitted via vsqrt_f64.
+  match(Set dst (SqrtD src));
+
+  format %{ "FSQRTD $dst,$src" %}
+  size(4);
+  ins_encode %{
+    FloatRegister Dd = $dst$$FloatRegister;
+    FloatRegister Dm = $src$$FloatRegister;
+    __ vsqrt_f64(Dd, Dm);
+  %}
+  ins_pipe(fdivD_reg_reg);
+%}
+
+//----------Logical Instructions-----------------------------------------------
+// And Instructions
+// Register And
+instruct andI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+  // int bitwise AND of two registers: dst = src1 & src2.
+  match(Set dst (AndI src1 src2));
+
+  format %{ "and_32 $dst,$src1,$src2" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    Register Rm = $src2$$Register;
+    __ andr(Rd, Rn, Rm);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct andshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  // dst = src1 & (src2 << src3) with the shift count in a register; the
+  // shift is passed to the AND as an LSL shifter operand.
+  match(Set dst (AndI src1 (LShiftI src2 src3)));
+
+  format %{ "AND    $dst,$src1,$src2<<$src3" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    Register Rm = $src2$$Register;
+    __ andr(Rd, Rn, Rm, lsl($src3$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct andshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+  // dst = src1 & (src2 << src3) with a constant shift count; the shift is
+  // passed to the AND as an LSL shifter operand.
+  match(Set dst (AndI src1 (LShiftI src2 src3)));
+
+  format %{ "and_32 $dst,$src1,$src2<<$src3" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    Register Rm = $src2$$Register;
+    __ andr(Rd, Rn, Rm, lsl($src3$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct andsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  // dst = src1 & (src2 >> src3), arithmetic shift with the count in a
+  // register; the shift is passed to the AND as an ASR shifter operand.
+  match(Set dst (AndI src1 (RShiftI src2 src3)));
+
+  format %{ "AND    $dst,$src1,$src2>>$src3" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    Register Rm = $src2$$Register;
+    __ andr(Rd, Rn, Rm, asr($src3$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct andsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+  // dst = src1 & (src2 >> src3), arithmetic shift by a constant; the shift
+  // is passed to the AND as an ASR shifter operand.
+  match(Set dst (AndI src1 (RShiftI src2 src3)));
+
+  format %{ "and_32 $dst,$src1,$src2>>$src3" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    Register Rm = $src2$$Register;
+    __ andr(Rd, Rn, Rm, asr($src3$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct andshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  // dst = src1 & (src2 >>> src3), logical shift with the count in a
+  // register; the shift is passed to the AND as an LSR shifter operand.
+  match(Set dst (AndI src1 (URShiftI src2 src3)));
+
+  format %{ "AND    $dst,$src1,$src2>>>$src3" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    Register Rm = $src2$$Register;
+    __ andr(Rd, Rn, Rm, lsr($src3$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct andshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+  // dst = src1 & (src2 >>> src3), logical shift by a constant; the shift is
+  // passed to the AND as an LSR shifter operand.
+  match(Set dst (AndI src1 (URShiftI src2 src3)));
+
+  format %{ "and_32 $dst,$src1,$src2>>>$src3" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    Register Rm = $src2$$Register;
+    __ andr(Rd, Rn, Rm, lsr($src3$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate And
+instruct andI_reg_limm(iRegI dst, iRegI src1, limmI src2) %{
+  // int bitwise AND of a register with a constant: dst = src1 & $src2.
+  match(Set dst (AndI src1 src2));
+
+  format %{ "and_32 $dst,$src1,$src2\t! int" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    __ andr(Rd, Rn, $src2$$constant);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+instruct andI_reg_limmn(iRegI dst, iRegI src1, limmIn src2) %{
+  // int bitwise AND with a constant, emitted as BIC with the complemented
+  // constant: dst = src1 & ~(~$src2).
+  match(Set dst (AndI src1 src2));
+
+  format %{ "bic    $dst,$src1,~$src2\t! int" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    __ bic(Rd, Rn, ~$src2$$constant);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Register And Long
+instruct andL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
+  // long bitwise AND, done word by word: low words first, then high words.
+  match(Set dst (AndL src1 src2));
+
+  size(8);
+  ins_cost(DEFAULT_COST);
+  format %{ "AND    $dst,$src1,$src2\t! long" %}
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Ra = $src1$$Register;
+    Register Rb = $src2$$Register;
+    __ andr(Rd, Ra, Rb);
+    __ andr(Rd->successor(), Ra->successor(), Rb->successor());
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// TODO: try immLRot2 instead, so that (0, $con$$constant) becomes
+// (hi($con$$constant), lo($con$$constant))
+instruct andL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{
+  // long bitwise AND with a constant: the low word is ANDed with $con and
+  // the high word is ANDed with 0.
+  match(Set dst (AndL src1 con));
+  size(8);
+  ins_cost(DEFAULT_COST);
+  format %{ "AND    $dst,$src1,$con\t! long" %}
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    __ andr(Rd, Rn, $con$$constant);
+    __ andr(Rd->successor(), Rn->successor(), 0u);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Or Instructions
+// Register Or
+instruct orI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+  // int bitwise OR of two registers: dst = src1 | src2.
+  match(Set dst (OrI src1 src2));
+
+  format %{ "orr_32 $dst,$src1,$src2\t! int" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    Register Rm = $src2$$Register;
+    __ orr(Rd, Rn, Rm);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct orshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  // dst = src1 | (src2 << src3) with the shift count in a register; the
+  // shift is passed to the ORR as an LSL shifter operand.
+  match(Set dst (OrI src1 (LShiftI src2 src3)));
+
+  format %{ "OR    $dst,$src1,$src2<<$src3" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    Register Rm = $src2$$Register;
+    __ orr(Rd, Rn, Rm, lsl($src3$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct orshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+  // dst = src1 | (src2 << src3) with a constant shift count; the shift is
+  // passed to the ORR as an LSL shifter operand.
+  match(Set dst (OrI src1 (LShiftI src2 src3)));
+
+  format %{ "orr_32 $dst,$src1,$src2<<$src3" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    Register Rm = $src2$$Register;
+    __ orr(Rd, Rn, Rm, lsl($src3$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct orsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  // dst = src1 | (src2 >> src3), arithmetic shift with the count in a
+  // register; the shift is passed to the ORR as an ASR shifter operand.
+  match(Set dst (OrI src1 (RShiftI src2 src3)));
+
+  format %{ "OR    $dst,$src1,$src2>>$src3" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    Register Rm = $src2$$Register;
+    __ orr(Rd, Rn, Rm, asr($src3$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct orsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+  // dst = src1 | (src2 >> src3), arithmetic shift by a constant; the shift
+  // is passed to the ORR as an ASR shifter operand.
+  match(Set dst (OrI src1 (RShiftI src2 src3)));
+
+  format %{ "orr_32 $dst,$src1,$src2>>$src3" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    Register Rm = $src2$$Register;
+    __ orr(Rd, Rn, Rm, asr($src3$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct orshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  // dst = src1 | (src2 >>> src3), logical shift with the count in a
+  // register; the shift is passed to the ORR as an LSR shifter operand.
+  match(Set dst (OrI src1 (URShiftI src2 src3)));
+
+  format %{ "OR    $dst,$src1,$src2>>>$src3" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    Register Rm = $src2$$Register;
+    __ orr(Rd, Rn, Rm, lsr($src3$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct orshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+  // dst = src1 | (src2 >>> src3), logical shift by a constant; the shift is
+  // passed to the ORR as an LSR shifter operand.
+  match(Set dst (OrI src1 (URShiftI src2 src3)));
+
+  format %{ "orr_32 $dst,$src1,$src2>>>$src3" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    Register Rm = $src2$$Register;
+    __ orr(Rd, Rn, Rm, lsr($src3$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Or
+instruct orI_reg_limm(iRegI dst, iRegI src1, limmI src2) %{
+  // int bitwise OR of a register with a constant: dst = src1 | $src2.
+  match(Set dst (OrI src1 src2));
+
+  format %{ "orr_32  $dst,$src1,$src2" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    __ orr(Rd, Rn, $src2$$constant);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+// TODO: orn_32 with limmIn
+
+// Register Or Long
+instruct orL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
+  // long bitwise OR, done word by word: low words first, then high words.
+  match(Set dst (OrL src1 src2));
+
+  size(8);
+  ins_cost(DEFAULT_COST);
+  format %{ "OR     $dst.lo,$src1.lo,$src2.lo\t! long\n\t"
+            "OR     $dst.hi,$src1.hi,$src2.hi" %}
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Ra = $src1$$Register;
+    Register Rb = $src2$$Register;
+    __ orr(Rd, Ra, Rb);
+    __ orr(Rd->successor(), Ra->successor(), Rb->successor());
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// TODO: try immLRot2 instead, so that (0, $con$$constant) becomes
+// (hi($con$$constant), lo($con$$constant))
+instruct orL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{
+  // long bitwise OR with a constant: the low word is ORed with $con and the
+  // high word is ORed with 0 (i.e. copied from src1.hi).
+  match(Set dst (OrL src1 con));
+  ins_cost(DEFAULT_COST);
+  size(8);
+  // The high-word line prints 0 because that is the operand actually
+  // encoded below.
+  format %{ "OR     $dst.lo,$src1.lo,$con\t! long\n\t"
+            "OR     $dst.hi,$src1.hi,0" %}
+  ins_encode %{
+    __ orr($dst$$Register, $src1$$Register, $con$$constant);
+    __ orr($dst$$Register->successor(), $src1$$Register->successor(), 0u);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+#ifdef TODO
+// Guarded by TODO: this rule is only part of the description when that
+// symbol is defined.
+// Use SPRegP to match Rthread (TLS register) without spilling.
+// Use store_ptr_RegP to match Rthread (TLS register) without spilling.
+// Use sp_ptr_RegP to match Rthread (TLS register) without spilling.
+instruct orI_reg_castP2X(iRegI dst, iRegI src1, sp_ptr_RegP src2) %{
+  // dst = src1 | (pointer src2 cast to integer); the cast needs no
+  // instruction, so a single ORR is emitted.
+  match(Set dst (OrI src1 (CastP2X src2)));
+  size(4);
+  format %{ "OR     $dst,$src1,$src2" %}
+  ins_encode %{
+    __ orr($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+#endif
+
+// Xor Instructions
+// Register Xor
+instruct xorI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+  // int bitwise XOR of two registers: dst = src1 ^ src2.
+  match(Set dst (XorI src1 src2));
+
+  format %{ "eor_32 $dst,$src1,$src2" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    Register Rm = $src2$$Register;
+    __ eor(Rd, Rn, Rm);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct xorshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  // dst = src1 ^ (src2 << src3) with the shift count in a register; the
+  // shift is passed to the EOR as an LSL shifter operand.
+  match(Set dst (XorI src1 (LShiftI src2 src3)));
+
+  format %{ "XOR    $dst,$src1,$src2<<$src3" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    Register Rm = $src2$$Register;
+    __ eor(Rd, Rn, Rm, lsl($src3$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct xorshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+  // dst = src1 ^ (src2 << src3) with a constant shift count; the shift is
+  // passed to the EOR as an LSL shifter operand.
+  match(Set dst (XorI src1 (LShiftI src2 src3)));
+
+  format %{ "eor_32 $dst,$src1,$src2<<$src3" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    Register Rm = $src2$$Register;
+    __ eor(Rd, Rn, Rm, lsl($src3$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct xorsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  // dst = src1 ^ (src2 >> src3), arithmetic shift with the count in a
+  // register; the shift is passed to the EOR as an ASR shifter operand.
+  match(Set dst (XorI src1 (RShiftI src2 src3)));
+
+  format %{ "XOR    $dst,$src1,$src2>>$src3" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    Register Rm = $src2$$Register;
+    __ eor(Rd, Rn, Rm, asr($src3$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct xorsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+  // dst = src1 ^ (src2 >> src3), arithmetic shift by a constant; the shift
+  // is passed to the EOR as an ASR shifter operand.
+  match(Set dst (XorI src1 (RShiftI src2 src3)));
+
+  format %{ "eor_32 $dst,$src1,$src2>>$src3" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    Register Rm = $src2$$Register;
+    __ eor(Rd, Rn, Rm, asr($src3$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct xorshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
+  // dst = src1 ^ (src2 >>> src3), logical shift with the count in a
+  // register; the shift is passed to the EOR as an LSR shifter operand.
+  match(Set dst (XorI src1 (URShiftI src2 src3)));
+
+  format %{ "XOR    $dst,$src1,$src2>>>$src3" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    Register Rm = $src2$$Register;
+    __ eor(Rd, Rn, Rm, lsr($src3$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct xorshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
+  // dst = src1 ^ (src2 >>> src3), logical shift by a constant; the shift is
+  // passed to the EOR as an LSR shifter operand.
+  match(Set dst (XorI src1 (URShiftI src2 src3)));
+
+  format %{ "eor_32 $dst,$src1,$src2>>>$src3" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    Register Rm = $src2$$Register;
+    __ eor(Rd, Rn, Rm, lsr($src3$$constant));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Xor
+instruct xorI_reg_imm(iRegI dst, iRegI src1, limmI src2) %{
+  // int bitwise XOR of a register with a constant: dst = src1 ^ $src2.
+  match(Set dst (XorI src1 src2));
+
+  format %{ "eor_32 $dst,$src1,$src2" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    Register Rn = $src1$$Register;
+    __ eor(Rd, Rn, $src2$$constant);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Register Xor Long
+instruct xorL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
+  // long bitwise XOR, done word by word: low words first, then high words.
+  match(Set dst (XorL src1 src2));
+  ins_cost(DEFAULT_COST);
+  size(8);
+  // Listing order follows the emission order below (lo, then hi).
+  format %{ "XOR     $dst.lo,$src1.lo,$src2.lo\t! long\n\t"
+            "XOR     $dst.hi,$src1.hi,$src2.hi\t! long" %}
+  ins_encode %{
+    __ eor($dst$$Register, $src1$$Register, $src2$$Register);
+    __ eor($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor());
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// TODO: try immLRot2 instead, so that (0, $con$$constant) becomes
+// (hi($con$$constant), lo($con$$constant))
+instruct xorL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{
+  // long bitwise XOR with a constant: the low word is XORed with $con and
+  // the high word is XORed with 0 (i.e. copied from src1.hi).
+  match(Set dst (XorL src1 con));
+  ins_cost(DEFAULT_COST);
+  size(8);
+  // The listing pairs $con with the low word and 0 with the high word,
+  // matching the encoding below.
+  format %{ "XOR     $dst.lo,$src1.lo,$con\t! long\n\t"
+            "XOR     $dst.hi,$src1.hi,0\t! long" %}
+  ins_encode %{
+    __ eor($dst$$Register, $src1$$Register, $con$$constant);
+    __ eor($dst$$Register->successor(), $src1$$Register->successor(), 0u);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+//----------Convert to Boolean-------------------------------------------------
+instruct convI2B( iRegI dst, iRegI src, flagsReg ccr ) %{
+  // Conv2B on an int: dst = (src != 0) ? 1 : 0. The TST clobbers the
+  // condition flags, hence KILL ccr.
+  match(Set dst (Conv2B src));
+  effect(KILL ccr);
+  ins_cost(DEFAULT_COST*2);
+  size(12);
+  format %{ "TST    $src,$src \n\t"
+            "MOV    $dst, 0   \n\t"
+            "MOV.ne $dst, 1" %}
+  ins_encode %{ // FIXME: can do better?
+    Register Rsrc = $src$$Register;
+    Register Rdst = $dst$$Register;
+    __ tst(Rsrc, Rsrc);
+    __ mov(Rdst, 0);
+    __ mov(Rdst, 1, Assembler::NE);
+  %}
+  ins_pipe(ialu_reg_ialu);
+%}
+
+instruct convP2B( iRegI dst, iRegP src, flagsReg ccr ) %{
+  // Conv2B on a pointer: dst = (src != 0) ? 1 : 0. The TST clobbers the
+  // condition flags, hence KILL ccr.
+  match(Set dst (Conv2B src));
+  effect(KILL ccr);
+  ins_cost(DEFAULT_COST*2);
+  size(12);
+  format %{ "TST    $src,$src \n\t"
+            "MOV    $dst, 0   \n\t"
+            "MOV.ne $dst, 1" %}
+  ins_encode %{
+    Register Rsrc = $src$$Register;
+    Register Rdst = $dst$$Register;
+    __ tst(Rsrc, Rsrc);
+    __ mov(Rdst, 0);
+    __ mov(Rdst, 1, Assembler::NE);
+  %}
+  ins_pipe(ialu_reg_ialu);
+%}
+
+instruct cmpLTMask_reg_reg( iRegI dst, iRegI p, iRegI q, flagsReg ccr ) %{
+  // CmpLTMask: dst is cleared, then overwritten with the complement of 0
+  // (all ones) when the compare of p against q yields LT.
+  match(Set dst (CmpLTMask p q));
+  ins_cost(DEFAULT_COST*3);
+  effect( KILL ccr );
+  format %{ "CMP    $p,$q\n\t"
+            "MOV    $dst, #0\n\t"
+            "MOV.lt $dst, #-1" %}
+  ins_encode %{
+    Register Rmask = $dst$$Register;
+    __ cmp($p$$Register, $q$$Register);
+    __ mov_i(Rmask, 0);
+    __ mvn_i(Rmask, 0, Assembler::LT);
+  %}
+  ins_pipe(ialu_reg_reg_ialu);
+%}
+
+instruct cmpLTMask_reg_imm( iRegI dst, iRegI p, aimmI q, flagsReg ccr ) %{
+  // CmpLTMask against a constant: dst is cleared, then overwritten with the
+  // complement of 0 (all ones) when the compare of p against $q yields LT.
+  match(Set dst (CmpLTMask p q));
+  ins_cost(DEFAULT_COST*3);
+  effect( KILL ccr );
+  format %{ "CMP    $p,$q\n\t"
+            "MOV    $dst, #0\n\t"
+            "MOV.lt $dst, #-1" %}
+  ins_encode %{
+    Register Rmask = $dst$$Register;
+    __ cmp($p$$Register, $q$$constant);
+    __ mov_i(Rmask, 0);
+    __ mvn_i(Rmask, 0, Assembler::LT);
+  %}
+  ins_pipe(ialu_reg_reg_ialu);
+%}
+
+instruct cadd_cmpLTMask3( iRegI p, iRegI q, iRegI y, iRegI z, flagsReg ccr ) %{
+  // Fuses z = ((p < q mask) & y) + z into a compare plus a conditional add:
+  // z is only updated (z = y + z) when the compare yields LT.
+  match(Set z (AddI (AndI (CmpLTMask p q) y) z));
+  ins_cost(DEFAULT_COST*2);
+  effect( KILL ccr );
+  format %{ "CMP    $p,$q\n\t"
+            "ADD.lt $z,$y,$z" %}
+  ins_encode %{
+    Register Rz = $z$$Register;
+    __ cmp($p$$Register, $q$$Register);
+    __ add(Rz, $y$$Register, Rz, Assembler::LT);
+  %}
+  ins_pipe( cadd_cmpltmask );
+%}
+
+// FIXME: remove unused "dst"
+instruct cadd_cmpLTMask4( iRegI dst, iRegI p, aimmI q, iRegI y, iRegI z, flagsReg ccr ) %{
+  // Constant-operand variant of the fused compare-and-conditional-add:
+  // z = ((p < $q mask) & y) + z becomes CMP p,$q followed by an ADD that is
+  // executed only on LT. The dst operand appears in neither the match rule
+  // nor the encoding (see the FIXME above this rule).
+  match(Set z (AddI (AndI (CmpLTMask p q) y) z));
+  effect( KILL ccr );
+  ins_cost(DEFAULT_COST*2);
+  format %{ "CMP    $p,$q\n\t"
+            "ADD.lt $z,$y,$z" %}
+  ins_encode %{
+    __ cmp($p$$Register, $q$$constant);
+    __ add($z$$Register, $y$$Register, $z$$Register, Assembler::LT);
+  %}
+  ins_pipe( cadd_cmpltmask );
+%}
+
+instruct cadd_cmpLTMask( iRegI p, iRegI q, iRegI y, flagsReg ccr ) %{
+  // Fuses p = ((p < q mask) & y) + (p - q) into two instructions that update
+  // p in place and clobber the flags.
+  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
+  effect( KILL ccr );
+  ins_cost(DEFAULT_COST*2);
+  format %{ "SUBS   $p,$p,$q\n\t"
+            "ADD.lt $p,$y,$p" %}
+  ins_encode %{
+    // p = p - q; the flags set here serve as the p < q test for the ADD.
+    __ subs($p$$Register, $p$$Register, $q$$Register);
+    // Only on LT: p = y + p.
+    __ add($p$$Register, $y$$Register, $p$$Register, Assembler::LT);
+  %}
+  ins_pipe( cadd_cmpltmask );
+%}
+
+//----------Arithmetic Conversion Instructions---------------------------------
+// The conversion operations are all Alpha sorted.  Please keep it that way!
+
+instruct convD2F_reg(regF dst, regD src) %{
+  // double -> float conversion, emitted via vcvt_f32_f64.
+  match(Set dst (ConvD2F src));
+  format %{ "FCVTSD  $dst,$src" %}
+  size(4);
+  ins_encode %{
+    FloatRegister Fd = $dst$$FloatRegister;
+    FloatRegister Dm = $src$$FloatRegister;
+    __ vcvt_f32_f64(Fd, Dm);
+  %}
+  ins_pipe(fcvtD2F);
+%}
+
+// Convert a double to an int in a float register.
+// If the double is a NAN, stuff a zero in instead.
+
+instruct convD2I_reg_reg(iRegI dst, regD src, regF tmp) %{
+  // double -> int: convert into the temporary float register, then move the
+  // 32-bit result to the integer destination.
+  match(Set dst (ConvD2I src));
+  ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME
+  effect( TEMP tmp );
+  format %{ "FTOSIZD  $tmp,$src\n\t"
+            "FMRS     $dst, $tmp" %}
+  ins_encode %{
+    FloatRegister Ftmp = $tmp$$FloatRegister;
+    __ vcvt_s32_f64(Ftmp, $src$$FloatRegister);
+    __ vmov_f32($dst$$Register, Ftmp);
+  %}
+  ins_pipe(fcvtD2I);
+%}
+
+// Convert a double to a long in a double register.
+// If the double is a NAN, stuff a zero in instead.
+
+// Double to Long conversion
+instruct convD2L_reg(R0R1RegL dst, regD src) %{
+  // double -> long via a runtime call to SharedRuntime::d2l; the rule is
+  // marked as a call (effect(CALL)) and its result operand is R0R1RegL.
+  match(Set dst (ConvD2L src));
+  effect(CALL);
+  ins_cost(MEMORY_REF_COST); // FIXME
+  format %{ "convD2L    $dst,$src\t ! call to SharedRuntime::d2l" %}
+  ins_encode %{
+#ifndef HARD_FLOAT_CC
+    // Without HARD_FLOAT_CC the argument is moved into the dst register pair.
+    __ vmov_f64($dst$$Register, $dst$$Register->successor(), $src$$FloatRegister);
+#else
+    // With HARD_FLOAT_CC the argument is placed in d0, unless already there.
+    if ($src$$FloatRegister != d0) {
+      __ vmov_f64(d0, $src$$FloatRegister);
+    }
+#endif
+    address target = CAST_FROM_FN_PTR(address, SharedRuntime::d2l);
+    __ call(target, relocInfo::runtime_call_type);
+  %}
+  ins_pipe(fcvtD2L);
+%}
+
+instruct convF2D_reg(regD dst, regF src) %{
+  // float -> double conversion, emitted via vcvt_f64_f32.
+  match(Set dst (ConvF2D src));
+  format %{ "FCVTDS  $dst,$src" %}
+  size(4);
+  ins_encode %{
+    FloatRegister Dd = $dst$$FloatRegister;
+    FloatRegister Fm = $src$$FloatRegister;
+    __ vcvt_f64_f32(Dd, Fm);
+  %}
+  ins_pipe(fcvtF2D);
+%}
+
+instruct convF2I_reg_reg(iRegI dst, regF src, regF tmp) %{
+  // float -> int: convert into the temporary float register, then move the
+  // 32-bit result to the integer destination.
+  match(Set dst (ConvF2I src));
+  ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME
+  effect( TEMP tmp );
+  format %{ "FTOSIZS  $tmp,$src\n\t"
+            "FMRS     $dst, $tmp" %}
+  size(8);
+  ins_encode %{
+    FloatRegister Ftmp = $tmp$$FloatRegister;
+    __ vcvt_s32_f32(Ftmp, $src$$FloatRegister);
+    __ vmov_f32($dst$$Register, Ftmp);
+  %}
+  ins_pipe(fcvtF2I);
+%}
+
+// Float to Long conversion
+instruct convF2L_reg(R0R1RegL dst, regF src, R0RegI arg1) %{
+  // float -> long via a runtime call to SharedRuntime::f2l; the rule is
+  // marked as a call (effect(CALL)) and its result operand is R0R1RegL.
+  match(Set dst (ConvF2L src));
+  ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME
+  effect(CALL);
+  format %{ "convF2L  $dst,$src\t! call to SharedRuntime::f2l" %}
+  ins_encode %{
+#ifndef HARD_FLOAT_CC
+    // Without HARD_FLOAT_CC the argument is moved into the arg1 register.
+    __ vmov_f32($arg1$$Register, $src$$FloatRegister);
+#else
+    // With HARD_FLOAT_CC the argument is placed in f0, unless already there.
+    if($src$$FloatRegister != f0) {
+      __ vmov_f32(f0, $src$$FloatRegister);
+    }
+#endif
+    address target = CAST_FROM_FN_PTR(address, SharedRuntime::f2l);
+    __ call(target, relocInfo::runtime_call_type);
+  %}
+  ins_pipe(fcvtF2L);
+%}
+
+instruct convI2D_reg_reg(iRegI src, regD_low dst) %{
+  // int -> double: the integer is first moved into the destination float
+  // register, then converted in place to a double.
+  match(Set dst (ConvI2D src));
+  ins_cost(DEFAULT_COST + MEMORY_REF_COST); // FIXME
+  size(8);
+  // Operands of the second line are comma-separated, as in convI2F_reg_reg.
+  format %{ "FMSR     $dst,$src \n\t"
+            "FSITOD   $dst, $dst"%}
+  ins_encode %{
+      __ vmov_f32($dst$$FloatRegister, $src$$Register);
+      __ vcvt_f64_s32($dst$$FloatRegister, $dst$$FloatRegister);
+  %}
+  ins_pipe(fcvtI2D);
+%}
+
+instruct convI2F_reg_reg( regF dst, iRegI src ) %{
+  // int -> float: the integer is first moved into the destination float
+  // register, then converted in place.
+  match(Set dst (ConvI2F src));
+  size(8);
+  ins_cost(DEFAULT_COST + MEMORY_REF_COST); // FIXME
+  format %{ "FMSR     $dst,$src \n\t"
+            "FSITOS   $dst, $dst"%}
+  ins_encode %{
+    FloatRegister Fd = $dst$$FloatRegister;
+    __ vmov_f32(Fd, $src$$Register);
+    __ vcvt_f32_s32(Fd, Fd);
+  %}
+  ins_pipe(fcvtI2F);
+%}
+
+instruct convI2L_reg(iRegL dst, iRegI src) %{
+  // int -> long sign extension: dst.lo = src, dst.hi = src >> 31 (arithmetic).
+  match(Set dst (ConvI2L src));
+  format %{ "MOV    $dst.lo, $src \n\t"
+            "ASR    $dst.hi,$src,31\t! int->long" %}
+  size(8);
+  ins_encode %{
+    Register Rsrc = $src$$Register;
+    Register Rlo = $dst$$Register;
+    __ mov(Rlo, Rsrc);
+    __ mov(Rlo->successor(), Rsrc, asr(31));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Zero-extend convert int to long
+instruct convI2L_reg_zex(iRegL dst, iRegI src, immL_32bits mask ) %{
+  // (ConvI2L src) & mask, emitted as a zero extension: dst.lo = src,
+  // dst.hi = 0.
+  // NOTE(review): presumably immL_32bits only matches 0xFFFFFFFF -- confirm
+  // against the operand definition.
+  match(Set dst (AndL (ConvI2L src) mask) );
+  format %{ "MOV    $dst.lo,$src.lo\t! zero-extend int to long\n\t"
+            "MOV    $dst.hi, 0"%}
+  size(8);
+  ins_encode %{
+    Register Rlo = $dst$$Register;
+    __ mov(Rlo, $src$$Register);
+    __ mov(Rlo->successor(), 0);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Zero-extend long
+instruct zerox_long(iRegL dst, iRegL src, immL_32bits mask ) %{
+  // src & mask on a long, emitted as a zero extension of the low word:
+  // dst.lo = src.lo, dst.hi = 0.
+  // NOTE(review): presumably immL_32bits only matches 0xFFFFFFFF -- confirm
+  // against the operand definition.
+  match(Set dst (AndL src mask) );
+  format %{ "MOV    $dst.lo,$src.lo\t! zero-extend long\n\t"
+            "MOV    $dst.hi, 0"%}
+  size(8);
+  ins_encode %{
+    Register Rlo = $dst$$Register;
+    __ mov(Rlo, $src$$Register);
+    __ mov(Rlo->successor(), 0);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct MoveF2I_reg_reg(iRegI dst, regF src) %{
+  // Raw 32-bit move from a float register to an integer register (MoveF2I).
+  match(Set dst (MoveF2I src));
+  effect(DEF dst, USE src);
+  size(4);
+  ins_cost(MEMORY_REF_COST); // FIXME
+
+  format %{ "FMRS   $dst,$src\t! MoveF2I" %}
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    FloatRegister Fs = $src$$FloatRegister;
+    __ vmov_f32(Rd, Fs);
+  %}
+  ins_pipe(iload_mem); // FIXME
+%}
+
+instruct MoveI2F_reg_reg(regF dst, iRegI src) %{
+  // Raw 32-bit move from an integer register to a float register (MoveI2F).
+  match(Set dst (MoveI2F src));
+  size(4);
+  ins_cost(MEMORY_REF_COST); // FIXME
+
+  format %{ "FMSR   $dst,$src\t! MoveI2F" %}
+  ins_encode %{
+    FloatRegister Fd = $dst$$FloatRegister;
+    Register Rs = $src$$Register;
+    __ vmov_f32(Fd, Rs);
+  %}
+  ins_pipe(iload_mem); // FIXME
+%}
+
+instruct MoveD2L_reg_reg(iRegL dst, regD src) %{
+  // Raw 64-bit move from a double register to the dst.lo/dst.hi integer
+  // register pair (MoveD2L).
+  match(Set dst (MoveD2L src));
+  effect(DEF dst, USE src);
+  size(4);
+  ins_cost(MEMORY_REF_COST); // FIXME
+
+  format %{ "FMRRD    $dst,$src\t! MoveD2L" %}
+  ins_encode %{
+    Register Rlo = $dst$$Register;
+    __ vmov_f64(Rlo, Rlo->successor(), $src$$FloatRegister);
+  %}
+  ins_pipe(iload_mem); // FIXME
+%}
+
+instruct MoveL2D_reg_reg(regD dst, iRegL src) %{
+  // Raw 64-bit move from the src.lo/src.hi integer register pair to a double
+  // register (MoveL2D).
+  match(Set dst (MoveL2D src));
+  effect(DEF dst, USE src);
+  size(4);
+  ins_cost(MEMORY_REF_COST); // FIXME
+
+  format %{ "FMDRR   $dst,$src\t! MoveL2D" %}
+  ins_encode %{
+    Register Rlo = $src$$Register;
+    __ vmov_f64($dst$$FloatRegister, Rlo, Rlo->successor());
+  %}
+  ins_pipe(ialu_reg_reg); // FIXME
+%}
+
+//-----------
+// Long to Double conversion
+
+// Magic constant, 0x43300000
+instruct loadConI_x43300000(iRegI dst) %{
+  // Helper without a match rule: loads the constant 0x43300000 into dst.
+  effect(DEF dst);
+  format %{ "MOV_SLOW  $dst,0x43300000\t! 2^52" %}
+  size(8);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    __ mov(Rd, 0x43300000);
+  %}
+  ins_pipe(ialu_none);
+%}
+
+// Magic constant, 0x41f00000
+instruct loadConI_x41f00000(iRegI dst) %{
+  // Helper without a match rule: loads the constant 0x41f00000 into dst.
+  effect(DEF dst);
+  format %{ "MOV_SLOW  $dst, 0x41f00000\t! 2^32" %}
+  size(8);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    __ mov(Rd, 0x41f00000);
+  %}
+  ins_pipe(ialu_none);
+%}
+
+instruct loadConI_x0(iRegI dst) %{
+  // Helper without a match rule: loads zero into dst.
+  effect(DEF dst);
+  format %{ "MOV  $dst, 0x0\t! 0" %}
+  size(4);
+  ins_encode %{
+    Register Rd = $dst$$Register;
+    __ mov(Rd, 0);
+  %}
+  ins_pipe(ialu_none);
+%}
+
+// Construct a double from two float halves
+instruct regDHi_regDLo_to_regD(regD_low dst, regD_low src1, regD_low src2) %{
+  // Helper without a match rule: assembles dst from two single-precision
+  // halves. The high half is copied from src1's high half first, then the
+  // low half from src2's low half.
+  effect(DEF dst, USE src1, USE src2);
+  format %{ "FCPYS  $dst.hi,$src1.hi\n\t"
+            "FCPYS  $dst.lo,$src2.lo" %}
+  size(8);
+  ins_encode %{
+    FloatRegister Fdst_lo = $dst$$FloatRegister;
+    FloatRegister Fdst_hi = Fdst_lo->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister Fsrc1_hi = $src1$$FloatRegister->successor(FloatRegisterImpl::SINGLE);
+    __ vmov_f32(Fdst_hi, Fsrc1_hi);
+    __ vmov_f32(Fdst_lo, $src2$$FloatRegister);
+  %}
+  ins_pipe(faddD_reg_reg);
+%}
+
+// Convert integer in high half of a double register (in the lower half of
+// the double register file) to double
+instruct convI2D_regDHi_regD(regD dst, regD_low src) %{
+  // Helper without a match rule: converts the signed 32-bit integer held in
+  // the high single-precision half of src into a double in dst.
+  effect(DEF dst, USE src);
+  size(4);
+  format %{ "FSITOD  $dst,$src" %}
+  ins_encode %{
+    __ vcvt_f64_s32($dst$$FloatRegister, $src$$FloatRegister->successor(FloatRegisterImpl::SINGLE));// TODO verify the semantics are the same as before
+  %}
+  ins_pipe(fcvtLHi2D);
+%}
+
+// Add float double precision
+instruct addD_regD_regD(regD dst, regD src1, regD src2) %{
+  effect(DEF dst, USE src1, USE src2); // no match rule in this definition; used by name from the ConvL2D expand
+  size(4); // declared size: one 4-byte instruction
+  format %{ "FADDD  $dst,$src1,$src2" %}
+  ins_encode %{
+    __ vadd_f64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); // dst = src1 + src2 (double precision)
+  %}
+  ins_pipe(faddD_reg_reg);
+%}
+
+// Sub float double precision
+instruct subD_regD_regD(regD dst, regD src1, regD src2) %{
+  effect(DEF dst, USE src1, USE src2); // no match rule in this definition; used by name from the ConvL2D expand
+  size(4); // declared size: one 4-byte instruction
+  format %{ "FSUBD  $dst,$src1,$src2" %}
+  ins_encode %{
+    __ vsub_f64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); // dst = src1 - src2 (double precision)
+  %}
+  ins_pipe(faddD_reg_reg);
+%}
+
+// Mul float double precision
+instruct mulD_regD_regD(regD dst, regD src1, regD src2) %{
+  effect(DEF dst, USE src1, USE src2); // no match rule in this definition; used by name from the ConvL2D expand
+  size(4); // declared size: one 4-byte instruction
+  format %{ "FMULD  $dst,$src1,$src2" %}
+  ins_encode %{
+    __ vmul_f64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); // dst = src1 * src2 (double precision)
+  %}
+  ins_pipe(fmulD_reg_reg);
+%}
+
+instruct regL_to_regD(regD dst, iRegL src) %{
+  // No match rule to avoid chain rule match.
+  effect(DEF dst, USE src); // same encoding as MoveL2D_reg_reg, but only reachable by name (ConvL2D expand)
+  ins_cost(MEMORY_REF_COST);
+  size(4); // declared size: one 4-byte instruction
+  format %{ "FMDRR   $dst,$src\t! regL to regD" %}
+  ins_encode %{
+    __ vmov_f64($dst$$FloatRegister, $src$$Register, $src$$Register->successor()); // moves the register pair (src, src->successor()) into dst unchanged
+  %}
+  ins_pipe(ialu_reg_reg); // FIXME
+%}
+
+instruct regI_regI_to_regD(regD dst, iRegI src1, iRegI src2) %{
+  // No match rule to avoid chain rule match.
+  effect(DEF dst, USE src1, USE src2); // builds a double register from two independent int registers
+  ins_cost(MEMORY_REF_COST);
+  size(4); // declared size: one 4-byte instruction
+  format %{ "FMDRR   $dst,$src1,$src2\t! regI,regI to regD" %}
+  ins_encode %{
+    __ vmov_f64($dst$$FloatRegister, $src1$$Register, $src2$$Register); // src1/src2 take the positions that src/src->successor() take in regL_to_regD
+  %}
+  ins_pipe(ialu_reg_reg); // FIXME
+%}
+
+instruct convL2D_reg_slow_fxtof(regD dst, iRegL src) %{
+  match(Set dst (ConvL2D src)); // long -> double: dst = (double)(signed hi word) * 2^32 + ((2^52-pattern with lo word inserted) - 2^52), assuming vmov_f64(D, Rlo, Rhi) operand order - TODO confirm
+  ins_cost(DEFAULT_COST*8 + MEMORY_REF_COST*6); // FIXME
+
+  expand %{
+    regD_low   tmpsrc;
+    iRegI      ix43300000;
+    iRegI      ix41f00000;
+    iRegI      ix0;
+    regD_low   dx43300000;
+    regD       dx41f00000;
+    regD       tmp1;
+    regD_low   tmp2;
+    regD       tmp3;
+    regD       tmp4;
+
+    regL_to_regD(tmpsrc, src);
+
+    loadConI_x43300000(ix43300000);
+    loadConI_x41f00000(ix41f00000);
+    loadConI_x0(ix0);
+
+    regI_regI_to_regD(dx43300000, ix0, ix43300000);
+    regI_regI_to_regD(dx41f00000, ix0, ix41f00000);
+
+    convI2D_regDHi_regD(tmp1, tmpsrc);
+    regDHi_regDLo_to_regD(tmp2, dx43300000, tmpsrc);
+    subD_regD_regD(tmp3, tmp2, dx43300000);
+    mulD_regD_regD(tmp4, tmp1, dx41f00000);
+    addD_regD_regD(dst, tmp3, tmp4);
+  %}
+%}
+
+instruct convL2I_reg(iRegI dst, iRegL src) %{
+  match(Set dst (ConvL2I src)); // long -> int narrowing
+  size(4); // declared size: one 4-byte instruction
+  format %{ "MOV    $dst,$src.lo\t! long->int" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src$$Register); // copies only the first register of the long (".lo" in the format); successor() is not read
+  %}
+  ins_pipe(ialu_move_reg_I_to_L);
+%}
+
+// Register Shift Right Immediate
+instruct shrL_reg_imm6_L2I(iRegI dst, iRegL src, immI_32_63 cnt) %{
+  match(Set dst (ConvL2I (RShiftL src cnt))); // fuses (int)(src >> cnt) for the counts admitted by immI_32_63 into one instruction on the high word
+  size(4); // declared size: one 4-byte instruction on either path
+  format %{ "ASR    $dst,$src.hi,($cnt - 32)\t! long->int or mov if $cnt==32" %}
+  ins_encode %{
+    if ($cnt$$constant == 32) {
+      __ mov($dst$$Register, $src$$Register->successor()); // shift by exactly 32: the result is the high word itself
+    } else {
+      __ mov($dst$$Register, $src$$Register->successor(), asr($cnt$$constant - 32)); // otherwise arithmetic-shift the high word by the remaining cnt - 32
+    }
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+
+
+//----------Control Flow Instructions------------------------------------------
+// Compare Instructions
+// Compare Integers
+instruct compI_iReg(flagsReg icc, iRegI op1, iRegI op2) %{
+  match(Set icc (CmpI op1 op2)); // signed int compare, register vs register; also instantiated by the MinI/MaxI expands
+  effect( DEF icc, USE op1, USE op2 );
+
+  size(4); // declared size: one 4-byte instruction
+  format %{ "cmp_32 $op1,$op2\t! int" %}
+  ins_encode %{
+    __ cmp($op1$$Register, $op2$$Register); // only the flags result (icc) is produced
+  %}
+  ins_pipe(ialu_cconly_reg_reg);
+%}
+
+instruct compU_iReg(flagsRegU icc, iRegI op1, iRegI op2) %{
+  match(Set icc (CmpU op1 op2)); // unsigned int compare; same CMP as compI_iReg, the result lives in the flagsRegU operand class
+
+  size(4); // declared size: one 4-byte instruction
+  format %{ "cmp_32 $op1,$op2\t! unsigned int" %}
+  ins_encode %{
+    __ cmp($op1$$Register, $op2$$Register); // consumers (e.g. branchConU) pick the condition through cmpOpU
+  %}
+  ins_pipe(ialu_cconly_reg_reg);
+%}
+
+instruct compI_iReg_immneg(flagsReg icc, iRegI op1, aimmIneg op2) %{
+  match(Set icc (CmpI op1 op2)); // signed int compare against an aimmIneg constant (presumably one whose negation is encodable - see the operand definition)
+  effect( DEF icc, USE op1 );
+
+  size(4); // declared size: one 4-byte instruction
+  format %{ "cmn_32 $op1,-$op2\t! int" %}
+  ins_encode %{
+    __ cmn($op1$$Register, -$op2$$constant); // CMN with the negated constant stands in for CMP op1, op2
+  %}
+  ins_pipe(ialu_cconly_reg_imm);
+%}
+
+instruct compI_iReg_imm(flagsReg icc, iRegI op1, aimmI op2) %{
+  match(Set icc (CmpI op1 op2)); // signed int compare, register vs aimmI constant
+  effect( DEF icc, USE op1 );
+
+  size(4); // declared size: one 4-byte instruction
+  format %{ "cmp_32 $op1,$op2\t! int" %}
+  ins_encode %{
+    __ cmp($op1$$Register, $op2$$constant); // the constant is passed straight to the assembler as an immediate
+  %}
+  ins_pipe(ialu_cconly_reg_imm);
+%}
+
+instruct testI_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immI0 zero ) %{
+  match(Set icc (CmpI (AndI op1 op2) zero)); // (op1 & op2) compared with 0, without materializing the AND result
+  size(4); // declared size: one 4-byte instruction
+  format %{ "tst $op2,$op1" %}
+
+  ins_encode %{
+    __ tst($op1$$Register, $op2$$Register); // flags land in the restricted flagsReg_EQNELTGE class; see branchCon_EQNELTGE for the tests allowed on it
+  %}
+  ins_pipe(ialu_cconly_reg_reg_zero);
+%}
+
+instruct testshlI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iRegI op3, immI0 zero ) %{
+  match(Set icc (CmpI (AndI op1 (LShiftI op2 op3)) zero)); // (op1 & (op2 << op3)) compared with 0, shift amount in a register
+  size(4); // declared size: one 4-byte instruction
+  format %{ "TST   $op1,$op2<<$op3" %}
+
+  ins_encode %{
+    __ tst($op1$$Register, $op2$$Register, lsl($op3$$Register)); // op2 is the shifted operand; the format now prints it that way
+  %}
+  ins_pipe(ialu_cconly_reg_reg_zero);
+%}
+
+instruct testshlI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU5 op3, immI0 zero ) %{
+  match(Set icc (CmpI (AndI op1 (LShiftI op2 op3)) zero)); // (op1 & (op2 << op3)) compared with 0, shift amount is an immU5 constant
+  size(4); // declared size: one 4-byte instruction
+  format %{ "tst $op1,$op2<<$op3" %}
+
+  ins_encode %{
+    __ tst($op1$$Register, $op2$$Register, lsl($op3$$constant)); // op2 is the shifted operand; the format now prints it that way
+  %}
+  ins_pipe(ialu_cconly_reg_reg_zero);
+%}
+
+instruct testsarI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iRegI op3, immI0 zero ) %{
+  match(Set icc (CmpI (AndI op1 (RShiftI op2 op3)) zero)); // (op1 & (op2 >> op3)) compared with 0, arithmetic shift by a register amount
+  size(4); // declared size: one 4-byte instruction
+  format %{ "TST   $op1,$op2>>$op3" %}
+
+  ins_encode %{
+    __ tst($op1$$Register, $op2$$Register, asr($op3$$Register)); // asr on op2: the format previously printed "<<" on the wrong operand
+  %}
+  ins_pipe(ialu_cconly_reg_reg_zero);
+%}
+
+instruct testsarI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU5 op3, immI0 zero ) %{
+  match(Set icc (CmpI (AndI op1 (RShiftI op2 op3)) zero)); // (op1 & (op2 >> op3)) compared with 0, arithmetic shift by an immU5 constant
+  size(4); // declared size: one 4-byte instruction
+  format %{ "tst $op1,$op2>>$op3" %}
+
+  ins_encode %{
+    __ tst($op1$$Register, $op2$$Register, asr($op3$$constant)); // asr on op2: the format previously printed "<<" on the wrong operand
+  %}
+  ins_pipe(ialu_cconly_reg_reg_zero);
+%}
+
+instruct testshrI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iRegI op3, immI0 zero ) %{
+  match(Set icc (CmpI (AndI op1 (URShiftI op2 op3)) zero)); // (op1 & (op2 >>> op3)) compared with 0, logical shift by a register amount
+  size(4); // declared size: one 4-byte instruction
+  format %{ "TST   $op1,$op2>>>$op3" %}
+
+  ins_encode %{
+    __ tst($op1$$Register, $op2$$Register, lsr($op3$$Register)); // lsr on op2: the format previously printed "<<" on the wrong operand
+  %}
+  ins_pipe(ialu_cconly_reg_reg_zero);
+%}
+
+instruct testshrI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU5 op3, immI0 zero ) %{
+  match(Set icc (CmpI (AndI op1 (URShiftI op2 op3)) zero)); // (op1 & (op2 >>> op3)) compared with 0, logical shift by an immU5 constant
+  size(4); // declared size: one 4-byte instruction
+  format %{ "tst $op1,$op2>>>$op3" %}
+
+  ins_encode %{
+    __ tst($op1$$Register, $op2$$Register, lsr($op3$$constant)); // lsr on op2: the format previously printed "<<" on the wrong operand
+  %}
+  ins_pipe(ialu_cconly_reg_reg_zero);
+%}
+
+instruct testI_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, limmI op2, immI0 zero ) %{
+  match(Set icc (CmpI (AndI op1 op2) zero)); // (op1 & constant) compared with 0; the constant must satisfy the limmI operand
+  size(4); // declared size: one 4-byte instruction
+  format %{ "tst $op2,$op1" %}
+
+  ins_encode %{
+    __ tst($op1$$Register, $op2$$constant); // immediate form of testI_reg_reg
+  %}
+  ins_pipe(ialu_cconly_reg_imm_zero);
+%}
+
+instruct compL_reg_reg_LTGE(flagsRegL_LTGE xcc, iRegL op1, iRegL op2, iRegI tmp) %{
+  match(Set xcc (CmpL op1 op2)); // long compare whose flags are consumed only for lt/ge tests (see the predicate on branchConL_LTGE)
+  effect( DEF xcc, USE op1, USE op2, TEMP tmp ); // tmp only receives the discarded SBCS result
+
+  size(8); // declared size: two 4-byte instructions
+  format %{ "CMP     $op1.low,$op2.low\t\t! long\n\t"
+            "SBCS    $tmp,$op1.hi,$op2.hi" %}
+  ins_encode %{
+    __ cmp($op1$$Register, $op2$$Register); // low words first: produces the carry consumed below
+    __ sbcs($tmp$$Register, $op1$$Register->successor(), $op2$$Register->successor()); // high words with carry: flags of the 64-bit subtraction op1 - op2
+  %}
+  ins_pipe(ialu_cconly_reg_reg);
+%}
+
+instruct compL_reg_reg_EQNE(flagsRegL_EQNE xcc, iRegL op1, iRegL op2) %{
+  match(Set xcc (CmpL op1 op2)); // long compare whose flags are consumed only for eq/ne tests (see the predicate on branchConL_EQNE)
+  effect( DEF xcc, USE op1, USE op2 );
+
+  size(8); // declared size: two 4-byte instructions
+  format %{ "TEQ    $op1.hi,$op2.hi\t\t! long\n\t"
+            "TEQ.eq $op1.lo,$op2.lo" %}
+  ins_encode %{
+    __ teq($op1$$Register->successor(), $op2$$Register->successor()); // test the high words for equality
+    __ teq($op1$$Register, $op2$$Register, Assembler::EQ); // executed only under EQ, i.e. when the high words matched
+  %}
+  ins_pipe(ialu_cconly_reg_reg);
+%}
+
+instruct compL_reg_reg_LEGT(flagsRegL_LEGT xcc, iRegL op1, iRegL op2, iRegI tmp) %{
+  match(Set xcc (CmpL op1 op2)); // long compare whose flags are consumed only for le/gt tests (see the predicate on branchConL_LEGT)
+  effect( DEF xcc, USE op1, USE op2, TEMP tmp ); // tmp only receives the discarded SBCS result
+
+  size(8); // declared size: two 4-byte instructions
+  format %{ "CMP     $op2.low,$op1.low\t\t! long\n\t"
+            "SBCS    $tmp,$op2.hi,$op1.hi" %}
+  ins_encode %{
+    __ cmp($op2$$Register, $op1$$Register); // operands are swapped relative to compL_reg_reg_LTGE: computes op2 - op1
+    __ sbcs($tmp$$Register, $op2$$Register->successor(), $op1$$Register->successor()); // consumers pair these flags with the cmpOpL_commute operand
+  %}
+  ins_pipe(ialu_cconly_reg_reg);
+%}
+
+instruct compUL_reg_reg(flagsRegUL xcc, iRegL op1, iRegL op2) %{
+  match(Set xcc (CmpUL op1 op2)); // unsigned long compare, register pair vs register pair
+  effect( DEF xcc, USE op1, USE op2 );
+
+  size(8); // declared size: two 4-byte instructions
+  format %{ "CMP     $op1.hi,$op2.hi\t\t! long\n\t"
+            "CMP.eq  $op1.low,$op2.low" %}
+  ins_encode %{
+    __ cmp($op1$$Register->successor(), $op2$$Register->successor()); // high words decide unless they are equal
+    __ cmp($op1$$Register, $op2$$Register, Assembler::EQ); // low words are compared only when the high words were equal
+  %}
+  ins_pipe(ialu_cconly_reg_reg);
+%}
+
+// TODO: try immLRot2 instead; the (0, $con$$constant) operand pair would then
+// become (hi($con$$constant), lo($con$$constant)).
+instruct compL_reg_con_LTGE(flagsRegL_LTGE xcc, iRegL op1, immLlowRot con, iRegI tmp) %{
+  match(Set xcc (CmpL op1 con)); // long compare against an immLlowRot constant, flags valid for lt/ge consumers
+  effect( DEF xcc, USE op1, USE con, TEMP tmp ); // tmp only receives the discarded SBCS result
+
+  size(8); // declared size: two 4-byte instructions
+  format %{ "CMP     $op1.low,$con\t\t! long\n\t"
+            "SBCS    $tmp,$op1.hi,0" %}
+  ins_encode %{
+    __ cmp($op1$$Register, (int)$con$$constant); // the constant is truncated to int and compared with the low word
+    __ sbcs($tmp$$Register, $op1$$Register->successor(), 0); // the constant's high word is taken to be 0
+  %}
+
+  ins_pipe(ialu_cconly_reg_reg);
+%}
+
+instruct compUL_reg_con(flagsRegUL xcc, iRegL op1, immLlowRot con ) %{
+  match(Set xcc (CmpUL op1 con)); // unsigned long compare against an immLlowRot constant
+  effect( DEF xcc, USE op1, USE con );
+
+  size(8); // declared size: two 4-byte instructions
+  format %{ "CMP     $op1.hi,0\t\t! long\n\t"
+            "CMP.eq  $op1.low,$con" %}
+  ins_encode %{
+    __ cmp($op1$$Register->successor(), 0); // the constant's high word is taken to be 0
+    __ cmp($op1$$Register, (int)$con$$constant, Assembler::EQ); // low word vs the constant, only when the high word was 0
+  %}
+
+  ins_pipe(ialu_cconly_reg_reg);
+%}
+
+// TODO: try immLRot2 instead; the (0, $con$$constant) operand pair would then
+// become (hi($con$$constant), lo($con$$constant)).
+instruct compL_reg_con_EQNE(flagsRegL_EQNE xcc, iRegL op1, immLlowRot con) %{
+  match(Set xcc (CmpL op1 con)); // long compare against an immLlowRot constant, flags valid for eq/ne consumers
+  effect( DEF xcc, USE op1, USE con );
+
+  size(8); // declared size: two 4-byte instructions
+  format %{ "TEQ    $op1.hi,0\t\t! long\n\t"
+            "TEQ.eq $op1.lo,$con" %}
+  ins_encode %{
+    __ teq($op1$$Register->successor(), 0); // the constant's high word is taken to be 0
+    __ teq($op1$$Register, (int)$con$$constant, Assembler::EQ); // low word vs the constant, only when the high word was 0
+  %}
+
+  ins_pipe(ialu_cconly_reg_reg);
+%}
+
+// TODO: try immLRot2 instead; the (0, $con$$constant) operand pair would then
+// become (hi($con$$constant), lo($con$$constant)).
+instruct compL_reg_con_LEGT(flagsRegL_LEGT xcc, iRegL op1, immLlowRot con, iRegL tmp) %{
+  match(Set xcc (CmpL op1 con)); // long compare against an immLlowRot constant, flags valid for le/gt consumers (paired with cmpOpL_commute)
+  effect( DEF xcc, USE op1, USE con, TEMP tmp ); // tmp is a long temp: both of its registers are written below
+
+  size(8); // declared size: two 4-byte instructions
+  format %{ "RSBS    $tmp.low,$op1.low,$con\t\t! long\n\t"
+            "RSCS    $tmp.hi,$op1.hi,0" %}
+  ins_encode %{
+    __ rsbs($tmp$$Register, $op1$$Register, (long)$con$$constant); // reverse subtract: con - op1.low, result into tmp's first register
+    __ rscs($tmp$$Register->successor(), $op1$$Register->successor(), 0); // reverse subtract with carry: 0 - op1.hi, result into tmp's second register
+  %}
+
+  ins_pipe(ialu_cconly_reg_reg);
+%}
+
+/* instruct testL_reg_reg(flagsRegL xcc, iRegL op1, iRegL op2, immL0 zero) %{ */
+/*   match(Set xcc (CmpL (AndL op1 op2) zero)); */
+/*   ins_encode %{ */
+/*     __ stop("testL_reg_reg unimplemented"); */
+/*   %} */
+/*   ins_pipe(ialu_cconly_reg_reg); */
+/* %} */
+
+/* // useful for checking the alignment of a pointer: */
+/* instruct testL_reg_con(flagsRegL xcc, iRegL op1, immLlowRot con, immL0 zero) %{ */
+/*   match(Set xcc (CmpL (AndL op1 con) zero)); */
+/*   ins_encode %{ */
+/*     __ stop("testL_reg_con unimplemented"); */
+/*   %} */
+/*   ins_pipe(ialu_cconly_reg_reg); */
+/* %} */
+
+instruct compU_iReg_imm(flagsRegU icc, iRegI op1, aimmU31 op2 ) %{
+  match(Set icc (CmpU op1 op2)); // unsigned int compare, register vs aimmU31 constant
+
+  size(4); // declared size: one 4-byte instruction
+  format %{ "cmp_32 $op1,$op2\t! unsigned" %}
+  ins_encode %{
+    __ cmp($op1$$Register, $op2$$constant); // immediate form of compU_iReg
+  %}
+  ins_pipe(ialu_cconly_reg_imm);
+%}
+
+// Compare Pointers
+instruct compP_iRegP(flagsRegP pcc, iRegP op1, iRegP op2 ) %{
+  match(Set pcc (CmpP op1 op2)); // pointer compare, register vs register
+
+  size(4); // declared size: one 4-byte instruction
+  format %{ "CMP    $op1,$op2\t! ptr" %}
+  ins_encode %{
+    __ cmp($op1$$Register, $op2$$Register); // flags go to the flagsRegP class consumed by branchConP
+  %}
+  ins_pipe(ialu_cconly_reg_reg);
+%}
+
+instruct compP_iRegP_imm(flagsRegP pcc, iRegP op1, aimmP op2 ) %{
+  match(Set pcc (CmpP op1 op2)); // pointer compare, register vs aimmP constant
+
+  size(4); // declared size: one 4-byte instruction
+  format %{ "CMP    $op1,$op2\t! ptr" %}
+  ins_encode %{
+    assert($op2$$constant == 0 || _opnds[2]->constant_reloc() == relocInfo::none, "reloc in cmp?"); // a non-zero constant must not carry relocation info: it is emitted as a bare immediate
+    __ cmp($op1$$Register, $op2$$constant);
+  %}
+  ins_pipe(ialu_cconly_reg_imm);
+%}
+
+//----------Max and Min--------------------------------------------------------
+// Min Instructions
+// Conditional move for min
+instruct cmovI_reg_lt( iRegI op2, iRegI op1, flagsReg icc ) %{
+  effect( USE_DEF op2, USE op1, USE icc ); // no match rule: second step of the MinI expand; op2 is both input and result
+
+  size(4); // declared size: one 4-byte instruction
+  format %{ "MOV.lt  $op2,$op1\t! min" %}
+  ins_encode %{
+    __ mov($op2$$Register, $op1$$Register, Assembler::LT); // op2 = op1 only under the signed LT condition
+  %}
+  ins_pipe(ialu_reg_flags);
+%}
+
+// Min Register with Register.
+instruct minI_eReg(iRegI op1, iRegI op2) %{
+  match(Set op2 (MinI op1 op2)); // op2 = min(op1, op2): compare op1 with op2, then overwrite op2 with op1 when op1 < op2
+  ins_cost(DEFAULT_COST*2); // two instructions come out of the expand
+  expand %{
+    flagsReg icc;
+    compI_iReg(icc,op1,op2);
+    cmovI_reg_lt(op2,op1,icc);
+  %}
+%}
+
+// Max Instructions
+// Conditional move for max
+instruct cmovI_reg_gt( iRegI op2, iRegI op1, flagsReg icc ) %{
+  effect( USE_DEF op2, USE op1, USE icc ); // no match rule: second step of the MaxI expand; unlike cmovI_reg_lt, no size() is declared here
+  format %{ "MOV.gt  $op2,$op1\t! max" %}
+  ins_encode %{
+    __ mov($op2$$Register, $op1$$Register, Assembler::GT); // op2 = op1 only under the signed GT condition
+  %}
+  ins_pipe(ialu_reg_flags);
+%}
+
+// Max Register with Register
+instruct maxI_eReg(iRegI op1, iRegI op2) %{
+  match(Set op2 (MaxI op1 op2)); // op2 = max(op1, op2): compare op1 with op2, then overwrite op2 with op1 when op1 > op2
+  ins_cost(DEFAULT_COST*2); // two instructions come out of the expand
+  expand %{
+    flagsReg icc;
+    compI_iReg(icc,op1,op2);
+    cmovI_reg_gt(op2,op1,icc);
+  %}
+%}
+
+
+//----------Float Compares----------------------------------------------------
+// Compare floating, generate condition code
+instruct cmpF_cc(flagsRegF fcc, flagsReg icc, regF src1, regF src2) %{
+  match(Set icc (CmpF src1 src2)); // float compare whose result is delivered in the integer flags operand icc
+  effect(KILL fcc); // the FP flags operand is clobbered as a side effect
+
+  size(8); // declared size: two 4-byte instructions
+  format %{ "FCMPs  $src1,$src2\n\t"
+            "FMSTAT" %}
+  ins_encode %{
+    __ vcmp_f32($src1$$FloatRegister, $src2$$FloatRegister);
+    __ get_fpsr(); // printed as FMSTAT in the format; presumably copies the FP compare flags to the integer flags - confirm in the MacroAssembler
+  %}
+  ins_pipe(faddF_fcc_reg_reg_zero);
+%}
+
+instruct cmpF0_cc(flagsRegF fcc, flagsReg icc, regF src1, immF0 src2) %{
+  match(Set icc (CmpF src1 src2)); // float compare against the immF0 constant, result delivered in the integer flags operand icc
+  effect(KILL fcc); // the FP flags operand is clobbered as a side effect
+
+  size(8); // declared size: two 4-byte instructions
+  format %{ "FCMPZs $src1,$src2\n\t"
+            "FMSTAT" %}
+  ins_encode %{
+    __ vcmp_f32($src1$$FloatRegister, 0); // compare-with-zero form, hence the FCMPZs mnemonic (as in cmpF0_reg / cmpD0_cc)
+    __ get_fpsr(); // printed as FMSTAT in the format; presumably copies the FP compare flags to the integer flags - confirm in the MacroAssembler
+  %}
+  ins_pipe(faddF_fcc_reg_reg_zero);
+%}
+
+instruct cmpD_cc(flagsRegF fcc, flagsReg icc, regD src1, regD src2) %{
+  match(Set icc (CmpD src1 src2)); // double compare whose result is delivered in the integer flags operand icc
+  effect(KILL fcc); // the FP flags operand is clobbered as a side effect
+
+  size(8); // declared size: two 4-byte instructions
+  format %{ "FCMPd  $src1,$src2 \n\t"
+            "FMSTAT" %}
+  ins_encode %{
+    __ vcmp_f64($src1$$FloatRegister, $src2$$FloatRegister);
+    __ get_fpsr(); // printed as FMSTAT in the format; presumably copies the FP compare flags to the integer flags - confirm in the MacroAssembler
+  %}
+  ins_pipe(faddD_fcc_reg_reg_zero);
+%}
+
+instruct cmpD0_cc(flagsRegF fcc, flagsReg icc, regD src1, immD0 src2) %{
+  match(Set icc (CmpD src1 src2)); // double compare against the immD0 constant, result delivered in the integer flags operand icc
+  effect(KILL fcc); // the FP flags operand is clobbered as a side effect
+
+  size(8); // declared size: two 4-byte instructions
+  format %{ "FCMPZd  $src1,$src2 \n\t"
+            "FMSTAT" %}
+  ins_encode %{
+    __ vcmp_f64($src1$$FloatRegister, 0); // compare-with-zero form; src2 itself is never loaded into a register
+    __ get_fpsr(); // printed as FMSTAT in the format; presumably copies the FP compare flags to the integer flags - confirm in the MacroAssembler
+  %}
+  ins_pipe(faddD_fcc_reg_reg_zero);
+%}
+
+// Compare floating, generate -1,0,1
+instruct cmpF_reg(iRegI dst, regF src1, regF src2, flagsRegF fcc) %{
+  match(Set dst (CmpF3 src1 src2)); // three-way float compare: the result is an int in dst (-1, 0 or 1 per the section comment)
+  effect(KILL fcc); // only the FP flags operand is declared clobbered
+  ins_cost(DEFAULT_COST*3+BRANCH_COST*3); // FIXME
+  size(20); // declared size: five 4-byte instructions, as listed in the format
+  // same number of instructions as code using conditional moves but
+  // doesn't kill integer condition register
+  format %{ "FCMPs  $dst,$src1,$src2 \n\t"
+            "VMRS   $dst, FPSCR \n\t"
+            "OR     $dst, $dst, 0x08000000 \n\t"
+            "EOR    $dst, $dst, $dst << 3 \n\t"
+            "MOV    $dst, $dst >> 30" %}
+  ins_encode %{
+    __ vcmp_f32($src1$$FloatRegister, $src2$$FloatRegister);
+    __ floating_cmp($dst$$Register); // helper defined elsewhere; the format shows the VMRS/OR/EOR/MOV sequence it is expected to emit
+  %}
+  ins_pipe( floating_cmp );
+%}
+
+instruct cmpF0_reg(iRegI dst, regF src1, immF0 src2, flagsRegF fcc) %{
+  match(Set dst (CmpF3 src1 src2)); // three-way float compare against the immF0 constant, int result in dst
+  effect(KILL fcc); // only the FP flags operand is declared clobbered
+  ins_cost(DEFAULT_COST*3+BRANCH_COST*3); // FIXME
+  size(20); // declared size: five 4-byte instructions, as listed in the format
+  // same number of instructions as code using conditional moves but
+  // doesn't kill integer condition register
+  format %{ "FCMPZs $dst,$src1,$src2 \n\t"
+            "VMRS   $dst, FPSCR \n\t"
+            "OR     $dst, $dst, 0x08000000 \n\t"
+            "EOR    $dst, $dst, $dst << 3 \n\t"
+            "MOV    $dst, $dst >> 30" %}
+  ins_encode %{
+    __ vcmp_f32($src1$$FloatRegister, 0); // compare-with-zero form
+    __ floating_cmp($dst$$Register); // helper defined elsewhere; the format shows the VMRS/OR/EOR/MOV sequence it is expected to emit
+  %}
+  ins_pipe( floating_cmp );
+%}
+
+instruct cmpD_reg(iRegI dst, regD src1, regD src2, flagsRegF fcc) %{
+  match(Set dst (CmpD3 src1 src2)); // three-way double compare: the result is an int in dst (-1, 0 or 1 per the section comment)
+  effect(KILL fcc); // only the FP flags operand is declared clobbered
+  ins_cost(DEFAULT_COST*3+BRANCH_COST*3); // FIXME
+  size(20); // declared size: five 4-byte instructions, as listed in the format
+  // same number of instructions as code using conditional moves but
+  // doesn't kill integer condition register
+  format %{ "FCMPd  $dst,$src1,$src2 \n\t"
+            "VMRS   $dst, FPSCR \n\t"
+            "OR     $dst, $dst, 0x08000000 \n\t"
+            "EOR    $dst, $dst, $dst << 3 \n\t"
+            "MOV    $dst, $dst >> 30" %}
+  ins_encode %{
+    __ vcmp_f64($src1$$FloatRegister, $src2$$FloatRegister);
+    __ floating_cmp($dst$$Register); // helper defined elsewhere; the format shows the VMRS/OR/EOR/MOV sequence it is expected to emit
+  %}
+  ins_pipe( floating_cmp );
+%}
+
+instruct cmpD0_reg(iRegI dst, regD src1, immD0 src2, flagsRegF fcc) %{
+  match(Set dst (CmpD3 src1 src2)); // three-way double compare against the immD0 constant, int result in dst
+  effect(KILL fcc); // only the FP flags operand is declared clobbered
+  ins_cost(DEFAULT_COST*3+BRANCH_COST*3); // FIXME
+  size(20); // declared size: five 4-byte instructions, as listed in the format
+  // same number of instructions as code using conditional moves but
+  // doesn't kill integer condition register
+  format %{ "FCMPZd $dst,$src1,$src2 \n\t"
+            "VMRS   $dst, FPSCR \n\t"
+            "OR     $dst, $dst, 0x08000000 \n\t"
+            "EOR    $dst, $dst, $dst << 3 \n\t"
+            "MOV    $dst, $dst >> 30" %}
+  ins_encode %{
+    __ vcmp_f64($src1$$FloatRegister, 0); // compare-with-zero form
+    __ floating_cmp($dst$$Register); // helper defined elsewhere; the format shows the VMRS/OR/EOR/MOV sequence it is expected to emit
+  %}
+  ins_pipe( floating_cmp );
+%}
+
+//----------Branches---------------------------------------------------------
+// Jump
+// (compare 'operand indIndex' and 'instruct addP_reg_reg' above)
+// FIXME
+instruct jumpXtnd(iRegX switch_val, iRegP tmp) %{
+  match(Jump switch_val); // table jump: load a target address from the constant table, indexed by switch_val, and branch to it
+  effect(TEMP tmp); // tmp holds the loaded target and, when needed, the intermediate table address
+  ins_cost(350);
+  format %{  "ADD    $tmp, $constanttablebase, $switch_val\n\t"
+             "LDR    $tmp,[$tmp + $constantoffset]\n\t"
+             "BX     $tmp" %}
+  size(20); // declared size; the paths below emit different instruction counts, so this reads as an upper bound - TODO confirm
+  ins_encode %{
+    Register table_reg;
+    Register label_reg = $tmp$$Register;
+    if (constant_offset() == 0) { // table starts exactly at the constant-table base: index it directly
+      table_reg = $constanttablebase;
+      __ ldr(label_reg, Address(table_reg, $switch_val$$Register));
+    } else {
+      table_reg = $tmp$$Register;
+      int offset = $constantoffset;
+      if (is_memoryP(offset)) { // offset accepted by is_memoryP: fold it into the load's addressing
+        __ add(table_reg, $constanttablebase, $switch_val$$Register);
+        __ ldr(label_reg, Address(table_reg, offset));
+      } else { // otherwise materialize the offset, add the base, then index by switch_val
+        __ mov(table_reg, $constantoffset);
+        __ add(table_reg, $constanttablebase, table_reg);
+        __ ldr(label_reg, Address(table_reg, $switch_val$$Register));
+      }
+    }
+    __ b(label_reg); // ldr + b better than ldr to PC for branch predictor?
+    //    __ ldr(PC, Address($table$$Register, $switch_val$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Direct Branch.
+instruct branch(label labl) %{
+  match(Goto); // unconditional jump to labl
+  effect(USE labl);
+
+  size(4); // declared size: one 4-byte instruction
+  ins_cost(BRANCH_COST);
+  format %{ "B     $labl" %}
+  ins_encode %{
+    __ b(*($labl$$label)); // $labl$$label is a Label pointer, dereferenced for the assembler
+  %}
+  ins_pipe(br);
+%}
+
+// Conditional Direct Branch
+instruct branchCon(cmpOp cmp, flagsReg icc, label labl) %{
+  match(If cmp icc); // conditional branch on flags held in a general flagsReg operand
+  effect(USE labl);
+
+  size(4); // declared size: one 4-byte instruction
+  ins_cost(BRANCH_COST);
+  format %{ "B$cmp   $icc,$labl" %}
+  ins_encode %{
+    __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode)); // the condition code comes from the cmp operand's cmpcode
+  %}
+  ins_pipe(br_cc);
+%}
+
+instruct branchCon_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, label labl) %{
+  match(If cmp icc); // conditional branch on flags produced by the TST-based instructs (flagsReg_EQNELTGE)
+  effect(USE labl);
+  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); // accepted only when the Bool test is eq, ne, lt or ge
+
+  size(4); // declared size: one 4-byte instruction
+  ins_cost(BRANCH_COST);
+  format %{ "B$cmp   $icc,$labl" %}
+  ins_encode %{
+    __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode)); // the condition code comes from the cmp operand's cmpcode
+  %}
+  ins_pipe(br_cc);
+%}
+
+instruct branchConU(cmpOpU cmp, flagsRegU icc, label labl) %{
+  match(If cmp icc); // conditional branch on flags of an unsigned int compare (flagsRegU)
+  effect(USE labl);
+
+  size(4); // declared size: one 4-byte instruction
+  ins_cost(BRANCH_COST);
+  format %{ "B$cmp  $icc,$labl" %}
+  ins_encode %{
+    __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode)); // cmpOpU supplies the condition encoding
+  %}
+  ins_pipe(br_cc);
+%}
+
+instruct branchConP(cmpOpP cmp, flagsRegP pcc, label labl) %{
+  match(If cmp pcc); // conditional branch on flags of a pointer compare (flagsRegP)
+  effect(USE labl);
+
+  size(4); // declared size: one 4-byte instruction
+  ins_cost(BRANCH_COST);
+  format %{ "B$cmp  $pcc,$labl" %}
+  ins_encode %{
+    __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode)); // cmpOpP supplies the condition encoding
+  %}
+  ins_pipe(br_cc);
+%}
+
+instruct branchConL_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, label labl) %{
+  match(If cmp xcc); // conditional branch on flags of a long compare produced by the *_LTGE compare instructs
+  effect(USE labl);
+  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); // accepted only when the Bool test is lt or ge
+
+  size(4); // declared size: one 4-byte instruction
+  ins_cost(BRANCH_COST);
+  format %{ "B$cmp  $xcc,$labl" %}
+  ins_encode %{
+    __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode)); // cmpOpL supplies the condition encoding
+  %}
+  ins_pipe(br_cc);
+%}
+
+instruct branchConL_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, label labl) %{
+  match(If cmp xcc); // conditional branch on flags of a long compare produced by the *_EQNE compare instructs
+  effect(USE labl);
+  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); // accepted only when the Bool test is eq or ne
+
+  size(4); // declared size: one 4-byte instruction
+  ins_cost(BRANCH_COST);
+  format %{ "B$cmp  $xcc,$labl" %}
+  ins_encode %{
+    __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode)); // cmpOpL supplies the condition encoding
+  %}
+  ins_pipe(br_cc);
+%}
+
+instruct branchConL_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, label labl) %{
+  match(If cmp xcc); // conditional branch on flags of a long compare produced by the *_LEGT compare instructs (operands swapped there)
+  effect(USE labl);
+  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ); // accepted only when the Bool test is gt or le
+
+  size(4); // declared size: one 4-byte instruction
+  ins_cost(BRANCH_COST);
+  format %{ "B$cmp  $xcc,$labl" %}
+  ins_encode %{
+    __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode)); // cmpOpL_commute supplies the condition encoding for the swapped compare
+  %}
+  ins_pipe(br_cc);
+%}
+
+instruct branchConUL(cmpOpU cmp, flagsRegUL xcc, label labl) %{
+  match(If cmp xcc); // conditional branch on flags of an unsigned long compare (flagsRegUL); no predicate restricts the test
+  effect(USE labl);
+
+  size(4); // declared size: one 4-byte instruction
+  ins_cost(BRANCH_COST);
+  format %{ "B$cmp  $xcc,$labl" %}
+  ins_encode %{
+    __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode)); // cmpOpU supplies the condition encoding
+  %}
+  ins_pipe(br_cc);
+%}
+
+instruct branchLoopEnd(cmpOp cmp, flagsReg icc, label labl) %{
+  match(CountedLoopEnd cmp icc); // same encoding as branchCon, matched for CountedLoopEnd nodes instead of If
+  effect(USE labl);
+
+  size(4); // declared size: one 4-byte instruction
+  ins_cost(BRANCH_COST);
+  format %{ "B$cmp   $icc,$labl\t! Loop end" %}
+  ins_encode %{
+    __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode)); // the condition code comes from the cmp operand's cmpcode
+  %}
+  ins_pipe(br_cc);
+%}
+
+// instruct branchLoopEndU(cmpOpU cmp, flagsRegU icc, label labl) %{
+//   match(CountedLoopEnd cmp icc);
+//   ins_pipe(br_cc);
+// %}
+
+// ============================================================================
+// Long Compare
+//
+// Currently we hold longs in 2 registers.  Comparing such values efficiently
+// is tricky.  The flavor of compare used depends on whether we are testing
+// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
+// The GE test is the negated LT test.  The LE test can be had by commuting
+// the operands (yielding a GE test) and then negating; negate again for the
+// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
+// NE test is negated from that.
+
+// Due to a shortcoming in the ADLC, it mixes up expressions like:
+// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
+// difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
+// are collapsed internally in the ADLC's dfa-gen code.  The match for
+// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
+// foo match ends up with the wrong leaf.  One fix is to not match both
+// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
+// both forms beat the trinary form of long-compare and both are very useful
+// on Intel which has so few registers.
+
+// instruct branchCon_long(cmpOp cmp, flagsRegL xcc, label labl) %{
+//   match(If cmp xcc);
+//   ins_pipe(br_cc);
+// %}
+
+// Manifest a CmpL3 result in an integer register.  Very painful.
+// This is the test to avoid.
+instruct cmpL3_reg_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg ccr ) %{
+  match(Set dst (CmpL3 src1 src2) ); // three-way long compare: dst becomes 1, 0 or -1
+  effect( KILL ccr ); // the integer flags are clobbered
+  ins_cost(6*DEFAULT_COST); // FIXME
+  size(32); // declared size; the encoding below emits seven instructions
+  format %{
+      "CMP    $src1.hi, $src2.hi\t\t! long\n"
+    "\tMOV.gt $dst, 1\n"
+    "\tmvn.lt $dst, 0\n"
+    "\tB.ne   done\n"
+    "\tSUBS   $dst, $src1.lo, $src2.lo\n"
+    "\tMOV.hi $dst, 1\n"
+    "\tmvn.lo $dst, 0\n"
+    "done:"     %}
+  ins_encode %{
+    Label done;
+    __ cmp($src1$$Register->successor(), $src2$$Register->successor()); // high words are compared with signed conditions (GT/LT)
+    __ mov_i($dst$$Register, 1, Assembler::GT);
+    __ mvn_i($dst$$Register, 0, Assembler::LT); // MVN of 0, i.e. -1
+    __ b(done, Assembler::NE); // differing high words already decided the result
+    __ subs($dst$$Register, $src1$$Register, $src2$$Register); // equal high words: dst = lo1 - lo2, which is 0 when the longs are equal
+    __ mov_i($dst$$Register, 1, Assembler::HI); // low words are ordered with unsigned conditions (HI/LO)
+    __ mvn_i($dst$$Register, 0, Assembler::LO);
+    __ bind(done);
+  %}
+  ins_pipe(cmpL_reg);
+%}
+
+// Conditional move
+instruct cmovLL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegL dst, iRegL src) %{
+  match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); // conditional move of a long register pair, driven by long-compare flags
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); // only lt/ge tests, matching the flagsRegL_LTGE flavor
+
+  ins_cost(150);
+  size(8); // declared size: two 4-byte conditional moves
+  format %{ "MOV$cmp  $dst.lo,$src.lo\t! long\n\t"
+            "MOV$cmp  $dst.hi,$src.hi" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); // low halves
+    __ mov($dst$$Register->successor(), $src$$Register->successor(), (Assembler::Condition)($cmp$$cmpcode)); // high halves, same condition
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovLL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegL dst, iRegL src) %{
+  match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); // conditional move of a long register pair, driven by long-compare flags
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); // only eq/ne tests, matching the flagsRegL_EQNE flavor
+
+  ins_cost(150);
+  size(8); // declared size: two 4-byte conditional moves
+  format %{ "MOV$cmp  $dst.lo,$src.lo\t! long\n\t"
+            "MOV$cmp  $dst.hi,$src.hi" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); // low halves
+    __ mov($dst$$Register->successor(), $src$$Register->successor(), (Assembler::Condition)($cmp$$cmpcode)); // high halves, same condition
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovLL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegL dst, iRegL src) %{
+  match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); // conditional move of a long register pair, driven by long-compare flags
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); // only le/gt tests, matching the flagsRegL_LEGT flavor (swapped compare, cmpOpL_commute)
+
+  ins_cost(150);
+  size(8); // declared size: two 4-byte conditional moves
+  format %{ "MOV$cmp  $dst.lo,$src.lo\t! long\n\t"
+            "MOV$cmp  $dst.hi,$src.hi" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); // low halves
+    __ mov($dst$$Register->successor(), $src$$Register->successor(), (Assembler::Condition)($cmp$$cmpcode)); // high halves, same condition
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovLL_imm_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegL dst, immL0 src) %{
+  match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); // conditionally zero a long register pair (src is the immL0 constant)
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); // only lt/ge tests, matching the flagsRegL_LTGE flavor
+  ins_cost(140);
+  size(8); // declared size: two 4-byte conditional moves
+  format %{ "MOV$cmp  $dst.lo,0\t! long\n\t"
+            "MOV$cmp  $dst.hi,0" %}
+  ins_encode %{
+    __ mov($dst$$Register, 0, (Assembler::Condition)($cmp$$cmpcode)); // low half
+    __ mov($dst$$Register->successor(), 0, (Assembler::Condition)($cmp$$cmpcode)); // high half, same condition
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+instruct cmovLL_imm_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegL dst, immL0 src) %{
+  match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); // conditionally zero a long register pair (src is the immL0 constant)
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); // only eq/ne tests, matching the flagsRegL_EQNE flavor
+  ins_cost(140);
+  size(8); // declared size: two 4-byte conditional moves
+  format %{ "MOV$cmp  $dst.lo,0\t! long\n\t"
+            "MOV$cmp  $dst.hi,0" %}
+  ins_encode %{
+    __ mov($dst$$Register, 0, (Assembler::Condition)($cmp$$cmpcode)); // low half
+    __ mov($dst$$Register->successor(), 0, (Assembler::Condition)($cmp$$cmpcode)); // high half, same condition
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+instruct cmovLL_imm_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegL dst, immL0 src) %{
+  match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); // conditionally zero a long register pair (src is the immL0 constant)
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); // only le/gt tests, matching the flagsRegL_LEGT flavor (swapped compare, cmpOpL_commute)
+  ins_cost(140);
+  size(8); // declared size: two 4-byte conditional moves
+  format %{ "MOV$cmp  $dst.lo,0\t! long\n\t"
+            "MOV$cmp  $dst.hi,0" %}
+  ins_encode %{
+    __ mov($dst$$Register, 0, (Assembler::Condition)($cmp$$cmpcode)); // low half
+    __ mov($dst$$Register->successor(), 0, (Assembler::Condition)($cmp$$cmpcode)); // high half, same condition
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+instruct cmovIL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegI dst, iRegI src) %{
+  match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); // conditional int move whose condition comes from a long compare
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); // only lt/ge tests, matching the flagsRegL_LTGE flavor
+
+  ins_cost(150);
+  size(4); // declared size: one 4-byte instruction
+  format %{ "MOV$cmp  $dst,$src" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); // dst keeps its old value when the condition fails
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovIL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegI dst, iRegI src) %{
+  match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); // conditional int move whose condition comes from a long compare
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); // only eq/ne tests, matching the flagsRegL_EQNE flavor
+
+  ins_cost(150);
+  size(4); // declared size: one 4-byte instruction
+  format %{ "MOV$cmp  $dst,$src" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); // dst keeps its old value when the condition fails
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovIL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegI dst, iRegI src) %{
+  match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); // conditional int move whose condition comes from a long compare
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); // only le/gt tests, matching the flagsRegL_LEGT flavor (swapped compare, cmpOpL_commute)
+
+  ins_cost(150);
+  size(4); // declared size: one 4-byte instruction
+  format %{ "MOV$cmp  $dst,$src" %}
+  ins_encode %{
+    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); // dst keeps its old value when the condition fails
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovIL_imm_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegI dst, immI16 src) %{
+  match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); // conditional load of an immI16 constant into an int register, condition from a long compare
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); // only lt/ge tests, matching the flagsRegL_LTGE flavor
+
+  ins_cost(140); // no size() is declared for this instruct
+  format %{ "MOVW$cmp  $dst,$src" %}
+  ins_encode %{
+    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); // conditional MOVW of the constant
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+// Conditional move of an immI16 constant into an int register, driven by a
+// flagsRegL_EQNE flags operand. Matches only Bool tests eq and ne.
+// No size() is declared for this rule.
+instruct cmovIL_imm_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegI dst, immI16 src) %{
+  match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src)));
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
+
+  ins_cost(140);
+  format %{ "MOVW$cmp  $dst,$src" %}
+  ins_encode %{
+    // The constant is written to $dst only when condition $cmp holds.
+    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+// Conditional move of an immI16 constant into an int register, driven by a
+// flagsRegL_LEGT flags operand. Matches only Bool tests le and gt.
+// NOTE(review): $cmp is cmpOpL_commute (not cmpOpL) -- presumably the
+// condition code for swapped compare operands; confirm.
+instruct cmovIL_imm_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegI dst, immI16 src) %{
+  match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src)));
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
+
+  ins_cost(140);
+  format %{ "MOVW$cmp  $dst,$src" %}
+  ins_encode %{
+    // The constant is written to $dst only when condition $cmp holds.
+    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+// Conditional pointer register move driven by a flagsRegL_LTGE flags operand.
+// Matches only Bool tests lt and ge; emits one MOV predicated on $cmp.
+instruct cmovPL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegP dst, iRegP src) %{
+  match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src)));
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
+
+  ins_cost(150);
+  size(4);
+  format %{ "MOV$cmp  $dst,$src" %}
+  ins_encode %{
+    // $dst is overwritten with $src only when condition $cmp holds.
+    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Conditional pointer register move driven by a flagsRegL_EQNE flags operand.
+// Matches only Bool tests eq and ne; emits one MOV predicated on $cmp.
+instruct cmovPL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegP dst, iRegP src) %{
+  match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src)));
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
+
+  ins_cost(150);
+  size(4);
+  format %{ "MOV$cmp  $dst,$src" %}
+  ins_encode %{
+    // $dst is overwritten with $src only when condition $cmp holds.
+    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Conditional pointer register move driven by a flagsRegL_LEGT flags operand.
+// Matches only Bool tests le and gt; emits one MOV predicated on $cmp.
+// NOTE(review): $cmp is cmpOpL_commute (not cmpOpL) -- presumably the
+// condition code for swapped compare operands; confirm.
+instruct cmovPL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegP dst, iRegP src) %{
+  match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src)));
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
+
+  ins_cost(150);
+  size(4);
+  format %{ "MOV$cmp  $dst,$src" %}
+  ins_encode %{
+    // $dst is overwritten with $src only when condition $cmp holds.
+    __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode));
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Conditional move of an immP0 constant (presumably the null pointer -- see
+// the immP0 operand definition) into a pointer register, driven by a
+// flagsRegL_LTGE flags operand. Matches only Bool tests lt and ge.
+instruct cmovPL_imm_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegP dst, immP0 src) %{
+  match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src)));
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
+
+  ins_cost(140);
+  format %{ "MOVW$cmp  $dst,$src" %}
+  ins_encode %{
+    // The constant is written to $dst only when condition $cmp holds.
+    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+// Conditional move of an immP0 constant (presumably the null pointer -- see
+// the immP0 operand definition) into a pointer register, driven by a
+// flagsRegL_EQNE flags operand. Matches only Bool tests eq and ne.
+instruct cmovPL_imm_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegP dst, immP0 src) %{
+  match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src)));
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
+
+  ins_cost(140);
+  format %{ "MOVW$cmp  $dst,$src" %}
+  ins_encode %{
+    // The constant is written to $dst only when condition $cmp holds.
+    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+// Conditional move of an immP0 constant (presumably the null pointer -- see
+// the immP0 operand definition) into a pointer register, driven by a
+// flagsRegL_LEGT flags operand. Matches only Bool tests le and gt.
+// NOTE(review): $cmp is cmpOpL_commute (not cmpOpL) -- presumably the
+// condition code for swapped compare operands; confirm.
+instruct cmovPL_imm_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegP dst, immP0 src) %{
+  match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src)));
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
+
+  ins_cost(140);
+  format %{ "MOVW$cmp  $dst,$src" %}
+  ins_encode %{
+    // The constant is written to $dst only when condition $cmp holds.
+    __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode));
+  %}
+  ins_pipe(ialu_imm);
+%}
+
+// Conditional single-precision float register move driven by a flagsRegL_LTGE
+// flags operand. Matches only Bool tests lt and ge. The listing prints FCPYS;
+// the encoding calls vmov_f32 with the condition.
+instruct cmovFL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, regF dst, regF src) %{
+  match(Set dst (CMoveF (Binary cmp xcc) (Binary dst src)));
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
+  ins_cost(150);
+  size(4);
+  format %{ "FCPYS$cmp $dst,$src" %}
+  ins_encode %{
+    // $dst is overwritten with $src only when condition $cmp holds.
+    __ vmov_f32($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode));
+  %}
+  ins_pipe(int_conditional_float_move);
+%}
+
+// Conditional single-precision float register move driven by a flagsRegL_EQNE
+// flags operand. Matches only Bool tests eq and ne. The listing prints FCPYS;
+// the encoding calls vmov_f32 with the condition.
+instruct cmovFL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, regF dst, regF src) %{
+  match(Set dst (CMoveF (Binary cmp xcc) (Binary dst src)));
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
+  ins_cost(150);
+  size(4);
+  format %{ "FCPYS$cmp $dst,$src" %}
+  ins_encode %{
+    // $dst is overwritten with $src only when condition $cmp holds.
+    __ vmov_f32($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode));
+  %}
+  ins_pipe(int_conditional_float_move);
+%}
+
+// Conditional single-precision float register move driven by a flagsRegL_LEGT
+// flags operand. Matches only Bool tests le and gt.
+// NOTE(review): $cmp is cmpOpL_commute (not cmpOpL) -- presumably the
+// condition code for swapped compare operands; confirm.
+instruct cmovFL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, regF dst, regF src) %{
+  match(Set dst (CMoveF (Binary cmp xcc) (Binary dst src)));
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
+  ins_cost(150);
+  size(4);
+  format %{ "FCPYS$cmp $dst,$src" %}
+  ins_encode %{
+    // $dst is overwritten with $src only when condition $cmp holds.
+    __ vmov_f32($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode));
+  %}
+  ins_pipe(int_conditional_float_move);
+%}
+
+// Conditional double-precision float register move driven by a flagsRegL_LTGE
+// flags operand. Matches only Bool tests lt and ge. The listing prints FCPYD;
+// the encoding calls vmov_f64 with the condition.
+instruct cmovDL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, regD dst, regD src) %{
+  match(Set dst (CMoveD (Binary cmp xcc) (Binary dst src)));
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
+
+  ins_cost(150);
+  size(4);
+  format %{ "FCPYD$cmp $dst,$src" %}
+  ins_encode %{
+    // $dst is overwritten with $src only when condition $cmp holds.
+    __ vmov_f64($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode));
+  %}
+  ins_pipe(int_conditional_float_move);
+%}
+
+// Conditional double-precision float register move driven by a flagsRegL_EQNE
+// flags operand. Matches only Bool tests eq and ne. The listing prints FCPYD;
+// the encoding calls vmov_f64 with the condition.
+instruct cmovDL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, regD dst, regD src) %{
+  match(Set dst (CMoveD (Binary cmp xcc) (Binary dst src)));
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
+
+  ins_cost(150);
+  size(4);
+  format %{ "FCPYD$cmp $dst,$src" %}
+  ins_encode %{
+    // $dst is overwritten with $src only when condition $cmp holds.
+    __ vmov_f64($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode));
+  %}
+  ins_pipe(int_conditional_float_move);
+%}
+
+// Conditional double-precision float register move driven by a flagsRegL_LEGT
+// flags operand. Matches only Bool tests le and gt.
+// NOTE(review): $cmp is cmpOpL_commute (not cmpOpL) -- presumably the
+// condition code for swapped compare operands; confirm.
+instruct cmovDL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, regD dst, regD src) %{
+  match(Set dst (CMoveD (Binary cmp xcc) (Binary dst src)));
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
+
+  ins_cost(150);
+  size(4);
+  format %{ "FCPYD$cmp $dst,$src" %}
+  ins_encode %{
+    // $dst is overwritten with $src only when condition $cmp holds.
+    __ vmov_f64($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode));
+  %}
+  ins_pipe(int_conditional_float_move);
+%}
+
+// ============================================================================
+// Safepoint Instruction
+// rather than KILL R12, it would be better to use any reg as
+// TEMP. Can't do that at this point because it crashes the compiler
+instruct safePoint_poll(iRegP poll, R12RegI tmp, flagsReg icc) %{
+  match(SafePoint poll);
+  // The loaded value lands in $tmp (fixed to the R12RegI class) and is
+  // discarded; flags are declared killed as well.
+  effect(USE poll, KILL tmp, KILL icc);
+
+  size(4);
+  format %{ "LDR   $tmp,[$poll]\t! Safepoint: poll for GC" %}
+  ins_encode %{
+    // Tag the load with a poll_type relocation before emitting it.
+    __ relocate(relocInfo::poll_type);
+    // A single load through the address held in $poll.
+    __ ldr($tmp$$Register, Address($poll$$Register));
+  %}
+  ins_pipe(loadPollP);
+%}
+
+
+// ============================================================================
+// Call Instructions
+// Call Java Static Instruction
+instruct CallStaticJavaDirect( method meth ) %{
+  match(CallStaticJava);
+  // Only for calls that are NOT method-handle invokes; those are matched by
+  // CallStaticJavaHandle.
+  predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
+  effect(USE meth);
+  // Size is computed per node by call_static_enc_size().
+  size(call_static_enc_size(this, _method, _method_handle_invoke));
+
+  ins_cost(CALL_COST);
+  format %{ "CALL,static ==> " %}
+  ins_encode( Java_Static_Call( meth ), call_epilog );
+  ins_pipe(simple_call);
+%}
+
+// Call Java Static Instruction (method handle version)
+instruct CallStaticJavaHandle( method meth ) %{
+  match(CallStaticJava);
+  // Only for method-handle invokes; all other static calls are matched by
+  // CallStaticJavaDirect.
+  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
+  effect(USE meth);
+  // Size is computed per node by call_static_enc_size().
+  size(call_static_enc_size(this, _method, _method_handle_invoke));
+
+  // FP is saved by all callees (for interpreter stack correction).
+  // We use it here for a similar purpose, in {preserve,restore}_FP.
+  // NOTE(review): the encodings actually listed below are preserve_SP and
+  // restore_SP; the _FP names in the sentence above look stale -- confirm.
+
+  ins_cost(CALL_COST);
+  format %{ "CALL,static/MethodHandle ==> " %}
+  // The call is bracketed by preserve_SP / restore_SP.
+  ins_encode( preserve_SP, Java_Static_Call( meth ), restore_SP, call_epilog );
+  ins_pipe(simple_call);
+%}
+
+// Call Java Dynamic Instruction
+instruct CallDynamicJavaDirect( method meth ) %{
+  match(CallDynamicJava);
+  effect(USE meth);
+  // Size comes from call_dynamic_enc_size(), which takes no per-node arguments.
+  size(call_dynamic_enc_size());
+
+  ins_cost(CALL_COST);
+  // The listing shows a MOV_OOP of an (empty) value ahead of the call itself.
+  format %{ "MOV_OOP    (empty),R_R8\n\t"
+            "CALL,dynamic  ; NOP ==> " %}
+  ins_encode( Java_Dynamic_Call( meth ), call_epilog );
+  ins_pipe(call);
+%}
+
+// Call Runtime Instruction
+instruct CallRuntimeDirect(method meth) %{
+  match(CallRuntime);
+  effect(USE meth);
+  ins_cost(CALL_COST);
+  // Size is computed per node by call_runtime_enc_size().
+  size(call_runtime_enc_size(this));
+
+  format %{ "CALL,runtime" %}
+  // Same encoding pair as the CallLeaf and CallLeafNoFP rules.
+  ins_encode( Java_To_Runtime( meth ),
+              call_epilog );
+  ins_pipe(simple_call);
+%}
+
+// Call runtime without safepoint - same as CallRuntime
+instruct CallLeafDirect(method meth) %{
+  match(CallLeaf);
+  effect(USE meth);
+  ins_cost(CALL_COST);
+  // Size is computed per node by call_runtime_enc_size().
+  size(call_runtime_enc_size(this));
+
+  format %{ "CALL,runtime leaf" %}
+  // TODO: need save_last_PC here?
+  // Encoding is identical to CallRuntimeDirect.
+  ins_encode( Java_To_Runtime( meth ),
+              call_epilog );
+  ins_pipe(simple_call);
+%}
+
+// Call runtime without safepoint - same as CallLeaf
+instruct CallLeafNoFPDirect(method meth) %{
+  match(CallLeafNoFP);
+  effect(USE meth);
+  ins_cost(CALL_COST);
+  // Size is computed per node by call_runtime_enc_size().
+  size(call_runtime_enc_size(this));
+
+  format %{ "CALL,runtime leaf nofp" %}
+  // TODO: need save_last_PC here?
+  // Encoding is identical to CallLeafDirect.
+  ins_encode( Java_To_Runtime( meth ),
+              call_epilog );
+  ins_pipe(simple_call);
+%}
+
+// Tail Call; Jump from runtime stub to Java code.
+// Also known as an 'interprocedural jump'.
+// Target of jump will eventually return to caller.
+// TailJump below removes the return address.
+instruct TailCalljmpInd(iRegP jump_target, inline_cache_regP method_oop) %{
+  match(TailCall jump_target method_oop );
+
+  ins_cost(CALL_COST);
+  // "Rexception_pc" in the listing corresponds to r3 in the encoding below.
+  format %{ "MOV    Rexception_pc, LR\n\t"
+            "jump   $jump_target  \t! $method_oop holds method oop" %}
+  ins_encode %{
+    __ mov(r3, lr);   // this is used only to call
+                                 // StubRoutines::forward_exception_entry()
+                                 // which expects PC of exception in
+                                 // R3. FIXME?
+    // Branch (no link) to the address held in $jump_target.
+    __ b($jump_target$$Register);
+  %}
+  ins_pipe(tail_call);
+%}
+
+
+// Return Instruction
+instruct Ret() %{
+  match(Return);
+
+  format %{ "ret LR" %}
+
+  ins_encode %{
+    // Return through the link register.
+    __ ret(lr);
+  %}
+
+  ins_pipe(br);
+%}
+
+
+// Tail Jump; remove the return address; jump to target.
+// TailCall above leaves the return address around.
+// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
+// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a
+// "restore" before this instruction (in Epilogue), we need to materialize it
+// in %i0.
+// NOTE(review): %o0/%i0 and "restore" look like SPARC register-window terms
+// carried over from another port; in this rule the exception oop is simply
+// constrained to the RExceptionRegP operand class -- confirm and reword.
+instruct tailjmpInd(IPRegP jump_target, RExceptionRegP ex_oop) %{
+  match( TailJump jump_target ex_oop );
+  ins_cost(CALL_COST);
+  // "Rexception_pc" in the listing corresponds to r3 in the encoding below.
+  format %{ "MOV    Rexception_pc, LR\n\t"
+            "jump   $jump_target \t! $ex_oop holds exc. oop" %}
+  ins_encode %{
+    // Copy LR into r3, then branch (no link) to $jump_target.
+    __ mov(r3, lr);
+    __ b($jump_target$$Register);
+  %}
+  ins_pipe(tail_call);
+%}
+
+// Create exception oop: created by stack-crawling runtime code.
+// Created exception is now available to this handler, and is setup
+// just prior to jumping to this handler.  No code emitted.
+instruct CreateException( RExceptionRegP ex_oop )
+%{
+  match(Set ex_oop (CreateEx));
+  ins_cost(0);
+
+  // Zero-size rule: the encoding is empty, so matching CreateEx only binds the
+  // result to the RExceptionRegP operand class.
+  size(0);
+  // use the following format syntax
+  format %{ "! exception oop is in Rexception_obj; no code emitted" %}
+  ins_encode();
+  ins_pipe(empty);
+%}
+
+
+// Rethrow exception:
+// The exception oop will come in the first argument position.
+// Then JUMP (not call) to the rethrow stub code.
+instruct RethrowException()
+%{
+  match(Rethrow);
+  ins_cost(CALL_COST);
+
+  // use the following format syntax
+  format %{ "b    rethrow_stub" %}
+  ins_encode %{
+    // r1 is handed to jump() as its scratch register; it must not alias
+    // c_rarg0 or lr, which the assert below checks.
+    Register scratch = r1;
+    assert_different_registers(scratch, c_rarg0, lr);
+    // Jump (not call) to the rethrow stub, with a runtime_call_type relocation.
+    __ jump(OptoRuntime::rethrow_stub(), relocInfo::runtime_call_type, scratch);
+  %}
+  ins_pipe(tail_call);
+%}
+
+
+// Die now
+instruct ShouldNotReachHere( )
+%{
+  match(Halt);
+  ins_cost(CALL_COST);
+
+  size(4);
+  // Use the following format syntax
+  format %{ "ShouldNotReachHere" %}
+  ins_encode %{
+    // One UDF instruction carrying the marker immediate 0xdead.
+    __ udf(0xdead);
+  %}
+  ins_pipe(tail_call);
+%}
+
+// ============================================================================
+// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
+// array for an instance of the superklass.  Set a hidden internal cache on a
+// hit (cache is checked with exposed code in gen_subtype_check()).  Return
+// not zero for a miss or zero for a hit.  The encoding ALSO sets flags.
+// Operands are pinned to fixed register classes (R0RegP result, R1RegP sub,
+// R2RegP super); flags, r9, r12 and lr are declared killed by the stub call.
+instruct partialSubtypeCheck( R0RegP index, R1RegP sub, R2RegP super, flagsRegP pcc, LRRegP lr, R9RegI r9, R12RegI r12 ) %{
+  match(Set index (PartialSubtypeCheck sub super));
+  effect( KILL pcc, KILL r9, KILL r12, KILL lr );
+  ins_cost(DEFAULT_COST*10);
+  format %{ "CALL   PartialSubtypeCheck" %}
+  ins_encode %{
+    // Out-of-line call to the shared stub, with a runtime_call_type relocation.
+    __ call(StubRoutines::aarch32::partial_subtype_check(), relocInfo::runtime_call_type);
+  %}
+  ins_pipe(partial_subtype_check_pipe);
+%}
+
+/* instruct partialSubtypeCheck_vs_zero( flagsRegP pcc, o1RegP sub, o2RegP super, immP0 zero, o0RegP idx, o7RegP o7 ) %{ */
+/*   match(Set pcc (CmpP (PartialSubtypeCheck sub super) zero)); */
+/*   ins_pipe(partial_subtype_check_pipe); */
+/* %} */
+
+
+// ============================================================================
+// inlined locking and unlocking
+
+// Inlined monitor enter: matches FastLock and produces its result in the
+// flags operand $pcc. $mark, $scratch and $scratch2 are temporaries handed to
+// the macro-assembler routine.
+instruct cmpFastLock(flagsRegP pcc, iRegP object, iRegP box, iRegP mark, iRegP scratch2, iRegP scratch )
+%{
+  match(Set pcc (FastLock object box));
+
+  effect(TEMP mark, TEMP scratch, TEMP scratch2);
+  ins_cost(100);
+
+  format %{ "FASTLOCK  $object, $box; KILL $mark, $scratch, $scratch2" %}
+  ins_encode %{
+    // Note the argument order: $scratch is passed before $scratch2, although
+    // the operand list declares scratch2 first.
+    __ fast_lock($object$$Register, $box$$Register, $mark$$Register, $scratch$$Register, $scratch2$$Register);
+  %}
+  ins_pipe(long_memory_op);
+%}
+
+
+// Inlined monitor exit: matches FastUnlock and produces its result in the
+// flags operand $pcc. $scratch and $scratch2 are temporaries handed to the
+// macro-assembler routine.
+instruct cmpFastUnlock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, iRegP scratch ) %{
+  match(Set pcc (FastUnlock object box));
+  effect(TEMP scratch, TEMP scratch2);
+  ins_cost(100);
+
+  format %{ "FASTUNLOCK  $object, $box; KILL $scratch, $scratch2" %}
+  ins_encode %{
+    // Note the argument order: $scratch is passed before $scratch2, although
+    // the operand list declares scratch2 first.
+    __ fast_unlock($object$$Register, $box$$Register, $scratch$$Register, $scratch2$$Register);
+  %}
+  ins_pipe(long_memory_op);
+%}
+
+// Zero $cnt bytes starting at $base, one 32-bit word per iteration, walking
+// from the highest offset down to offset 0. $cnt is copied into $temp first,
+// so neither $cnt nor $base is modified.
+// NOTE(review): the comment that used to sit here read "Count and Base
+// registers are fixed because the allocator cannot kill unknown registers.
+// The encodings are generic." The operands below are declared with the
+// iRegX/iRegP classes, so the "fixed" remark looks stale -- confirm.
+instruct clear_array(iRegX cnt, iRegP base, iRegI temp, iRegX zero, Universe dummy, flagsReg cpsr) %{
+  match(Set dummy (ClearArray cnt base));
+  effect(TEMP temp, TEMP zero, KILL cpsr);
+  ins_cost(300);
+  // Every listing line is newline-terminated so that the conditional store
+  // and the loop branch print as separate instructions.
+  format %{ "MOV    $zero,0\n"
+      "        MOV    $temp,$cnt\n"
+      "loop:   SUBS   $temp,$temp,4\t! Count down a dword of bytes\n"
+      "        STR.ge $zero,[$base+$temp]\t! delay slot\n"
+      "        B.gt   loop\t\t! Clearing loop\n" %}
+  ins_encode %{
+    __ mov($zero$$Register, 0);
+    __ mov($temp$$Register, $cnt$$Register);
+    Label loop;
+    __ bind(loop);
+    // After the subtraction $temp is the byte offset of the next word to
+    // clear; SUBS also sets the flags tested by the two instructions below.
+    __ subs($temp$$Register, $temp$$Register, 4);
+    // GE: the offset is still non-negative, so the word is inside the region.
+    __ str($zero$$Register, Address($base$$Register, $temp$$Register), Assembler::GE);
+    // GT: offset > 0 means at least one more word remains below it.
+    __ b(loop, Assembler::GT);
+  %}
+  ins_pipe(long_memory_op);
+%}
+
+// StrComp intrinsic, selected when the node's encoding is StrIntrinsicNode::UU.
+// Inputs are pinned to R0..R3 register classes and declared USE_KILL; two
+// integer and two Q0/Q1 temporaries are reserved and the flags are killed.
+// The trailing (2), (2) arguments are presumably the per-string element sizes
+// in bytes -- confirm against enc_String_Compare.
+instruct string_compareUU(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result,
+                          iRegI tmp1, iRegI tmp2, Q0_regD tmp3, Q1_regD tmp4, flagsReg ccr) %{
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP result, KILL ccr);
+
+  ins_cost(300);
+  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
+  ins_encode( enc_String_Compare(str1, str2, cnt1, cnt2, result, tmp1, tmp2, tmp3, tmp4, (2), (2)) );
+  ins_pipe(long_memory_op);
+%}
+
+// StrComp intrinsic, selected when the node's encoding is StrIntrinsicNode::LL.
+// Same operand layout and effects as the UU variant; only the trailing
+// arguments differ: (1), (1), presumably the per-string element sizes in
+// bytes -- confirm against enc_String_Compare.
+instruct string_compareLL(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result,
+                          iRegI tmp1, iRegI tmp2, Q0_regD tmp3, Q1_regD tmp4, flagsReg ccr) %{
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP result, KILL ccr);
+
+  ins_cost(300);
+  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
+  ins_encode( enc_String_Compare(str1, str2, cnt1, cnt2, result, tmp1, tmp2, tmp3, tmp4, (1), (1)) );
+  ins_pipe(long_memory_op);
+%}
+
+// StrComp intrinsic, selected when the node's encoding is StrIntrinsicNode::UL.
+// Same operand layout and effects as the UU variant; the trailing arguments
+// are (2), (1), presumably the element sizes in bytes of str1 and str2
+// respectively -- confirm against enc_String_Compare.
+instruct string_compareUL(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result,
+                          iRegI tmp1, iRegI tmp2, Q0_regD tmp3, Q1_regD tmp4, flagsReg ccr) %{
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP result, KILL ccr);
+
+  ins_cost(300);
+  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
+  ins_encode( enc_String_Compare(str1, str2, cnt1, cnt2, result, tmp1, tmp2, tmp3, tmp4, (2), (1)) );
+  ins_pipe(long_memory_op);
+%}
+
+// StrComp intrinsic, selected when the node's encoding is StrIntrinsicNode::LU.
+// Same operand layout and effects as the UU variant; the trailing arguments
+// are (1), (2), presumably the element sizes in bytes of str1 and str2
+// respectively -- confirm against enc_String_Compare.
+instruct string_compareLU(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result,
+                          iRegI tmp1, iRegI tmp2, Q0_regD tmp3, Q1_regD tmp4, flagsReg ccr) %{
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP result, KILL ccr);
+
+  ins_cost(300);
+  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
+  ins_encode( enc_String_Compare(str1, str2, cnt1, cnt2, result, tmp1, tmp2, tmp3, tmp4, (1), (2)) );
+  ins_pipe(long_memory_op);
+%}
+
+// StrEquals intrinsic, selected when the node's encoding is
+// StrIntrinsicNode::UU. Shares enc_Array_Equals with the AryEq rules; here the
+// third argument is the explicit count $cnt and the last argument is (false).
+// The (2) is presumably the element size in bytes -- confirm against the
+// enc_class.
+instruct string_equalsUU(R0RegP str1, R1RegP str2, R2RegI cnt, iRegI result, iRegI tmp1, flagsReg ccr) %{
+  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
+  match(Set result (StrEquals (Binary str1 str2) cnt));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP tmp1, TEMP result, KILL ccr);
+
+  ins_cost(300);
+  format %{ "String Equals $str1,$str2,$cnt -> $result   # KILL $tmp1" %}
+  ins_encode( enc_Array_Equals(str1, str2, cnt, tmp1, result, (2), (false)) );
+  ins_pipe(long_memory_op);
+%}
+
+// StrEquals intrinsic, selected when the node's encoding is
+// StrIntrinsicNode::LL. Identical to the UU variant except that the size
+// argument passed to enc_Array_Equals is (1) -- presumably the element size in
+// bytes; confirm against the enc_class.
+instruct string_equalsLL(R0RegP str1, R1RegP str2, R2RegI cnt, iRegI result, iRegI tmp1, flagsReg ccr) %{
+  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
+  match(Set result (StrEquals (Binary str1 str2) cnt));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP tmp1, TEMP result, KILL ccr);
+
+  ins_cost(300);
+  format %{ "String Equals $str1,$str2,$cnt -> $result   # KILL $tmp1" %}
+  ins_encode( enc_Array_Equals(str1, str2, cnt, tmp1, result, (1), (false)) );
+  ins_pipe(long_memory_op);
+%}
+
+// AryEq intrinsic, selected when the node's encoding is StrIntrinsicNode::UU.
+// There is no count operand: two temporaries are passed to enc_Array_Equals
+// instead, and the last argument is (true) where the StrEquals rules pass
+// (false). The (2) is presumably the element size in bytes -- confirm against
+// the enc_class.
+instruct array_equalsUU(R0RegP ary1, R1RegP ary2, iRegI tmp1, iRegI tmp2, iRegI result, flagsReg ccr) %{
+  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
+  match(Set result (AryEq ary1 ary2));
+  effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP result, KILL ccr);
+
+  ins_cost(300);
+  format %{ "Array Equals $ary1,$ary2 -> $result   # KILL $tmp1,$tmp2" %}
+  ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result, (2), (true)));
+  ins_pipe(long_memory_op);
+%}
+
+// AryEq intrinsic, selected when the node's encoding is StrIntrinsicNode::LL.
+// Identical to the UU variant except that the size argument passed to
+// enc_Array_Equals is (1) -- presumably the element size in bytes; confirm
+// against the enc_class.
+instruct array_equalsLL(R0RegP ary1, R1RegP ary2, iRegI tmp1, iRegI tmp2, iRegI result, flagsReg ccr) %{
+  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
+  match(Set result (AryEq ary1 ary2));
+  effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP result, KILL ccr);
+
+  ins_cost(300);
+  format %{ "Array Equals $ary1,$ary2 -> $result   # KILL $tmp1,$tmp2" %}
+  ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result, (1), (true)));
+  ins_pipe(long_memory_op);
+%}
+
+// StrCompressedCopy intrinsic. All operands are pinned to fixed register
+// classes (src R2, dst R1, len R3, result R0, temporaries R9/R12/Q0/Q1).
+// $lr is reserved as a TEMP but is not passed to the encoding.
+instruct string_compress(R2RegP src, R1RegP dst, R3RegI len,
+                         R9RegI tmp1, Q0_regD tmp2, Q1_regD tmp3, R12RegI tmp4, LRRegP lr, R0RegI result, flagsReg ccr)
+%{
+  match(Set result (StrCompressedCopy src (Binary dst len)));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP lr, USE_KILL src, USE_KILL dst, USE_KILL len, KILL ccr);
+
+  format %{ "String Compress $src,$dst -> $result    // KILL $tmp1, $tmp2, $tmp3, $tmp4, $lr" %}
+  ins_encode( enc_Char_Array_Compress(src, dst, len, tmp1, tmp2, tmp3, tmp4, result, ccr) );
+  ins_pipe(long_memory_op);
+%}
+
+// StrInflatedCopy intrinsic. The match result is the Universe operand $dummy,
+// i.e. no register result is produced. src/dst/len are pinned to R0/R1/R2
+// classes and declared USE_KILL. $lr is reserved as a TEMP but is not passed
+// to the encoding.
+instruct string_inflate(Universe dummy, R0RegP src, R1RegP dst, R2RegI len,
+                        iRegI tmp1, Q0_regD tmp2, LRRegP lr, flagsReg ccr)
+%{
+  match(Set dummy (StrInflatedCopy src (Binary dst len)));
+  effect(TEMP tmp1, TEMP tmp2, TEMP lr, USE_KILL src, USE_KILL dst, USE_KILL len, KILL ccr);
+
+  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2, $lr" %}
+  ins_encode( enc_Byte_Array_Inflate(src, dst, len, tmp1, tmp2, ccr) );
+  ins_pipe(long_memory_op);
+%}
+
+//---------- Zeros Count Instructions ------------------------------------------
+
+// 32-bit count-leading-zeros: a single CLZ from $src into $dst.
+instruct countLeadingZerosI(iRegI dst, iRegI src) %{
+  match(Set dst (CountLeadingZerosI src));
+  size(4);
+  format %{ "CLZ_32 $dst,$src" %}
+  ins_encode %{
+    __ clz($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// 64-bit count-leading-zeros assembled from two 32-bit CLZs. The high word is
+// counted first; only when that count is exactly 32 (high word all zero) is
+// the low word counted and added on. $dst is a TEMP as well as the result,
+// and the TEQ clobbers the flags.
+instruct countLeadingZerosL(iRegI dst, iRegL src, iRegI tmp, flagsReg ccr) %{
+  match(Set dst (CountLeadingZerosL src));
+  effect(TEMP tmp, TEMP dst, KILL ccr);
+  size(16);
+  format %{ "CLZ    $dst,$src.hi\n\t"
+            "TEQ    $dst,32\n\t"
+            "CLZ.eq $tmp,$src.lo\n\t"
+            "ADD.eq $dst, $dst, $tmp\n\t" %}
+  ins_encode %{
+    Register count   = $dst$$Register;
+    Register scratch = $tmp$$Register;
+    Register src_lo  = $src$$Register;
+    Register src_hi  = src_lo->successor();
+
+    __ clz(count, src_hi);
+    // EQ is set only when the high word contributed all 32 zero bits.
+    __ teq(count, 32);
+    __ clz(scratch, src_lo, Assembler::EQ);
+    __ add(count, count, scratch, Assembler::EQ);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// 32-bit count-trailing-zeros: bit-reverse $src into $tmp, then count the
+// leading zeros of the reversed value.
+instruct countTrailingZerosI(iRegI dst, iRegI src, iRegI tmp) %{
+  match(Set dst (CountTrailingZerosI src));
+  effect(TEMP tmp);
+  size(8);
+  format %{ "RBIT_32 $tmp, $src\n\t"
+            "CLZ_32  $dst,$tmp" %}
+  ins_encode %{
+    Register reversed = $tmp$$Register;
+
+    __ rbit(reversed, $src$$Register);
+    __ clz($dst$$Register, reversed);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// 64-bit count-trailing-zeros. The low word is bit-reversed and counted
+// first; only when that count is exactly 32 (low word all zero) is the
+// count for the bit-reversed high word added on. The RBIT of the high word is
+// emitted unconditionally; just the second CLZ and the ADD are predicated.
+instruct countTrailingZerosL(iRegI dst, iRegL src, iRegI tmp, flagsReg ccr) %{
+  match(Set dst (CountTrailingZerosL src));
+  effect(TEMP tmp, TEMP dst, KILL ccr);
+  size(24);
+  format %{ "RBIT   $tmp,$src.lo\n\t"
+            "CLZ    $dst,$tmp\n\t"
+            "TEQ    $dst,32\n\t"
+            "RBIT   $tmp,$src.hi\n\t"
+            "CLZ.eq $tmp,$tmp\n\t"
+            "ADD.eq $dst,$dst,$tmp\n\t" %}
+  ins_encode %{
+    Register count   = $dst$$Register;
+    Register scratch = $tmp$$Register;
+    Register src_lo  = $src$$Register;
+    Register src_hi  = src_lo->successor();
+
+    __ rbit(scratch, src_lo);
+    __ clz(count, scratch);
+    // EQ is set only when the low word contributed all 32 zero bits.
+    __ teq(count, 32);
+    __ rbit(scratch, src_hi);
+    __ clz(scratch, scratch, Assembler::EQ);
+    __ add(count, count, scratch, Assembler::EQ);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+
+//---------- Population Count Instructions -------------------------------------
+
+// 32-bit population count, enabled by UsePopCountInstruction. The value is
+// moved into the vector temporary $tmp, counted per byte with VCNT, folded
+// with two pairwise widening adds (U8, then U16), and moved back to $dst.
+instruct popCountI(iRegI dst, iRegI src, regD_low tmp) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountI src));
+  effect(TEMP tmp);
+
+  format %{ "FMSR       $tmp,$src\n\t"
+            "VCNT.8     $tmp,$tmp\n\t"
+            "VPADDL.U8  $tmp,$tmp\n\t"
+            "VPADDL.U16 $tmp,$tmp\n\t"
+            "FMRS       $dst,$tmp" %}
+  size(20);
+
+  ins_encode %{
+    FloatRegister vec = $tmp$$FloatRegister;
+
+    __ vmov_f32(vec, $src$$Register);
+    __ vcnt_64(vec, vec);
+    __ vpaddl_64_u8(vec, vec);
+    __ vpaddl_64_u16(vec, vec);
+    __ vmov_f32($dst$$Register, vec);
+  %}
+  ins_pipe(ialu_reg); // FIXME
+%}
+
+// Note: Long.bitCount(long) returns an int.
+// 64-bit population count, enabled by UsePopCountInstruction. Both halves of
+// $src are moved into the vector temporary $tmp, counted per byte with VCNT,
+// folded with three pairwise widening adds (U8, U16, U32), and the result is
+// moved to the int register $dst.
+instruct popCountL(iRegI dst, iRegL src, regD_low tmp) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountL src));
+  effect(TEMP tmp);
+
+  format %{ "FMDRR       $tmp,$src.lo,$src.hi\n\t"
+            "VCNT.8      $tmp,$tmp\n\t"
+            "VPADDL.U8   $tmp,$tmp\n\t"
+            "VPADDL.U16  $tmp,$tmp\n\t"
+            "VPADDL.U32  $tmp,$tmp\n\t"
+            "FMRS        $dst,$tmp" %}
+
+  // NOTE(review): the encoding below makes six assembler calls; if each emits
+  // one 4-byte instruction the real size is 24, not 32. Over-declaring is
+  // presumably harmless but imprecise -- confirm against the assembler.
+  size(32);
+
+  ins_encode %{
+    // Low word from $src, high word from its successor register.
+    __ vmov_f64($tmp$$FloatRegister, $src$$Register, $src$$Register->successor());
+    __ vcnt_64($tmp$$FloatRegister, $tmp$$FloatRegister);
+    __ vpaddl_64_u8($tmp$$FloatRegister, $tmp$$FloatRegister);
+    __ vpaddl_64_u16($tmp$$FloatRegister, $tmp$$FloatRegister);
+    __ vpaddl_64_u32($tmp$$FloatRegister, $tmp$$FloatRegister);
+    __ vmov_f32($dst$$Register, $tmp$$FloatRegister);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+
+// ============================================================================
+//------------Bytes reverse--------------------------------------------------
+
+// Byte-reverse a 32-bit value with a single REV.
+instruct bytes_reverse_int(iRegI dst, iRegI src) %{
+  match(Set dst (ReverseBytesI src));
+
+  size(4);
+  format %{ "REV32 $dst,$src" %}
+  ins_encode %{
+    __ rev($dst$$Register, $src$$Register);
+  %}
+  ins_pipe( iload_mem ); // FIXME
+%}
+
+// Byte-reverse a 64-bit value: each 32-bit half is byte-reversed with REV and
+// the two halves trade places (dst.lo <- rev(src.hi), dst.hi <- rev(src.lo)).
+// $dst is declared TEMP, presumably so it is never allocated on top of $src
+// and the first REV cannot destroy the half still needed by the second.
+instruct bytes_reverse_long(iRegL dst, iRegL src) %{
+  match(Set dst (ReverseBytesL src));
+  effect(TEMP dst);
+  size(8);
+  // The listing names the same source halves the encoding actually reads.
+  format %{ "REV $dst.lo,$src.hi\n\t"
+            "REV $dst.hi,$src.lo" %}
+  ins_encode %{
+    // successor() of a long's register is its high half.
+    __ rev($dst$$Register, $src$$Register->successor());
+    __ rev($dst$$Register->successor(), $src$$Register);
+  %}
+  ins_pipe( iload_mem ); // FIXME
+%}
+
+// ReverseBytesUS: a single REV16 from $src into $dst.
+instruct bytes_reverse_unsigned_short(iRegI dst, iRegI src) %{
+  match(Set dst (ReverseBytesUS src));
+  size(4);
+  format %{ "REV16 $dst,$src" %}
+  ins_encode %{
+    __ rev16($dst$$Register, $src$$Register);
+  %}
+  ins_pipe( iload_mem ); // FIXME
+%}
+
+// ReverseBytesS: a single REVSH from $src into $dst.
+instruct bytes_reverse_short(iRegI dst, iRegI src) %{
+  match(Set dst (ReverseBytesS src));
+  size(4);
+  format %{ "REVSH $dst,$src" %}
+  ins_encode %{
+    __ revsh($dst$$Register, $src$$Register);
+  %}
+  ins_pipe( iload_mem ); // FIXME
+%}
+
+
+// ====================VECTOR INSTRUCTIONS=====================================
+
+// Load Aligned Packed values into a Double Register
+instruct loadV8(vecD dst, memoryD mem) %{
+  // Selected for LoadVector nodes whose memory size is exactly 8 bytes.
+  predicate(n->as_LoadVector()->memory_size() == 8);
+  match(Set dst (LoadVector mem));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "FLDD   $mem,$dst\t! load vector (8 bytes)" %}
+  ins_encode %{
+    // One 64-bit VFP load from $mem into $dst.
+    __ vldr_f64($dst$$FloatRegister, $mem$$Address);
+  %}
+  ins_pipe(floadD_mem);
+%}
+
+// Load Aligned Packed values into a Double Register Pair
+instruct loadV16(vecX dst, memoryvld mem) %{
+  // Selected for LoadVector nodes whose memory size is exactly 16 bytes.
+  predicate(n->as_LoadVector()->memory_size() == 16);
+  match(Set dst (LoadVector mem));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "VLD1   $mem,$dst.Q\t! load vector (16 bytes)" %}
+  ins_encode %{
+    // Loads into $dst and its DOUBLE successor register as a pair, using
+    // the ALIGN_STD alignment mode.
+    __ vld1_16($dst$$FloatRegister, $dst$$FloatRegister->successor(FloatRegisterImpl::DOUBLE), $mem$$Address, Assembler::ALIGN_STD);
+  %}
+  ins_pipe(floadD_mem); // FIXME
+%}
+
+// Store Vector in Double register to memory
+instruct storeV8(memoryD mem, vecD src) %{
+  // Selected for StoreVector nodes whose memory size is exactly 8 bytes.
+  predicate(n->as_StoreVector()->memory_size() == 8);
+  match(Set mem (StoreVector mem src));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "FSTD   $src,$mem\t! store vector (8 bytes)" %}
+  ins_encode %{
+    // One 64-bit VFP store of $src to $mem.
+    __ vstr_f64($src$$FloatRegister, $mem$$Address);
+  %}
+  ins_pipe(fstoreD_mem_reg);
+%}
+
+// Store Vector in Double Register Pair to memory
+instruct storeV16(memoryvld mem, vecX src) %{
+  // Selected for StoreVector nodes whose memory size is exactly 16 bytes.
+  predicate(n->as_StoreVector()->memory_size() == 16);
+  match(Set mem (StoreVector mem src));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "VST1   $src,$mem\t! store vector (16 bytes)" %}
+  ins_encode %{
+    // Stores $src and its DOUBLE successor register as a pair, using the
+    // ALIGN_STD alignment mode.
+    __ vst1_16($src$$FloatRegister, $src$$FloatRegister->successor(FloatRegisterImpl::DOUBLE), $mem$$Address, Assembler::ALIGN_STD);
+  %}
+  ins_pipe(fstoreD_mem_reg); // FIXME
+%}
+
+// Replicate scalar to packed byte values in Double register
+// Integer-only fallback for ReplicateB into a 64-bit vector: the byte is
+// shifted to the top of $tmp, smeared down across the word with two
+// shift-and-OR steps, and the resulting word is copied into both halves of
+// $dst. Costed at DEFAULT_COST*4.
+instruct Repl8B_reg(vecD dst, iRegI src, iRegI tmp) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateB src));
+  ins_cost(DEFAULT_COST*4);
+  effect(TEMP tmp);
+  size(16);
+
+  // FIXME: could use PKH instruction instead?
+  format %{ "LSL      $tmp, $src, 24 \n\t"
+            "OR       $tmp, $tmp, ($tmp >> 8) \n\t"
+            "OR       $tmp, $tmp, ($tmp >> 16) \n\t"
+            "FMDRR    $dst,$tmp,$tmp\t" %}
+  ins_encode %{
+    Register word = $tmp$$Register;
+
+    // Byte into bits 31..24, then duplicate into 23..16, then into 15..0.
+    __ mov(word, $src$$Register, lsl(24));
+    __ orr(word, word, word, lsr(8));
+    __ orr(word, word, word, lsr(16));
+    // Same word for the low and the high half of the double register.
+    __ vmov_f64($dst$$FloatRegister, word, word);
+  %}
+  ins_pipe(ialu_reg); // FIXME
+%}
+
+// Replicate scalar to packed byte values in Double register
+instruct Repl8B_reg_simd(vecD dst, iRegI src) %{
+  // Requires an 8-byte vector and the FT_AdvSIMD feature bit.
+  predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (ReplicateB src));
+  size(4);
+
+  format %{ "VDUP.8 $dst,$src\t" %}
+  ins_encode %{
+    // Single VDUP.8 broadcasting $src across the 64-bit $dst.
+    __ vdup_64_8($dst$$FloatRegister, $src$$Register);
+  %}
+  ins_pipe(ialu_reg); // FIXME
+%}
+
+// Replicate scalar to packed byte values in Double register pair
+instruct Repl16B_reg(vecX dst, iRegI src) %{
+  // NOTE(review): unlike the other VDUP-based rules nearby, this predicate
+  // does not test FT_AdvSIMD -- presumably 16-byte vectors are only enabled
+  // when AdvSIMD is present; confirm.
+  predicate(n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (ReplicateB src));
+  size(4);
+
+  format %{ "VDUP.8 $dst.Q,$src\t" %}
+  ins_encode %{
+    // Single VDUP.8 broadcasting $src across the 128-bit $dst.
+    __ vdup_128_8($dst$$FloatRegister, $src$$Register);
+  %}
+  ins_pipe(ialu_reg); // FIXME
+%}
+
+// Replicate scalar constant to packed byte values in Double register
+// Integer-only path for ReplicateB of an immI constant into a 64-bit vector.
+// The replicated word is built in $tmp and copied into both halves of $dst by
+// the LdReplImmI encoding. The trailing (4), (1) arguments are presumably the
+// replication count per word and the element width in bytes -- confirm
+// against the enc_class.
+instruct Repl8B_immI(vecD dst, immI src, iRegI tmp) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateB src));
+  ins_cost(DEFAULT_COST*2);
+  effect(TEMP tmp);
+  size(12);
+
+  // Parentheses in the listing are balanced: Repl4($src).
+  format %{ "MOV      $tmp, Repl4($src)\n\t"
+            "FMDRR    $dst,$tmp,$tmp\t" %}
+  ins_encode( LdReplImmI(src, dst, tmp, (4), (1)) );
+  ins_pipe(loadConFD); // FIXME
+%}
+
+// Replicate scalar constant to packed byte values in Double register
+// TODO: support negative constants with MVNI?
+instruct Repl8B_immU8(vecD dst, immU8 src) %{
+  // Requires an 8-byte vector and the FT_AdvSIMD feature bit; the constant
+  // must fit the immU8 operand.
+  predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (ReplicateB src));
+  size(4);
+
+  format %{ "VMOV.U8  $dst,$src" %}
+  ins_encode %{
+    // Single vector move-immediate filling every byte lane of the 64-bit $dst.
+    __ vmov_64_8($dst$$FloatRegister, $src$$constant);
+  %}
+  ins_pipe(loadConFD); // FIXME
+%}
+
+// Fill every byte lane of a 128-bit vector (vecX, a double-register pair)
+// with an immU8 constant. Only selected for 16-byte vectors when the
+// AdvSIMD feature bit is set.
+instruct Repl16B_immU8(vecX dst, immU8 src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (ReplicateB src));
+  size(4);
+
+  format %{ "VMOV.U8  $dst.Q,$src" %}
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    __ vmov_128_8(vd, $src$$constant);
+  %}
+  ins_pipe(loadConFD); // FIXME
+%}
+
+// Broadcast a 16-bit scalar into the four short/char lanes of a 64-bit
+// vector (vecD) without using vector instructions: the value is doubled up
+// inside a core-register temporary, which is then copied into both words
+// of the destination.
+instruct Repl4S_reg(vecD dst, iRegI src, iRegI tmp) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateS src));
+  effect(TEMP tmp);
+  ins_cost(DEFAULT_COST*3);
+  size(12);
+
+  // FIXME: could use PKH instruction instead?
+  format %{ "LSL      $tmp, $src, 16 \n\t"
+            "OR       $tmp, $tmp, ($tmp >> 16) \n\t"
+            "FMDRR    $dst,$tmp,$tmp\t" %}
+  ins_encode %{
+    Register packed = $tmp$$Register;
+    // packed = src << 16, leaving the low half clear ...
+    __ mov(packed, $src$$Register, lsl(16));
+    // ... then OR in the same 16 bits shifted back down.
+    __ orr(packed, packed, packed, lsr(16));
+    // The same word goes into both halves of the double register.
+    __ vmov_f64($dst$$FloatRegister, packed, packed);
+  %}
+  ins_pipe(ialu_reg); // FIXME
+%}
+
+// Broadcast a scalar held in a core register into the four short/char
+// lanes of a 64-bit vector (vecD). Only selected for 8-byte vectors when
+// the AdvSIMD feature bit is set.
+instruct Repl4S_reg_simd(vecD dst, iRegI src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (ReplicateS src));
+  size(4);
+
+  format %{ "VDUP.16 $dst,$src\t" %}
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    Register scalar = $src$$Register;
+    __ vdup_64_16(vd, scalar);
+  %}
+  ins_pipe(ialu_reg); // FIXME
+%}
+
+// Broadcast a scalar held in a core register into the eight short/char
+// lanes of a 128-bit vector (vecX, a double-register pair). Only selected
+// for 16-byte vectors when the AdvSIMD feature bit is set.
+instruct Repl8S_reg(vecX dst, iRegI src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (ReplicateS src));
+  size(4);
+
+  format %{ "VDUP.16 $dst.Q,$src\t" %}
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    Register scalar = $src$$Register;
+    __ vdup_128_16(vd, scalar);
+  %}
+  ins_pipe(ialu_reg); // FIXME
+%}
+
+
+// Replicate scalar constant to packed short/char values in Double register.
+// The constant is first built in the temporary $tmp and then copied into
+// both halves of $dst; the actual code comes from the LdReplImmI encoding
+// class (defined elsewhere in this file), invoked here with the trailing
+// arguments (2) and (2) - presumably replication count and element width
+// in bytes; confirm against the enc_class.
+// NOTE(review): tmp is declared iRegP here while the sibling *_immI rules
+// use iRegI - confirm whether that is intentional.
+instruct Repl4S_immI(vecD dst, immI src, iRegP tmp) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateS src));
+  effect(TEMP tmp);
+  size(12);
+  ins_cost(DEFAULT_COST*4); // FIXME
+
+  // Debug listing only; the stray extra ')' after $src was removed.
+  format %{ "MOV      $tmp, Repl2($src)\n\t"
+            "FMDRR    $dst,$tmp,$tmp\t" %}
+  ins_encode( LdReplImmI(src, dst, tmp, (2), (2)) );
+  ins_pipe(loadConFD); // FIXME
+%}
+
+// Fill the four short/char lanes of a 64-bit vector (vecD) with an immU8
+// constant. Only selected for 8-byte vectors when the AdvSIMD feature bit
+// is set.
+instruct Repl4S_immU8(vecD dst, immU8 src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (ReplicateS src));
+  size(4);
+
+  format %{ "VMOV.U16  $dst,$src" %}
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    __ vmov_64_16(vd, $src$$constant);
+  %}
+  ins_pipe(loadConFD); // FIXME
+%}
+
+// Fill the eight short/char lanes of a 128-bit vector (vecX, a
+// double-register pair) with an immU8 constant. Only selected for 16-byte
+// vectors when the AdvSIMD feature bit is set.
+instruct Repl8S_immU8(vecX dst, immU8 src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (ReplicateS src));
+  size(4);
+
+  format %{ "VMOV.U16  $dst.Q,$src" %}
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    __ vmov_128_16(vd, $src$$constant);
+  %}
+  ins_pipe(loadConFD); // FIXME
+%}
+
+// Broadcast a 32-bit integer from a core register into both int lanes of a
+// 64-bit vector (vecD) by passing the same register as both source words
+// of a core-to-double move.
+instruct Repl2I_reg(vecD dst, iRegI src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateI src));
+  size(4);
+
+  format %{ "FMDRR    $dst,$src,$src\t" %}
+  ins_encode %{
+    Register value = $src$$Register;
+    __ vmov_f64($dst$$FloatRegister, value, value);
+  %}
+  ins_pipe(ialu_reg); // FIXME
+%}
+
+// Broadcast a 32-bit integer from a core register into all four int lanes
+// of a 128-bit vector (vecX, a double-register pair) using two
+// core-to-double moves, one per double register of the pair.
+instruct Repl4I_reg(vecX dst, iRegI src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateI src));
+  size(8);
+  ins_cost(DEFAULT_COST*2);
+
+  format %{ "FMDRR    $dst.lo,$src,$src\n\t"
+            "FMDRR    $dst.hi,$src,$src" %}
+
+  ins_encode %{
+    Register value = $src$$Register;
+    FloatRegister lo = $dst$$FloatRegister;
+    FloatRegister hi = lo->successor(FloatRegisterImpl::DOUBLE);
+    // Low double register first, then the one that follows it.
+    __ vmov_f64(lo, value, value);
+    __ vmov_f64(hi, value, value);
+  %}
+  ins_pipe(ialu_reg); // FIXME
+%}
+
+// Broadcast a 32-bit integer from a core register into both int lanes of a
+// 64-bit vector (vecD). Only selected for 8-byte vectors when the AdvSIMD
+// feature bit is set.
+instruct Repl2I_reg_simd(vecD dst, iRegI src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (ReplicateI src));
+  size(4);
+
+  format %{ "VDUP.32 $dst.D,$src\t" %}
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    Register scalar = $src$$Register;
+    __ vdup_64_32(vd, scalar);
+  %}
+  ins_pipe(ialu_reg); // FIXME
+%}
+
+// Broadcast a 32-bit integer from a core register into all four int lanes
+// of a 128-bit vector (vecX, a double-register pair). Only selected for
+// 16-byte vectors when the AdvSIMD feature bit is set.
+instruct Repl4I_reg_simd(vecX dst, iRegI src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (ReplicateI src));
+  size(4);
+
+  format %{ "VDUP.32 $dst.Q,$src\t" %}
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    Register scalar = $src$$Register;
+    __ vdup_128_32(vd, scalar);
+  %}
+  ins_pipe(ialu_reg); // FIXME
+%}
+
+
+// Replicate scalar constant to packed int values in Double register.
+// The operand is a general immI, not only zero. The constant is first
+// built in the core-register temporary $tmp and then copied into both
+// halves of $dst; the actual code comes from the LdReplImmI encoding class
+// (defined elsewhere in this file), invoked here with the trailing
+// arguments (1) and (4) - presumably replication count and element width
+// in bytes; confirm against the enc_class.
+instruct Repl2I_immI(vecD dst, immI src, iRegI tmp) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateI src));
+  effect(TEMP tmp);
+  size(12);
+  ins_cost(DEFAULT_COST*4); // FIXME
+
+  // Debug listing only; the stray extra ')' after $src was removed.
+  format %{ "MOV      $tmp, Repl1($src)\n\t"
+            "FMDRR    $dst,$tmp,$tmp\t" %}
+  ins_encode( LdReplImmI(src, dst, tmp, (1), (4)) );
+  ins_pipe(loadConFD); // FIXME
+%}
+
+// Fill both int lanes of a 64-bit vector (vecD) with an immU8 constant.
+// Only selected for 8-byte vectors when the AdvSIMD feature bit is set.
+instruct Repl2I_immU8(vecD dst, immU8 src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (ReplicateI src));
+  size(4);
+
+  format %{ "VMOV.I32  $dst.D,$src" %}
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    __ vmov_64_32(vd, $src$$constant);
+  %}
+  ins_pipe(loadConFD); // FIXME
+%}
+
+// Fill all four int lanes of a 128-bit vector (vecX, a double-register
+// pair) with an immU8 constant. Only selected for 16-byte vectors when the
+// AdvSIMD feature bit is set.
+instruct Repl4I_immU8(vecX dst, immU8 src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (ReplicateI src));
+  size(4);
+
+  format %{ "VMOV.I32  $dst.Q,$src" %}
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    __ vmov_128_32(vd, $src$$constant);
+  %}
+  ins_pipe(loadConFD); // FIXME
+%}
+
+// Replicate scalar to packed long values in Double register pair.
+// The 64-bit source occupies a core register and its successor; that pair
+// is copied into each of the two double registers that make up $dst.
+instruct Repl2L_reg(vecX dst, iRegL src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateL src));
+  size(8);
+  ins_cost(DEFAULT_COST*2); // FIXME
+
+  // Debug listing only; line separator normalised to "\n\t" like the
+  // other multi-instruction formats in this file (it was "\t\n").
+  format %{ "FMDRR $dst.D,$src.lo,$src.hi\n\t"
+            "FMDRR $dst.D.next,$src.lo,$src.hi" %}
+  ins_encode %{
+    // First double register of the pair.
+    __ vmov_f64($dst$$FloatRegister, $src$$Register, $src$$Register->successor());
+    // Second double register of the pair gets the same two words.
+    __ vmov_f64($dst$$FloatRegister->successor(FloatRegisterImpl::DOUBLE),
+             $src$$Register, $src$$Register->successor());
+  %}
+  ins_pipe(ialu_reg); // FIXME
+%}
+
+
+// Fill both float lanes of a 64-bit vector (vecD) from a value that is
+// already held in a core register: the same register is used for both
+// source words of the core-to-double move. Also used as the second step
+// of the Repl2F_reg_vfp expansion.
+instruct Repl2F_regI(vecD dst, iRegI src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateF src));
+  size(4);
+
+  format %{ "FMDRR    $dst.D,$src,$src\t" %}
+  ins_encode %{
+    Register bits = $src$$Register;
+    __ vmov_f64($dst$$FloatRegister, bits, bits);
+  %}
+  ins_pipe(ialu_reg); // FIXME
+%}
+
+// Replicate scalar to packed float values in Double register
+// Expand-only rule (no encoding of its own): the float source is first
+// moved into a fresh core-register temporary with MoveF2I_reg_reg, and
+// Repl2F_regI then fills the vector from that temporary. The declared
+// size and cost cover the two resulting instructions.
+instruct Repl2F_reg_vfp(vecD dst, regF src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateF src));
+  size(4*2);
+  ins_cost(DEFAULT_COST*2); // FIXME
+
+  expand %{
+    iRegI tmp;
+    MoveF2I_reg_reg(tmp, src);
+    Repl2F_regI(dst,tmp);
+  %}
+%}
+
+// Broadcast a float held in a float register into both float lanes of a
+// 64-bit vector (vecD). Only selected for 8-byte vectors when the AdvSIMD
+// feature bit is set.
+instruct Repl2F_reg_simd(vecD dst, regF src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (ReplicateF src));
+  ins_cost(DEFAULT_COST); // FIXME
+  size(4);
+
+  format %{ "VDUP.32  $dst.D,$src.D\t" %}
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    FloatRegister scalar = $src$$FloatRegister;
+    __ vdups_64(vd, scalar);
+  %}
+  ins_pipe(ialu_reg); // FIXME
+%}
+
+// Broadcast a float into all four float lanes of a 128-bit vector (vecX, a
+// double-register pair) without vector instructions: the float is moved to
+// a core-register temporary, which is then written to both words of each
+// double register of the pair.
+instruct Repl4F_reg(vecX dst, regF src, iRegI tmp) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateF src));
+  effect(TEMP tmp);
+  ins_cost(DEFAULT_COST*3); // FIXME
+  size(4*3);
+
+  format %{ "FMRS     $tmp,$src\n\t"
+            "FMDRR    $dst.D,$tmp,$tmp\n\t"
+            "FMDRR    $dst.D.next,$tmp,$tmp\t" %}
+  ins_encode %{
+    Register bits = $tmp$$Register;
+    FloatRegister lo = $dst$$FloatRegister;
+    FloatRegister hi = lo->successor(FloatRegisterImpl::DOUBLE);
+    // Float register -> core register.
+    __ vmov_f32(bits, $src$$FloatRegister);
+    // Same word into both halves of each double register.
+    __ vmov_f64(lo, bits, bits);
+    __ vmov_f64(hi, bits, bits);
+  %}
+  ins_pipe(ialu_reg); // FIXME
+%}
+
+// Broadcast a float held in a float register into all four float lanes of
+// a 128-bit vector (vecX, a double-register pair). Only selected for
+// 16-byte vectors when the AdvSIMD feature bit is set.
+instruct Repl4F_reg_simd(vecX dst, regF src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (ReplicateF src));
+  ins_cost(DEFAULT_COST); // FIXME
+  size(4);
+
+  format %{ "VDUP.32  $dst.Q,$src.D\t" %}
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    FloatRegister scalar = $src$$FloatRegister;
+    __ vdups_128(vd, scalar);
+  %}
+  ins_pipe(ialu_reg); // FIXME
+%}
+
+// Replicate scalar float constant to packed float values in Double register.
+// The operand is a general immF, not only zero (the "_immI" suffix in the
+// rule name is historical). The constant is first built in the
+// core-register temporary $tmp and then copied into both halves of $dst;
+// the actual code comes from the LdReplImmF encoding class, defined
+// elsewhere in this file.
+instruct Repl2F_immI(vecD dst, immF src, iRegI tmp) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateF src));
+  effect(TEMP tmp);
+  size(12);
+  ins_cost(DEFAULT_COST*4); // FIXME
+
+  // Debug listing only; the stray extra ')' after $src was removed.
+  format %{ "MOV      $tmp, Repl1($src)\n\t"
+            "FMDRR    $dst,$tmp,$tmp\t" %}
+  ins_encode( LdReplImmF(src, dst, tmp) );
+  ins_pipe(loadConFD); // FIXME
+%}
+
+// Broadcast a double into both double lanes of a 128-bit vector (vecX, a
+// double-register pair) with two register-to-register copies, one per
+// double register of the pair.
+instruct Repl2D_reg(vecX dst, regD src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateD src));
+  ins_cost(DEFAULT_COST*2); // FIXME
+  size(4*2);
+
+  format %{ "FCPYD    $dst.D.a,$src\n\t"
+            "FCPYD    $dst.D.b,$src\t" %}
+  ins_encode %{
+    FloatRegister value = $src$$FloatRegister;
+    FloatRegister lo = $dst$$FloatRegister;
+    FloatRegister hi = lo->successor(FloatRegisterImpl::DOUBLE);
+    __ vmov_f64(lo, value);
+    __ vmov_f64(hi, value);
+  %}
+  ins_pipe(ialu_reg); // FIXME
+%}
+
+// ====================VECTOR ARITHMETIC=======================================
+
+// --------------------------------- ADD --------------------------------------
+
+// Bytes vector add
+// Lane-wise add of two 8-byte vectors held in vecD registers.
+instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (AddVB src1 src2));
+  size(4);
+  format %{ "VADD.I8 $dst,$src1,$src2\t! add packed8B" %}
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    FloatRegister vn = $src1$$FloatRegister;
+    FloatRegister vm = $src2$$FloatRegister;
+    __ vadd_64_8(vd, vn, vm);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Lane-wise add of two 16-byte vectors held in vecX registers.
+instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (AddVB src1 src2));
+  format %{ "VADD.I8 $dst.Q,$src1.Q,$src2.Q\t! add packed16B" %}
+  size(4);
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    FloatRegister vn = $src1$$FloatRegister;
+    FloatRegister vm = $src2$$FloatRegister;
+    __ vadd_128_8(vd, vn, vm);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Shorts/Chars vector add
+// Lane-wise add of two 4-element short/char vectors held in vecD registers.
+instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AddVS src1 src2));
+  format %{ "VADD.I16 $dst,$src1,$src2\t! add packed4S" %}
+  size(4);
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    FloatRegister vn = $src1$$FloatRegister;
+    FloatRegister vm = $src2$$FloatRegister;
+    __ vadd_64_16(vd, vn, vm);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Lane-wise add of two 8-element short/char vectors held in vecX registers.
+instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (AddVS src1 src2));
+  format %{ "VADD.I16 $dst.Q,$src1.Q,$src2.Q\t! add packed8S" %}
+  size(4);
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    FloatRegister vn = $src1$$FloatRegister;
+    FloatRegister vm = $src2$$FloatRegister;
+    __ vadd_128_16(vd, vn, vm);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Integers vector add
+// Lane-wise add of two 2-element int vectors held in vecD registers.
+instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVI src1 src2));
+  format %{ "VADD.I32 $dst.D,$src1.D,$src2.D\t! add packed2I" %}
+  size(4);
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    FloatRegister vn = $src1$$FloatRegister;
+    FloatRegister vm = $src2$$FloatRegister;
+    __ vadd_64_32(vd, vn, vm);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Lane-wise add of two 4-element int vectors held in vecX registers.
+instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AddVI src1 src2));
+  format %{ "VADD.I32 $dst.Q,$src1.Q,$src2.Q\t! add packed4I" %}
+  size(4);
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    FloatRegister vn = $src1$$FloatRegister;
+    FloatRegister vm = $src2$$FloatRegister;
+    __ vadd_128_32(vd, vn, vm);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Longs vector add
+// Lane-wise add of two 2-element long vectors held in vecX registers.
+instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVL src1 src2));
+  size(4);
+  format %{ "VADD.I64 $dst.Q,$src1.Q,$src2.Q\t! add packed2L" %}
+  ins_encode %{
+    // The unused local 'quad' was dropped; the 128-bit width is already
+    // selected by the vadd_128_64 helper itself.
+    __ vadd_128_64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Floats vector add
+// Adds two 2-element float vectors with one scalar vadd_f32 per lane; the
+// second lane lives in the single-precision successor of each operand.
+instruct vadd2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVF src1 src2));
+  size(4*2);
+  ins_cost(DEFAULT_COST*2); // FIXME
+
+  format %{ "FADDS  $dst.a,$src1.a,$src2.a\n\t"
+            "FADDS  $dst.b,$src1.b,$src2.b" %}
+  ins_encode %{
+    FloatRegister d0 = $dst$$FloatRegister;
+    FloatRegister a0 = $src1$$FloatRegister;
+    FloatRegister b0 = $src2$$FloatRegister;
+    __ vadd_f32(d0, a0, b0);
+    __ vadd_f32(d0->successor(FloatRegisterImpl::SINGLE),
+                a0->successor(FloatRegisterImpl::SINGLE),
+                b0->successor(FloatRegisterImpl::SINGLE));
+  %}
+
+  ins_pipe(faddF_reg_reg); // FIXME
+%}
+
+// Adds two 4-element float vectors with one scalar vadd_f32 per lane.
+// Lanes a..d are consecutive single-precision registers reached through
+// successor(SINGLE) from each operand's first register.
+instruct vadd4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AddVF src1 src2));
+  ins_cost(DEFAULT_COST*4); // FIXME
+  size(4*4);
+
+  format %{ "FADDS  $dst.a,$src1.a,$src2.a\n\t"
+            "FADDS  $dst.b,$src1.b,$src2.b\n\t"
+            "FADDS  $dst.c,$src1.c,$src2.c\n\t"
+            "FADDS  $dst.d,$src1.d,$src2.d" %}
+
+  ins_encode %{
+    // Destination lanes.
+    FloatRegister d0 = $dst$$FloatRegister;
+    FloatRegister d1 = d0->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister d2 = d1->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister d3 = d2->successor(FloatRegisterImpl::SINGLE);
+    // First-operand lanes.
+    FloatRegister a0 = $src1$$FloatRegister;
+    FloatRegister a1 = a0->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister a2 = a1->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister a3 = a2->successor(FloatRegisterImpl::SINGLE);
+    // Second-operand lanes.
+    FloatRegister b0 = $src2$$FloatRegister;
+    FloatRegister b1 = b0->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister b2 = b1->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister b3 = b2->successor(FloatRegisterImpl::SINGLE);
+
+    __ vadd_f32(d0, a0, b0);
+    __ vadd_f32(d1, a1, b1);
+    __ vadd_f32(d2, a2, b2);
+    __ vadd_f32(d3, a3, b3);
+  %}
+
+  ins_pipe(faddF_reg_reg); // FIXME
+%}
+
+// Adds two 2-element double vectors with one scalar vadd_f64 per lane; the
+// second lane is the double-precision successor of each operand register.
+instruct vadd2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVD src1 src2));
+  ins_cost(DEFAULT_COST*2); // FIXME
+  size(4*2);
+
+  format %{ "FADDD  $dst.a,$src1.a,$src2.a\n\t"
+            "FADDD  $dst.b,$src1.b,$src2.b" %}
+
+  ins_encode %{
+    FloatRegister d0 = $dst$$FloatRegister;
+    FloatRegister d1 = d0->successor(FloatRegisterImpl::DOUBLE);
+    FloatRegister a0 = $src1$$FloatRegister;
+    FloatRegister a1 = a0->successor(FloatRegisterImpl::DOUBLE);
+    FloatRegister b0 = $src2$$FloatRegister;
+    FloatRegister b1 = b0->successor(FloatRegisterImpl::DOUBLE);
+
+    __ vadd_f64(d0, a0, b0);
+    __ vadd_f64(d1, a1, b1);
+  %}
+
+  ins_pipe(faddF_reg_reg); // FIXME
+%}
+
+
+// Bytes vector sub
+// Lane-wise subtract (src1 - src2) of two 8-byte vectors in vecD registers.
+instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (SubVB src1 src2));
+  format %{ "VSUB.I8 $dst,$src1,$src2\t! sub packed8B" %}
+  size(4);
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    FloatRegister vn = $src1$$FloatRegister;
+    FloatRegister vm = $src2$$FloatRegister;
+    __ vsub_64_8(vd, vn, vm);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Lane-wise subtract (src1 - src2) of two 16-byte vectors in vecX registers.
+instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (SubVB src1 src2));
+  format %{ "VSUB.I8 $dst.Q,$src1.Q,$src2.Q\t! sub packed16B" %}
+  size(4);
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    FloatRegister vn = $src1$$FloatRegister;
+    FloatRegister vm = $src2$$FloatRegister;
+    __ vsub_128_8(vd, vn, vm);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Shorts/Chars vector sub
+// Lane-wise subtract (src1 - src2) of two 4-element short/char vectors in
+// vecD registers.
+instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (SubVS src1 src2));
+  format %{ "VSUB.I16 $dst,$src1,$src2\t! sub packed4S" %}
+  size(4);
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    FloatRegister vn = $src1$$FloatRegister;
+    FloatRegister vm = $src2$$FloatRegister;
+    __ vsub_64_16(vd, vn, vm);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Lane-wise subtract (src1 - src2) of two 8-element short/char vectors in
+// vecX registers.
+// NOTE(review): the rule name says "16S" but the predicate and format
+// describe 8 shorts (packed8S); the name is left untouched here so that
+// any references to it keep working.
+instruct vsub16S_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (SubVS src1 src2));
+  format %{ "VSUB.I16 $dst.Q,$src1.Q,$src2.Q\t! sub packed8S" %}
+  size(4);
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    FloatRegister vn = $src1$$FloatRegister;
+    FloatRegister vm = $src2$$FloatRegister;
+    __ vsub_128_16(vd, vn, vm);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Integers vector sub
+// Lane-wise subtract (src1 - src2) of two 2-element int vectors in vecD
+// registers.
+instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVI src1 src2));
+  format %{ "VSUB.I32 $dst,$src1,$src2\t! sub packed2I" %}
+  size(4);
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    FloatRegister vn = $src1$$FloatRegister;
+    FloatRegister vm = $src2$$FloatRegister;
+    __ vsub_64_32(vd, vn, vm);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Lane-wise subtract (src1 - src2) of two 4-element int vectors in vecX
+// registers.
+instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (SubVI src1 src2));
+  size(4);
+  format %{ "VSUB.I32 $dst.Q,$src1.Q,$src2.Q\t! sub packed4I" %}
+  ins_encode %{
+    // The unused local 'quad' was dropped; the 128-bit width is already
+    // selected by the vsub_128_32 helper itself.
+    __ vsub_128_32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Longs vector sub
+// Lane-wise subtract (src1 - src2) of two 2-element long vectors in vecX
+// registers.
+instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVL src1 src2));
+  format %{ "VSUB.I64 $dst.Q,$src1.Q,$src2.Q\t! sub packed2L" %}
+  size(4);
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    FloatRegister vn = $src1$$FloatRegister;
+    FloatRegister vm = $src2$$FloatRegister;
+    __ vsub_128_64(vd, vn, vm);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Floats vector sub
+// Subtracts two 2-element float vectors with one scalar vsub_f32 per lane;
+// the second lane is the single-precision successor of each operand.
+instruct vsub2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVF src1 src2));
+  ins_cost(DEFAULT_COST*2); // FIXME
+  size(4*2);
+
+  format %{ "FSUBS  $dst.a,$src1.a,$src2.a\n\t"
+            "FSUBS  $dst.b,$src1.b,$src2.b" %}
+
+  ins_encode %{
+    FloatRegister d0 = $dst$$FloatRegister;
+    FloatRegister d1 = d0->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister a0 = $src1$$FloatRegister;
+    FloatRegister a1 = a0->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister b0 = $src2$$FloatRegister;
+    FloatRegister b1 = b0->successor(FloatRegisterImpl::SINGLE);
+
+    __ vsub_f32(d0, a0, b0);
+    __ vsub_f32(d1, a1, b1);
+  %}
+
+  ins_pipe(faddF_reg_reg); // FIXME
+%}
+
+
+// Subtracts two 4-element float vectors with one scalar vsub_f32 per lane.
+// Lanes a..d are consecutive single-precision registers reached through
+// successor(SINGLE) from each operand's first register.
+instruct vsub4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (SubVF src1 src2));
+  ins_cost(DEFAULT_COST*4); // FIXME
+  size(4*4);
+
+  format %{ "FSUBS  $dst.a,$src1.a,$src2.a\n\t"
+            "FSUBS  $dst.b,$src1.b,$src2.b\n\t"
+            "FSUBS  $dst.c,$src1.c,$src2.c\n\t"
+            "FSUBS  $dst.d,$src1.d,$src2.d" %}
+
+  ins_encode %{
+    // Destination lanes.
+    FloatRegister d0 = $dst$$FloatRegister;
+    FloatRegister d1 = d0->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister d2 = d1->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister d3 = d2->successor(FloatRegisterImpl::SINGLE);
+    // Minuend lanes.
+    FloatRegister a0 = $src1$$FloatRegister;
+    FloatRegister a1 = a0->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister a2 = a1->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister a3 = a2->successor(FloatRegisterImpl::SINGLE);
+    // Subtrahend lanes.
+    FloatRegister b0 = $src2$$FloatRegister;
+    FloatRegister b1 = b0->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister b2 = b1->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister b3 = b2->successor(FloatRegisterImpl::SINGLE);
+
+    __ vsub_f32(d0, a0, b0);
+    __ vsub_f32(d1, a1, b1);
+    __ vsub_f32(d2, a2, b2);
+    __ vsub_f32(d3, a3, b3);
+  %}
+
+  ins_pipe(faddF_reg_reg); // FIXME
+%}
+
+// Subtracts two 2-element double vectors with one scalar vsub_f64 per
+// lane; the second lane is the double-precision successor of each operand.
+instruct vsub2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVD src1 src2));
+  ins_cost(DEFAULT_COST*2); // FIXME
+  size(4*2);
+
+  format %{ "FSUBD  $dst.a,$src1.a,$src2.a\n\t"
+            "FSUBD  $dst.b,$src1.b,$src2.b" %}
+
+  ins_encode %{
+    FloatRegister d0 = $dst$$FloatRegister;
+    FloatRegister d1 = d0->successor(FloatRegisterImpl::DOUBLE);
+    FloatRegister a0 = $src1$$FloatRegister;
+    FloatRegister a1 = a0->successor(FloatRegisterImpl::DOUBLE);
+    FloatRegister b0 = $src2$$FloatRegister;
+    FloatRegister b1 = b0->successor(FloatRegisterImpl::DOUBLE);
+
+    __ vsub_f64(d0, a0, b0);
+    __ vsub_f64(d1, a1, b1);
+  %}
+
+  ins_pipe(faddF_reg_reg); // FIXME
+%}
+
+// Shorts/Chars vector mul
+// Lane-wise multiply of two 4-element short/char vectors in vecD registers.
+instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (MulVS src1 src2));
+  format %{ "VMUL.I16 $dst,$src1,$src2\t! mul packed4S" %}
+  size(4);
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    FloatRegister vn = $src1$$FloatRegister;
+    FloatRegister vm = $src2$$FloatRegister;
+    __ vmul_64_16(vd, vn, vm);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Lane-wise multiply of two 8-element short/char vectors in vecX registers.
+instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (MulVS src1 src2));
+  format %{ "VMUL.I16 $dst.Q,$src1.Q,$src2.Q\t! mul packed8S" %}
+  size(4);
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    FloatRegister vn = $src1$$FloatRegister;
+    FloatRegister vm = $src2$$FloatRegister;
+    __ vmul_128_16(vd, vn, vm);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Integers vector mul
+// Lane-wise multiply of two 2-element int vectors in vecD registers.
+instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (MulVI src1 src2));
+  format %{ "VMUL.I32 $dst,$src1,$src2\t! mul packed2I" %}
+  size(4);
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    FloatRegister vn = $src1$$FloatRegister;
+    FloatRegister vm = $src2$$FloatRegister;
+    __ vmul_64_32(vd, vn, vm);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Lane-wise multiply of two 4-element int vectors in vecX registers.
+instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (MulVI src1 src2));
+  format %{ "VMUL.I32 $dst.Q,$src1.Q,$src2.Q\t! mul packed4I" %}
+  size(4);
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    FloatRegister vn = $src1$$FloatRegister;
+    FloatRegister vm = $src2$$FloatRegister;
+    __ vmul_128_32(vd, vn, vm);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Floats vector mul
+// Multiplies two 2-element float vectors with one scalar vmul_f32 per
+// lane; the second lane is the single-precision successor of each operand.
+instruct vmul2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (MulVF src1 src2));
+  ins_cost(DEFAULT_COST*2); // FIXME
+  size(4*2);
+
+  format %{ "FMULS  $dst.a,$src1.a,$src2.a\n\t"
+            "FMULS  $dst.b,$src1.b,$src2.b" %}
+  ins_encode %{
+    FloatRegister d0 = $dst$$FloatRegister;
+    FloatRegister a0 = $src1$$FloatRegister;
+    FloatRegister b0 = $src2$$FloatRegister;
+    __ vmul_f32(d0, a0, b0);
+    __ vmul_f32(d0->successor(FloatRegisterImpl::SINGLE),
+                a0->successor(FloatRegisterImpl::SINGLE),
+                b0->successor(FloatRegisterImpl::SINGLE));
+  %}
+
+  ins_pipe(fmulF_reg_reg); // FIXME
+%}
+
+// Multiplies two 4-element float vectors with one scalar vmul_f32 per
+// lane. Lanes a..d are consecutive single-precision registers reached
+// through successor(SINGLE) from each operand's first register.
+instruct vmul4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (MulVF src1 src2));
+  ins_cost(DEFAULT_COST*4); // FIXME
+  size(4*4);
+
+  format %{ "FMULS  $dst.a,$src1.a,$src2.a\n\t"
+            "FMULS  $dst.b,$src1.b,$src2.b\n\t"
+            "FMULS  $dst.c,$src1.c,$src2.c\n\t"
+            "FMULS  $dst.d,$src1.d,$src2.d" %}
+
+  ins_encode %{
+    // Destination lanes.
+    FloatRegister d0 = $dst$$FloatRegister;
+    FloatRegister d1 = d0->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister d2 = d1->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister d3 = d2->successor(FloatRegisterImpl::SINGLE);
+    // First-operand lanes.
+    FloatRegister a0 = $src1$$FloatRegister;
+    FloatRegister a1 = a0->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister a2 = a1->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister a3 = a2->successor(FloatRegisterImpl::SINGLE);
+    // Second-operand lanes.
+    FloatRegister b0 = $src2$$FloatRegister;
+    FloatRegister b1 = b0->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister b2 = b1->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister b3 = b2->successor(FloatRegisterImpl::SINGLE);
+
+    __ vmul_f32(d0, a0, b0);
+    __ vmul_f32(d1, a1, b1);
+    __ vmul_f32(d2, a2, b2);
+    __ vmul_f32(d3, a3, b3);
+  %}
+
+  ins_pipe(fmulF_reg_reg); // FIXME
+%}
+
+// Multiplies two 2-element double vectors with one scalar vmul_f64 per
+// lane; the second lane is the double-precision successor of each operand.
+instruct vmul2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (MulVD src1 src2));
+  ins_cost(DEFAULT_COST*2); // FIXME
+  size(4*2);
+
+  format %{ "FMULD  $dst.D.a,$src1.D.a,$src2.D.a\n\t"
+            "FMULD  $dst.D.b,$src1.D.b,$src2.D.b" %}
+  ins_encode %{
+    FloatRegister d0 = $dst$$FloatRegister;
+    FloatRegister d1 = d0->successor(FloatRegisterImpl::DOUBLE);
+    FloatRegister a0 = $src1$$FloatRegister;
+    FloatRegister a1 = a0->successor(FloatRegisterImpl::DOUBLE);
+    FloatRegister b0 = $src2$$FloatRegister;
+    FloatRegister b1 = b0->successor(FloatRegisterImpl::DOUBLE);
+
+    __ vmul_f64(d0, a0, b0);
+    __ vmul_f64(d1, a1, b1);
+  %}
+
+  ins_pipe(fmulD_reg_reg); // FIXME
+%}
+
+
+// Floats vector div
+// Divides (src1 / src2) two 2-element float vectors with one scalar
+// vdiv_f32 per lane; the second lane is the single-precision successor of
+// each operand.
+instruct vdiv2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (DivVF src1 src2));
+  ins_cost(DEFAULT_COST*2); // FIXME
+  size(4*2);
+
+  format %{ "FDIVS  $dst.a,$src1.a,$src2.a\n\t"
+            "FDIVS  $dst.b,$src1.b,$src2.b" %}
+  ins_encode %{
+    FloatRegister d0 = $dst$$FloatRegister;
+    FloatRegister a0 = $src1$$FloatRegister;
+    FloatRegister b0 = $src2$$FloatRegister;
+    __ vdiv_f32(d0, a0, b0);
+    __ vdiv_f32(d0->successor(FloatRegisterImpl::SINGLE),
+                a0->successor(FloatRegisterImpl::SINGLE),
+                b0->successor(FloatRegisterImpl::SINGLE));
+  %}
+
+  ins_pipe(fdivF_reg_reg); // FIXME
+%}
+
+// Divides (src1 / src2) two 4-element float vectors with one scalar
+// vdiv_f32 per lane. Lanes a..d are consecutive single-precision registers
+// reached through successor(SINGLE) from each operand's first register.
+instruct vdiv4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (DivVF src1 src2));
+  ins_cost(DEFAULT_COST*4); // FIXME
+  size(4*4);
+
+  format %{ "FDIVS  $dst.a,$src1.a,$src2.a\n\t"
+            "FDIVS  $dst.b,$src1.b,$src2.b\n\t"
+            "FDIVS  $dst.c,$src1.c,$src2.c\n\t"
+            "FDIVS  $dst.d,$src1.d,$src2.d" %}
+
+  ins_encode %{
+    // Destination lanes.
+    FloatRegister d0 = $dst$$FloatRegister;
+    FloatRegister d1 = d0->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister d2 = d1->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister d3 = d2->successor(FloatRegisterImpl::SINGLE);
+    // Dividend lanes.
+    FloatRegister a0 = $src1$$FloatRegister;
+    FloatRegister a1 = a0->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister a2 = a1->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister a3 = a2->successor(FloatRegisterImpl::SINGLE);
+    // Divisor lanes.
+    FloatRegister b0 = $src2$$FloatRegister;
+    FloatRegister b1 = b0->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister b2 = b1->successor(FloatRegisterImpl::SINGLE);
+    FloatRegister b3 = b2->successor(FloatRegisterImpl::SINGLE);
+
+    __ vdiv_f32(d0, a0, b0);
+    __ vdiv_f32(d1, a1, b1);
+    __ vdiv_f32(d2, a2, b2);
+    __ vdiv_f32(d3, a3, b3);
+  %}
+
+  ins_pipe(fdivF_reg_reg); // FIXME
+%}
+
+// Divides (src1 / src2) two 2-element double vectors with one scalar
+// vdiv_f64 per lane; the second lane is the double-precision successor of
+// each operand.
+instruct vdiv2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (DivVD src1 src2));
+  ins_cost(DEFAULT_COST*2); // FIXME
+  size(4*2);
+
+  format %{ "FDIVD  $dst.D.a,$src1.D.a,$src2.D.a\n\t"
+            "FDIVD  $dst.D.b,$src1.D.b,$src2.D.b" %}
+  ins_encode %{
+    FloatRegister d0 = $dst$$FloatRegister;
+    FloatRegister d1 = d0->successor(FloatRegisterImpl::DOUBLE);
+    FloatRegister a0 = $src1$$FloatRegister;
+    FloatRegister a1 = a0->successor(FloatRegisterImpl::DOUBLE);
+    FloatRegister b0 = $src2$$FloatRegister;
+    FloatRegister b1 = b0->successor(FloatRegisterImpl::DOUBLE);
+
+    __ vdiv_f64(d0, a0, b0);
+    __ vdiv_f64(d1, a1, b1);
+  %}
+
+  ins_pipe(fdivD_reg_reg); // FIXME
+%}
+
+// --------------------------------- NEG --------------------------------------
+
+// Negates each byte lane of a 64-bit vector (vecD).
+// This rule has no match clause, only an effect list; presumably it is
+// instantiated solely from expand rules elsewhere - confirm.
+instruct vneg8B_reg(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8);
+  effect(DEF dst, USE src);
+  ins_cost(DEFAULT_COST); // FIXME
+  size(4);
+  format %{ "VNEG.S8 $dst.D,$src.D\t! neg packed8B" %}
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    FloatRegister vm = $src$$FloatRegister;
+    __ vneg_64_s8(vd, vm);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Negates each byte lane of a 128-bit vector (vecX).
+// This rule has no match clause, only an effect list; presumably it is
+// instantiated solely from expand rules elsewhere - confirm.
+instruct vneg16B_reg(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16);
+  effect(DEF dst, USE src);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  // Debug listing only; "neg0" typo corrected to "neg" to match vneg8B_reg.
+  format %{ "VNEG.S8 $dst.Q,$src.Q\t! neg packed16B" %}
+  ins_encode %{
+    __ vneg_128_s8($dst$$FloatRegister, $src$$FloatRegister);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// ------------------------------ Shift ---------------------------------------
+
+// Left-shift count vector, 64-bit form. Expand-only rule: the scalar count
+// is replicated into every byte lane by reusing Repl8B_reg_simd. Only
+// selected for 8-byte vectors when the AdvSIMD feature bit is set.
+instruct vslcntD(vecD dst, iRegI cnt) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (LShiftCntV cnt));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  expand %{
+    Repl8B_reg_simd(dst, cnt);
+  %}
+%}
+
+// Left-shift count vector, 128-bit form. Expand-only rule: the scalar
+// count is replicated into every byte lane by reusing Repl16B_reg. Only
+// selected for 16-byte vectors when the AdvSIMD feature bit is set.
+instruct vslcntX(vecX dst, iRegI cnt) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (LShiftCntV cnt));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  expand %{
+    Repl16B_reg(dst, cnt);
+  %}
+%}
+
+// Right-shift count vector, 64-bit form: the scalar count is replicated
+// into every byte lane and then negated in place.
+// Low bits of vector "shift" elements are used, so it
+// doesn't matter if we treat it as ints or bytes here.
+// NOTE(review): the vsh*_reg rules in this file describe VSHL as a
+// "left/right shift based on sign", so the negated count presumably
+// selects a right shift there - confirm.
+instruct vsrcntD(vecD dst, iRegI cnt) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (RShiftCntV cnt));
+  ins_cost(DEFAULT_COST*2); // FIXME
+  size(4*2);
+
+  format %{ "VDUP.8 $dst.D,$cnt\n\t"
+            "VNEG.S8 $dst.D,$dst.D\t! neg packed8B" %}
+  ins_encode %{
+    FloatRegister counts = $dst$$FloatRegister;
+    __ vdup_64_8(counts, $cnt$$Register);
+    __ vneg_64_s8(counts, counts);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Right-shift count vector, 128-bit form: the scalar count is replicated
+// into every byte lane and then negated in place.
+// NOTE(review): the vsh*_reg rules in this file describe VSHL as a
+// "left/right shift based on sign", so the negated count presumably
+// selects a right shift there - confirm.
+instruct vsrcntX(vecX dst, iRegI cnt) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (RShiftCntV cnt));
+  ins_cost(DEFAULT_COST*2); // FIXME
+  size(4*2);
+  format %{ "VDUP.8 $dst.Q,$cnt\n\t"
+            "VNEG.S8 $dst.Q,$dst.Q\t! neg packed16B" %}
+  ins_encode %{
+    FloatRegister counts = $dst$$FloatRegister;
+    __ vdup_128_8(counts, $cnt$$Register);
+    __ vneg_128_s8(counts, counts);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Byte vector logical left/right shift based on sign
+// Shifts each byte lane of a 64-bit vector by the matching lane of $shift.
+// This rule has no match clause, only an effect list; presumably it is
+// instantiated solely from expand rules elsewhere - confirm.
+instruct vsh8B_reg(vecD dst, vecD src, vecD shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  effect(DEF dst, USE src, USE shift);
+  ins_cost(DEFAULT_COST); // FIXME
+  size(4);
+  format %{
+    "VSHL.U8 $dst.D,$src.D,$shift.D\t! logical left/right shift packed8B"
+  %}
+  ins_encode %{
+    FloatRegister vd = $dst$$FloatRegister;
+    FloatRegister value = $src$$FloatRegister;
+    FloatRegister counts = $shift$$FloatRegister;
+    __ vshl_64_u8(vd, value, counts);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Helper rule for 16 byte lanes in a Q register: it has no match rule, only an
+// effect list, and is the expansion target of vsl16B_reg. Emits one VSHL.U8
+// taking the per-lane counts from the $shift vector.
+instruct vsh16B_reg(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 16);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.U8 $dst.Q,$src.Q,$shift.Q\t! logical left/right shift packed16B"
+  %}
+  ins_encode %{
+    __ vshl_128_u8($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Shorts/Char vector logical left/right shift based on sign
+// Helper rule for 4 halfword lanes in a D register: no match rule, only an
+// effect list; it is the expansion target of vsl4S_reg. Emits one VSHL.U16
+// taking the per-lane counts from the $shift vector.
+instruct vsh4S_reg(vecD dst, vecD src, vecD shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.U16 $dst.D,$src.D,$shift.D\t! logical left/right shift packed4S"
+  %}
+  ins_encode %{
+    __ vshl_64_u16($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Helper rule for 8 halfword lanes in a Q register: no match rule, only an
+// effect list; it is the expansion target of vsl8S_reg. Emits one VSHL.U16
+// taking the per-lane counts from the $shift vector.
+instruct vsh8S_reg(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.U16 $dst.Q,$src.Q,$shift.Q\t! logical left/right shift packed8S"
+  %}
+  ins_encode %{
+    __ vshl_128_u16($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Integers vector logical left/right shift based on sign
+// Helper rule for 2 word lanes in a D register: no match rule, only an effect
+// list; it is the expansion target of vsl2I_reg. Emits one VSHL.U32 taking the
+// per-lane counts from the $shift vector.
+instruct vsh2I_reg(vecD dst, vecD src, vecD shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.U32 $dst.D,$src.D,$shift.D\t! logical left/right shift packed2I"
+  %}
+  ins_encode %{
+    __ vshl_64_u32($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Helper rule for 4 word lanes in a Q register: no match rule, only an effect
+// list; it is the expansion target of vsl4I_reg. Emits one VSHL.U32 taking the
+// per-lane counts from the $shift vector.
+instruct vsh4I_reg(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.U32 $dst.Q,$src.Q,$shift.Q\t! logical left/right shift packed4I"
+  %}
+  ins_encode %{
+    __ vshl_128_u32($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Longs vector logical left/right shift based on sign
+// Helper rule for 2 doubleword lanes in a Q register: no match rule, only an
+// effect list; it is the expansion target of vsl2L_reg. Emits one VSHL.U64
+// taking the per-lane counts from the $shift vector.
+instruct vsh2L_reg(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.U64 $dst.Q,$src.Q,$shift.Q\t! logical left/right shift packed2L"
+  %}
+  ins_encode %{
+    __ vshl_128_u64($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// ------------------------------ LeftShift -----------------------------------
+
+// Byte vector left shift
+// Matches LShiftVB on an 8-lane vector with a vector shift count and expands
+// into the helper rule vsh8B_reg.
+instruct vsl8B_reg(vecD dst, vecD src, vecD shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVB src shift));
+  size(4*1);
+  ins_cost(DEFAULT_COST*1); // FIXME
+  expand %{
+    vsh8B_reg(dst, src, shift);
+  %}
+%}
+
+// Matches LShiftVB on a 16-lane vector with a vector shift count and expands
+// into the helper rule vsh16B_reg.
+instruct vsl16B_reg(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (LShiftVB src shift));
+  size(4*1);
+  ins_cost(DEFAULT_COST*1); // FIXME
+  expand %{
+    vsh16B_reg(dst, src, shift);
+  %}
+%}
+
+// Matches LShiftVB on an 8-lane vector when the shift count is an immI
+// constant; emits a single immediate-form VSHL.I8 on a D register.
+instruct vsl8B_immI(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVB src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.I8 $dst.D,$src.D,$shift\t! logical left shift packed8B"
+  %}
+  ins_encode %{
+    __ vshl_64_8($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Matches LShiftVB on a 16-lane vector when the shift count is an immI
+// constant; emits a single immediate-form VSHL.I8 on a Q register.
+instruct vsl16B_immI(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (LShiftVB src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.I8 $dst.Q,$src.Q,$shift\t! logical left shift packed16B"
+  %}
+  ins_encode %{
+    __ vshl_128_8($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Shorts/Chars vector logical left/right shift
+// Matches both LShiftVS and URShiftVS on a 4-lane vector with a vector shift
+// count and expands into the helper rule vsh4S_reg. Presumably the right-shift
+// case relies on the count vector having been negated by vsrcntD -- confirm.
+instruct vsl4S_reg(vecD dst, vecD src, vecD shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS src shift));
+  match(Set dst (URShiftVS src shift));
+  size(4*1);
+  ins_cost(DEFAULT_COST*1); // FIXME
+  expand %{
+    vsh4S_reg(dst, src, shift);
+  %}
+%}
+
+// Matches both LShiftVS and URShiftVS on an 8-lane vector with a vector shift
+// count and expands into the helper rule vsh8S_reg. Presumably the right-shift
+// case relies on the count vector having been negated by vsrcntX -- confirm.
+instruct vsl8S_reg(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS src shift));
+  match(Set dst (URShiftVS src shift));
+  size(4*1);
+  ins_cost(DEFAULT_COST*1); // FIXME
+  expand %{
+    vsh8S_reg(dst, src, shift);
+  %}
+%}
+
+// Matches LShiftVS on a 4-lane vector when the shift count is an immI
+// constant; emits a single immediate-form VSHL.I16 on a D register.
+instruct vsl4S_immI(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.I16 $dst.D,$src.D,$shift\t! logical left shift packed4S"
+  %}
+  ins_encode %{
+    __ vshl_64_16($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Matches LShiftVS on an 8-lane vector when the shift count is an immI
+// constant; emits a single immediate-form VSHL.I16 on a Q register.
+instruct vsl8S_immI(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.I16 $dst.Q,$src.Q,$shift\t! logical left shift packed8S"
+  %}
+  ins_encode %{
+    __ vshl_128_16($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Integers vector logical left/right shift
+// Matches both LShiftVI and URShiftVI on a 2-lane vector (AdvSIMD only) with a
+// vector shift count and expands into the helper rule vsh2I_reg. Presumably
+// the right-shift case relies on the count having been negated by vsrcntD --
+// confirm.
+instruct vsl2I_reg(vecD dst, vecD src, vecD shift) %{
+  predicate(n->as_Vector()->length() == 2 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (LShiftVI src shift));
+  match(Set dst (URShiftVI src shift));
+  size(4*1);
+  ins_cost(DEFAULT_COST*1); // FIXME
+  expand %{
+    vsh2I_reg(dst, src, shift);
+  %}
+%}
+
+// Matches both LShiftVI and URShiftVI on a 4-lane vector (AdvSIMD only) with a
+// vector shift count and expands into the helper rule vsh4I_reg. Presumably
+// the right-shift case relies on the count having been negated by vsrcntX --
+// confirm.
+instruct vsl4I_reg(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 4 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (LShiftVI src shift));
+  match(Set dst (URShiftVI src shift));
+  size(4*1);
+  ins_cost(DEFAULT_COST*1); // FIXME
+  expand %{
+    vsh4I_reg(dst, src, shift);
+  %}
+%}
+
+// Matches LShiftVI on a 2-lane vector (AdvSIMD only) when the shift count is
+// an immI constant; emits a single immediate-form VSHL.I32 on a D register.
+instruct vsl2I_immI(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (LShiftVI src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.I32 $dst.D,$src.D,$shift\t! logical left shift packed2I"
+  %}
+  ins_encode %{
+    __ vshl_64_32($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Matches LShiftVI on a 4-lane vector (AdvSIMD only) when the shift count is
+// an immI constant; emits a single immediate-form VSHL.I32 on a Q register.
+instruct vsl4I_immI(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 4 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (LShiftVI src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.I32 $dst.Q,$src.Q,$shift\t! logical left shift packed4I"
+  %}
+  ins_encode %{
+    __ vshl_128_32($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Longs vector logical left/right shift
+// Matches both LShiftVL and URShiftVL on a 2-lane vector with a vector shift
+// count and expands into the helper rule vsh2L_reg. Presumably the right-shift
+// case relies on the count having been negated by vsrcntX -- confirm.
+instruct vsl2L_reg(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVL src shift));
+  match(Set dst (URShiftVL src shift));
+  size(4*1);
+  ins_cost(DEFAULT_COST*1); // FIXME
+  expand %{
+    vsh2L_reg(dst, src, shift);
+  %}
+%}
+
+// Matches LShiftVL on a 2-lane vector when the shift count is an immI
+// constant; emits a single immediate-form VSHL.I64 on a Q register.
+instruct vsl2L_immI(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVL src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.I64 $dst.Q,$src.Q,$shift\t! logical left shift packed2L"
+  %}
+  ins_encode %{
+    __ vshl_128_64($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// ----------------------- LogicalRightShift -----------------------------------
+
+// Bytes/Shorts vector logical right shift produces an incorrect Java result
+// for negative data because Java code converts a short value to int with
+// sign extension before the shift.
+
+// Chars vector logical right shift
+// Matches URShiftVS on a 4-lane vector when the shift count is an immI
+// constant; emits a single immediate-form VSHR.U16 on a D register.
+instruct vsrl4S_immI(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVS src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHR.U16 $dst.D,$src.D,$shift\t! logical right shift packed4S"
+  %}
+  ins_encode %{
+    __ vshr_64_u16($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Matches URShiftVS on an 8-lane vector when the shift count is an immI
+// constant; emits a single immediate-form VSHR.U16 on a Q register.
+instruct vsrl8S_immI(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVS src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHR.U16 $dst.Q,$src.Q,$shift\t! logical right shift packed8S"
+  %}
+  ins_encode %{
+    __ vshr_128_u16($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Integers vector logical right shift
+// Matches URShiftVI on a 2-lane vector (AdvSIMD only) when the shift count is
+// an immI constant; emits a single immediate-form VSHR.U32 on a D register.
+instruct vsrl2I_immI(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (URShiftVI src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHR.U32 $dst.D,$src.D,$shift\t! logical right shift packed2I"
+  %}
+  ins_encode %{
+    __ vshr_64_u32($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Matches URShiftVI on a 4-lane vector (AdvSIMD only) when the shift count is
+// an immI constant; emits a single immediate-form VSHR.U32 on a Q register.
+instruct vsrl4I_immI(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 4 && (VM_Version::features() & FT_AdvSIMD));
+  match(Set dst (URShiftVI src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHR.U32 $dst.Q,$src.Q,$shift\t! logical right shift packed4I"
+  %}
+  ins_encode %{
+    __ vshr_128_u32($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Longs vector logical right shift
+// Matches URShiftVL on a 2-lane vector when the shift count is an immI
+// constant; emits a single immediate-form VSHR.U64 on a Q register.
+instruct vsrl2L_immI(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVL src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHR.U64 $dst.Q,$src.Q,$shift\t! logical right shift packed2L"
+  %}
+  ins_encode %{
+    __ vshr_128_u64($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// ------------------- ArithmeticRightShift -----------------------------------
+
+// Bytes vector arithmetic left/right shift based on sign
+// Helper rule for 8 byte lanes in a D register: no match rule, only an effect
+// list; it is the expansion target of vsra8B_reg. Emits one signed VSHL.S8
+// taking the per-lane counts from the $shift vector.
+instruct vsha8B_reg(vecD dst, vecD src, vecD shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.S8 $dst.D,$src.D,$shift.D\t! arithmetic right shift packed8B"
+  %}
+  ins_encode %{
+    __ vshl_64_s8($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Helper rule for 16 byte lanes in a Q register: no match rule, only an effect
+// list; it is the expansion target of vsrl16B_reg. Emits one signed VSHL.S8
+// taking the per-lane counts from the $shift vector.
+instruct vsha16B_reg(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 16);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.S8 $dst.Q,$src.Q,$shift.Q\t! arithmetic right shift packed16B"
+  %}
+  ins_encode %{
+    __ vshl_128_s8($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Shorts vector arithmetic left/right shift based on sign
+// Helper rule for 4 halfword lanes in a D register: no match rule, only an
+// effect list; it is the expansion target of vsra4S_reg. Emits one signed
+// VSHL.S16 taking the per-lane counts from the $shift vector.
+instruct vsha4S_reg(vecD dst, vecD src, vecD shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.S16 $dst.D,$src.D,$shift.D\t! arithmetic right shift packed4S"
+  %}
+  ins_encode %{
+    __ vshl_64_s16($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Helper rule for 8 halfword lanes in a Q register: no match rule, only an
+// effect list; it is the expansion target of vsra8S_reg. Emits one signed
+// VSHL.S16 taking the per-lane counts from the $shift vector.
+instruct vsha8S_reg(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.S16 $dst.Q,$src.Q,$shift.Q\t! arithmetic right shift packed8S"
+  %}
+  ins_encode %{
+    __ vshl_128_s16($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Integers vector arithmetic left/right shift based on sign
+// Helper rule for 2 word lanes in a D register: no match rule, only an effect
+// list; it is the expansion target of vsra2I_reg. Emits one signed VSHL.S32
+// taking the per-lane counts from the $shift vector.
+instruct vsha2I_reg(vecD dst, vecD src, vecD shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.S32 $dst.D,$src.D,$shift.D\t! arithmetic right shift packed2I"
+  %}
+  ins_encode %{
+    __ vshl_64_s32($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Helper rule for 4 word lanes in a Q register: no match rule, only an effect
+// list; it is the expansion target of vsra4I_reg. Emits one signed VSHL.S32
+// taking the per-lane counts from the $shift vector.
+instruct vsha4I_reg(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.S32 $dst.Q,$src.Q,$shift.Q\t! arithmetic right shift packed4I"
+  %}
+  ins_encode %{
+    __ vshl_128_s32($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Longs vector arithmetic left/right shift based on sign
+// Helper rule for 2 doubleword lanes in a Q register: no match rule, only an
+// effect list; it is the expansion target of vsra2L_reg. Emits one signed
+// VSHL.S64 taking the per-lane counts from the $shift vector.
+instruct vsha2L_reg(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  effect(DEF dst, USE src, USE shift);
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHL.S64 $dst.Q,$src.Q,$shift.Q\t! arithmetic right shift packed2L"
+  %}
+  ins_encode %{
+    __ vshl_128_s64($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Byte vector arithmetic right shift
+
+// Matches RShiftVB on an 8-lane vector with a vector shift count and expands
+// into the signed-shift helper rule vsha8B_reg.
+instruct vsra8B_reg(vecD dst, vecD src, vecD shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVB src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  expand %{
+    vsha8B_reg(dst, src, shift);
+  %}
+%}
+
+// Matches RShiftVB on a 16-lane vector with a vector shift count and expands
+// into the signed-shift helper rule vsha16B_reg.
+// NOTE(review): the name says "srl", but the rule matches RShiftVB and uses
+// the signed helper like its 8-lane sibling vsra8B_reg; the name is left
+// unchanged here because it determines the generated node name.
+instruct vsrl16B_reg(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (RShiftVB src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  expand %{
+    vsha16B_reg(dst, src, shift);
+  %}
+%}
+
+// Matches RShiftVB on an 8-lane vector when the shift count is an immI
+// constant; emits a single immediate-form signed VSHR.S8 on a D register.
+// NOTE(review): the name says "srl", but the rule matches RShiftVB and emits
+// the signed form; the name is kept because it determines the generated node
+// name.
+instruct vsrl8B_immI(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVB src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHR.S8 $dst.D,$src.D,$shift\t! arithmetic right shift packed8B"
+  %}
+  ins_encode %{
+    __ vshr_64_s8($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Matches RShiftVB on a 16-lane vector when the shift count is an immI
+// constant; emits a single immediate-form signed VSHR.S8 on a Q register.
+// NOTE(review): the name says "srl", but the rule matches RShiftVB and emits
+// the signed form; the name is kept because it determines the generated node
+// name.
+instruct vsrl16B_immI(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (RShiftVB src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHR.S8 $dst.Q,$src.Q,$shift\t! arithmetic right shift packed16B"
+  %}
+  ins_encode %{
+    __ vshr_128_s8($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Shorts vector arithmetic right shift
+// Matches RShiftVS on a 4-lane vector with a vector shift count and expands
+// into the signed-shift helper rule vsha4S_reg.
+instruct vsra4S_reg(vecD dst, vecD src, vecD shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  expand %{
+    vsha4S_reg(dst, src, shift);
+  %}
+%}
+
+// Matches RShiftVS on an 8-lane vector with a vector shift count and expands
+// into the signed-shift helper rule vsha8S_reg.
+instruct vsra8S_reg(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  expand %{
+    vsha8S_reg(dst, src, shift);
+  %}
+%}
+
+// Matches RShiftVS on a 4-lane vector when the shift count is an immI
+// constant; emits a single immediate-form signed VSHR.S16 on a D register.
+instruct vsra4S_immI(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHR.S16 $dst.D,$src.D,$shift\t! arithmetic right shift packed4S"
+  %}
+  ins_encode %{
+    __ vshr_64_s16($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Matches RShiftVS on an 8-lane vector when the shift count is an immI
+// constant; emits a single immediate-form signed VSHR.S16 on a Q register.
+instruct vsra8S_immI(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHR.S16 $dst.Q,$src.Q,$shift\t! arithmetic right shift packed8S"
+  %}
+  ins_encode %{
+    __ vshr_128_s16($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Integers vector arithmetic right shift
+// Matches RShiftVI on a 2-lane vector with a vector shift count and expands
+// into the signed-shift helper rule vsha2I_reg.
+// NOTE(review): unlike vsl2I_reg, this predicate does not test FT_AdvSIMD --
+// confirm whether that difference is intended.
+instruct vsra2I_reg(vecD dst, vecD src, vecD shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVI src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  expand %{
+    vsha2I_reg(dst, src, shift);
+  %}
+%}
+
+// Matches RShiftVI on a 4-lane vector with a vector shift count and expands
+// into the signed-shift helper rule vsha4I_reg.
+// NOTE(review): unlike vsl4I_reg, this predicate does not test FT_AdvSIMD --
+// confirm whether that difference is intended.
+instruct vsra4I_reg(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVI src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  expand %{
+    vsha4I_reg(dst, src, shift);
+  %}
+%}
+
+// Matches RShiftVI on a 2-lane vector when the shift count is an immI
+// constant; emits a single immediate-form signed VSHR.S32 on a D register.
+instruct vsra2I_immI(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVI src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHR.S32 $dst.D,$src.D,$shift\t! arithmetic right shift packed2I"
+  %}
+  ins_encode %{
+    __ vshr_64_s32($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Matches RShiftVI on a 4-lane vector when the shift count is an immI
+// constant; emits a single immediate-form signed VSHR.S32 on a Q register.
+instruct vsra4I_immI(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVI src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHR.S32 $dst.Q,$src.Q,$shift\t! arithmetic right shift packed4I"
+  %}
+  ins_encode %{
+    __ vshr_128_s32($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Longs vector arithmetic right shift
+// Matches RShiftVL on a 2-lane vector with a vector shift count and expands
+// into the signed-shift helper rule vsha2L_reg.
+instruct vsra2L_reg(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVL src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  expand %{
+    vsha2L_reg(dst, src, shift);
+  %}
+%}
+
+// Matches RShiftVL on a 2-lane vector when the shift count is an immI
+// constant; emits a single immediate-form signed VSHR.S64 on a Q register.
+instruct vsra2L_immI(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVL src shift));
+  size(4);
+  ins_cost(DEFAULT_COST); // FIXME
+  format %{
+    "VSHR.S64 $dst.Q,$src.Q,$shift\t! arithmetic right shift packed2L"
+  %}
+  ins_encode %{
+    __ vshr_128_s64($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// --------------------------------- AND --------------------------------------
+
+// Bitwise AND of two 8-byte vectors: matches AndV when the vector is 8 bytes
+// long and emits a single VAND on D registers.
+instruct vandD(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (AndV src1 src2));
+  format %{ "VAND    $dst.D,$src1.D,$src2.D\t! and vectors (8 bytes)" %}
+  ins_encode %{
+    __ vand_64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Bitwise AND of two 16-byte vectors: matches AndV when the vector is 16 bytes
+// long and emits a single VAND on Q registers.
+instruct vandX(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (AndV src1 src2));
+  format %{ "VAND    $dst.Q,$src1.Q,$src2.Q\t! and vectors (16 bytes)" %}
+  ins_encode %{
+    __ vand_128($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// --------------------------------- OR ---------------------------------------
+
+// Bitwise OR of two 8-byte vectors: matches OrV when the vector is 8 bytes
+// long and emits a single VORR on D registers.
+instruct vorD(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (OrV src1 src2));
+  format %{ "VORR    $dst.D,$src1.D,$src2.D\t! or vectors (8 bytes)" %}
+  ins_encode %{
+    __ vorr_64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Bitwise OR of two 16-byte vectors: matches OrV when the vector is 16 bytes
+// long and emits a single VORR on Q registers.
+instruct vorX(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (OrV src1 src2));
+  format %{ "VORR    $dst.Q,$src1.Q,$src2.Q\t! or vectors (16 bytes)" %}
+  ins_encode %{
+    __ vorr_128($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// --------------------------------- XOR --------------------------------------
+
+// Bitwise exclusive OR of two 8-byte vectors: matches XorV when the vector is
+// 8 bytes long and emits a single VEOR on D registers.
+instruct vxorD(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (XorV src1 src2));
+  format %{ "VEOR    $dst.D,$src1.D,$src2.D\t! xor vectors (8 bytes)" %}
+  ins_encode %{
+    __ veor_64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+// Bitwise exclusive OR of two 16-byte vectors: matches XorV when the vector is
+// 16 bytes long and emits a single VEOR on Q registers.
+instruct vxorX(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (XorV src1 src2));
+  format %{ "VEOR    $dst.Q,$src1.Q,$src2.Q\t! xor vectors (16 bytes)" %}
+  ins_encode %{
+    __ veor_128($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
+  %}
+  ins_pipe( ialu_reg_reg ); // FIXME
+%}
+
+
+//----------PEEPHOLE RULES-----------------------------------------------------
+// These must follow all instruction definitions as they use the names
+// defined in the instructions definitions.
+//
+// peepmatch ( root_instr_name [preceding_instruction]* );
+//
+// peepconstraint %{
+// (instruction_number.operand_name relational_op instruction_number.operand_name
+//  [, ...] );
+// // instruction numbers are zero-based using left to right order in peepmatch
+//
+// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
+// // provide an instruction_number.operand_name for each operand that appears
+// // in the replacement instruction's match rule
+//
+// ---------VM FLAGS---------------------------------------------------------
+//
+// All peephole optimizations can be turned off using -XX:-OptoPeephole
+//
+// Each peephole rule is given an identifying number starting with zero and
+// increasing by one in the order seen by the parser.  An individual peephole
+// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
+// on the command-line.
+//
+// ---------CURRENT LIMITATIONS----------------------------------------------
+//
+// Only match adjacent instructions in same basic block
+// Only equality constraints
+// Only constraints between operands, not (0.dest_reg == EAX_enc)
+// Only one replacement instruction
+//
+// ---------EXAMPLE----------------------------------------------------------
+//
+// // pertinent parts of existing instructions in architecture description
+// instruct movI(eRegI dst, eRegI src) %{
+//   match(Set dst (CopyI src));
+// %}
+//
+// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
+//   match(Set dst (AddI dst src));
+//   effect(KILL cr);
+// %}
+//
+// // Change (inc mov) to lea
+// peephole %{
+//   // increment preceded by register-register move
+//   peepmatch ( incI_eReg movI );
+//   // require that the destination register of the increment
+//   // match the destination register of the move
+//   peepconstraint ( 0.dst == 1.dst );
+//   // construct a replacement instruction that sets
+//   // the destination to ( move's source register + one )
+//   peepreplace ( incI_eReg_immI1( 0.dst 1.src 0.src ) );
+// %}
+//
+
+// // Change load of spilled value to only a spill
+// instruct storeI(memory mem, eRegI src) %{
+//   match(Set mem (StoreI mem src));
+// %}
+//
+// instruct loadI(eRegI dst, memory mem) %{
+//   match(Set dst (LoadI mem));
+// %}
+//
+// peephole %{
+//   peepmatch ( loadI storeI );
+//   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
+//   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
+// %}
+
+//----------SMARTSPILL RULES---------------------------------------------------
+// These must follow all instruction definitions as they use the names
+// defined in the instructions definitions.
+//
+// ARM will probably not have any of these rules due to RISC instruction set.
+
+//----------PIPELINE-----------------------------------------------------------
+// Rules which define the behavior of the target architectures pipeline.