1 //
   2 // Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Common Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
   30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
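      // For orientation, an illustrative reading of one of the entries defined
      // below, using the "reg_def" signature described above:
      //
      //   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
      //
      //     1st SOC          - register-allocator save type (save-on-call)
      //     2nd SOC          - C calling-convention save type (save-on-call)
      //     Op_RegF          - ideal register type, governs how the slot is spilled
      //     0                - encoding, the bit-pattern placed into opcodes
      //     xmm0->as_VMReg() - VMReg handle for the concrete machine register
      //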
   62 // XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
  63 // Word a in each register holds a Float, words ab hold a Double.
  64 // The whole registers are used in SSE4.2 version intrinsics,
  65 // array copy stubs and superword operations (see UseSSE42Intrinsics,
  66 // UseXMMForArrayCopy and UseSuperword flags).
   67 // For pre-EVEX architectures:
  68 //      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
   69 // For EVEX-enabled architectures:
  70 //      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
  71 //
  72 // Linux ABI:   No register preserved across function calls
  73 //              XMM0-XMM7 might hold parameters
  74 // Windows ABI: XMM6-XMM31 preserved across function calls
  75 //              XMM0-XMM3 might hold parameters
  76 
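      // As an illustration of the word layout above (using XMM0, defined just below):
      //
      //   Float   -> XMM0              (word  a,   32 bits)
      //   Double  -> XMM0, XMM0b       (words a-b, 64 bits)
      //   128-bit -> XMM0 .. XMM0d     (words a-d)
      //   256-bit -> XMM0 .. XMM0h     (words a-h)
      //   512-bit -> XMM0 .. XMM0p     (words a-p, the whole register)
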
  77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
  78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
  79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
  80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
  81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
  82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
  83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
  84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
  85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
  86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
  87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
  88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
  89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
  90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
  91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
  92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
  93 
  94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
  95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
  96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
  97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
  98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
  99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
 104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
 110 
 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
 127 
 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
 144 
 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
 161 
 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
 178 
 179 #ifdef _WIN64
 180 
 181 reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
 182 reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
 183 reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
 184 reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
 185 reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
 186 reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
 187 reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
 188 reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
 189 reg_def XMM6i( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(8));
 190 reg_def XMM6j( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(9));
 191 reg_def XMM6k( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(10));
 192 reg_def XMM6l( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(11));
 193 reg_def XMM6m( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(12));
 194 reg_def XMM6n( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(13));
 195 reg_def XMM6o( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(14));
 196 reg_def XMM6p( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(15));
 197 
 198 reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
 199 reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
 200 reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
 201 reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
 202 reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
 203 reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
 204 reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
 205 reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
 206 reg_def XMM7i( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(8));
 207 reg_def XMM7j( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(9));
 208 reg_def XMM7k( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(10));
 209 reg_def XMM7l( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(11));
 210 reg_def XMM7m( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(12));
 211 reg_def XMM7n( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(13));
 212 reg_def XMM7o( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(14));
 213 reg_def XMM7p( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(15));
 214 
 215 reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
 216 reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
 217 reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
 218 reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
 219 reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
 220 reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
 221 reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
 222 reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
 223 reg_def XMM8i( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(8));
 224 reg_def XMM8j( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(9));
 225 reg_def XMM8k( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(10));
 226 reg_def XMM8l( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(11));
 227 reg_def XMM8m( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(12));
 228 reg_def XMM8n( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(13));
 229 reg_def XMM8o( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(14));
 230 reg_def XMM8p( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(15));
 231 
 232 reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
 233 reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
 234 reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
 235 reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
 236 reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
 237 reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
 238 reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
 239 reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
 240 reg_def XMM9i( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(8));
 241 reg_def XMM9j( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(9));
 242 reg_def XMM9k( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(10));
 243 reg_def XMM9l( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(11));
 244 reg_def XMM9m( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(12));
 245 reg_def XMM9n( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(13));
 246 reg_def XMM9o( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(14));
 247 reg_def XMM9p( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(15));
 248 
 249 reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 250 reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
 251 reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
 252 reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
 253 reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
 254 reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
 255 reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
 256 reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
 257 reg_def XMM10i( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(8));
 258 reg_def XMM10j( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(9));
 259 reg_def XMM10k( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(10));
 260 reg_def XMM10l( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(11));
 261 reg_def XMM10m( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(12));
 262 reg_def XMM10n( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(13));
 263 reg_def XMM10o( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(14));
 264 reg_def XMM10p( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(15));
 265 
 266 reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 267 reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
 268 reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
 269 reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
 270 reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
 271 reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
 272 reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
 273 reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
 274 reg_def XMM11i( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(8));
 275 reg_def XMM11j( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(9));
 276 reg_def XMM11k( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(10));
 277 reg_def XMM11l( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(11));
 278 reg_def XMM11m( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(12));
 279 reg_def XMM11n( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(13));
 280 reg_def XMM11o( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(14));
 281 reg_def XMM11p( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(15));
 282 
 283 reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 284 reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
 285 reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
 286 reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
 287 reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
 288 reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
 289 reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
 290 reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
 291 reg_def XMM12i( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(8));
 292 reg_def XMM12j( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(9));
 293 reg_def XMM12k( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(10));
 294 reg_def XMM12l( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(11));
 295 reg_def XMM12m( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(12));
 296 reg_def XMM12n( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(13));
 297 reg_def XMM12o( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(14));
 298 reg_def XMM12p( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(15));
 299 
 300 reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 301 reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
 302 reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
 303 reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
 304 reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
 305 reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
 306 reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
 307 reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
 308 reg_def XMM13i( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(8));
 309 reg_def XMM13j( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(9));
 310 reg_def XMM13k( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(10));
 311 reg_def XMM13l( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(11));
 312 reg_def XMM13m( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(12));
 313 reg_def XMM13n( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(13));
 314 reg_def XMM13o( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(14));
 315 reg_def XMM13p( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(15));
 316 
 317 reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 318 reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
 319 reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
 320 reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
 321 reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
 322 reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
 323 reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
 324 reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
 325 reg_def XMM14i( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(8));
 326 reg_def XMM14j( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(9));
 327 reg_def XMM14k( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(10));
 328 reg_def XMM14l( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(11));
 329 reg_def XMM14m( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(12));
 330 reg_def XMM14n( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(13));
 331 reg_def XMM14o( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(14));
 332 reg_def XMM14p( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(15));
 333 
 334 reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 335 reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
 336 reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
 337 reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
 338 reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
 339 reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
 340 reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
 341 reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
 342 reg_def XMM15i( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(8));
 343 reg_def XMM15j( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(9));
 344 reg_def XMM15k( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(10));
 345 reg_def XMM15l( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(11));
 346 reg_def XMM15m( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(12));
 347 reg_def XMM15n( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(13));
 348 reg_def XMM15o( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(14));
 349 reg_def XMM15p( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(15));
 350 
 351 reg_def XMM16 ( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg());
 352 reg_def XMM16b( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(1));
 353 reg_def XMM16c( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(2));
 354 reg_def XMM16d( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(3));
 355 reg_def XMM16e( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(4));
 356 reg_def XMM16f( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(5));
 357 reg_def XMM16g( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(6));
 358 reg_def XMM16h( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(7));
  359 reg_def XMM16i( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(8));
 360 reg_def XMM16j( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(9));
 361 reg_def XMM16k( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(10));
 362 reg_def XMM16l( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(11));
 363 reg_def XMM16m( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(12));
 364 reg_def XMM16n( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(13));
 365 reg_def XMM16o( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(14));
 366 reg_def XMM16p( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(15));
 367 
 368 reg_def XMM17 ( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg());
 369 reg_def XMM17b( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(1));
 370 reg_def XMM17c( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(2));
 371 reg_def XMM17d( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(3));
 372 reg_def XMM17e( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(4));
 373 reg_def XMM17f( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(5));
 374 reg_def XMM17g( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(6));
 375 reg_def XMM17h( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(7));
 376 reg_def XMM17i( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(8));
 377 reg_def XMM17j( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(9));
 378 reg_def XMM17k( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(10));
 379 reg_def XMM17l( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(11));
 380 reg_def XMM17m( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(12));
 381 reg_def XMM17n( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(13));
 382 reg_def XMM17o( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(14));
 383 reg_def XMM17p( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(15));
 384 
 385 reg_def XMM18 ( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg());
 386 reg_def XMM18b( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(1));
 387 reg_def XMM18c( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(2));
 388 reg_def XMM18d( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(3));
 389 reg_def XMM18e( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(4));
 390 reg_def XMM18f( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(5));
 391 reg_def XMM18g( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(6));
 392 reg_def XMM18h( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(7));
 393 reg_def XMM18i( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(8));
 394 reg_def XMM18j( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(9));
 395 reg_def XMM18k( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(10));
 396 reg_def XMM18l( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(11));
 397 reg_def XMM18m( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(12));
 398 reg_def XMM18n( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(13));
 399 reg_def XMM18o( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(14));
 400 reg_def XMM18p( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(15));
 401 
 402 reg_def XMM19 ( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg());
 403 reg_def XMM19b( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(1));
 404 reg_def XMM19c( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(2));
 405 reg_def XMM19d( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(3));
 406 reg_def XMM19e( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(4));
 407 reg_def XMM19f( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(5));
 408 reg_def XMM19g( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(6));
 409 reg_def XMM19h( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(7));
 410 reg_def XMM19i( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(8));
 411 reg_def XMM19j( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(9));
 412 reg_def XMM19k( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(10));
 413 reg_def XMM19l( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(11));
 414 reg_def XMM19m( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(12));
 415 reg_def XMM19n( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(13));
 416 reg_def XMM19o( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(14));
 417 reg_def XMM19p( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(15));
 418 
 419 reg_def XMM20 ( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg());
 420 reg_def XMM20b( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(1));
 421 reg_def XMM20c( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(2));
 422 reg_def XMM20d( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(3));
 423 reg_def XMM20e( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(4));
 424 reg_def XMM20f( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(5));
 425 reg_def XMM20g( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(6));
 426 reg_def XMM20h( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(7));
 427 reg_def XMM20i( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(8));
 428 reg_def XMM20j( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(9));
 429 reg_def XMM20k( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(10));
 430 reg_def XMM20l( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(11));
 431 reg_def XMM20m( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(12));
 432 reg_def XMM20n( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(13));
 433 reg_def XMM20o( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(14));
 434 reg_def XMM20p( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(15));
 435 
 436 reg_def XMM21 ( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg());
 437 reg_def XMM21b( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(1));
 438 reg_def XMM21c( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(2));
 439 reg_def XMM21d( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(3));
 440 reg_def XMM21e( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(4));
 441 reg_def XMM21f( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(5));
 442 reg_def XMM21g( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(6));
 443 reg_def XMM21h( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(7));
 444 reg_def XMM21i( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(8));
 445 reg_def XMM21j( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(9));
 446 reg_def XMM21k( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(10));
 447 reg_def XMM21l( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(11));
 448 reg_def XMM21m( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(12));
 449 reg_def XMM21n( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(13));
 450 reg_def XMM21o( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(14));
 451 reg_def XMM21p( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(15));
 452 
 453 reg_def XMM22 ( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg());
 454 reg_def XMM22b( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(1));
 455 reg_def XMM22c( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(2));
 456 reg_def XMM22d( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(3));
 457 reg_def XMM22e( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(4));
 458 reg_def XMM22f( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(5));
 459 reg_def XMM22g( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(6));
 460 reg_def XMM22h( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(7));
 461 reg_def XMM22i( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(8));
 462 reg_def XMM22j( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(9));
 463 reg_def XMM22k( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(10));
 464 reg_def XMM22l( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(11));
 465 reg_def XMM22m( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(12));
 466 reg_def XMM22n( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(13));
 467 reg_def XMM22o( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(14));
 468 reg_def XMM22p( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(15));
 469 
 470 reg_def XMM23 ( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg());
 471 reg_def XMM23b( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(1));
 472 reg_def XMM23c( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(2));
 473 reg_def XMM23d( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(3));
 474 reg_def XMM23e( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(4));
 475 reg_def XMM23f( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(5));
 476 reg_def XMM23g( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(6));
 477 reg_def XMM23h( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(7));
 478 reg_def XMM23i( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(8));
 479 reg_def XMM23j( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(9));
 480 reg_def XMM23k( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(10));
 481 reg_def XMM23l( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(11));
 482 reg_def XMM23m( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(12));
 483 reg_def XMM23n( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(13));
 484 reg_def XMM23o( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(14));
 485 reg_def XMM23p( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(15));
 486 
 487 reg_def XMM24 ( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg());
 488 reg_def XMM24b( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(1));
 489 reg_def XMM24c( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(2));
 490 reg_def XMM24d( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(3));
 491 reg_def XMM24e( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(4));
 492 reg_def XMM24f( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(5));
 493 reg_def XMM24g( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(6));
 494 reg_def XMM24h( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(7));
 495 reg_def XMM24i( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(8));
 496 reg_def XMM24j( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(9));
 497 reg_def XMM24k( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(10));
 498 reg_def XMM24l( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(11));
 499 reg_def XMM24m( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(12));
 500 reg_def XMM24n( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(13));
 501 reg_def XMM24o( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(14));
 502 reg_def XMM24p( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(15));
 503 
 504 reg_def XMM25 ( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg());
 505 reg_def XMM25b( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(1));
 506 reg_def XMM25c( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(2));
 507 reg_def XMM25d( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(3));
 508 reg_def XMM25e( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(4));
 509 reg_def XMM25f( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(5));
 510 reg_def XMM25g( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(6));
 511 reg_def XMM25h( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(7));
 512 reg_def XMM25i( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(8));
 513 reg_def XMM25j( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(9));
 514 reg_def XMM25k( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(10));
 515 reg_def XMM25l( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(11));
 516 reg_def XMM25m( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(12));
 517 reg_def XMM25n( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(13));
 518 reg_def XMM25o( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(14));
 519 reg_def XMM25p( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(15));
 520 
 521 reg_def XMM26 ( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg());
 522 reg_def XMM26b( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(1));
 523 reg_def XMM26c( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(2));
 524 reg_def XMM26d( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(3));
 525 reg_def XMM26e( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(4));
 526 reg_def XMM26f( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(5));
 527 reg_def XMM26g( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(6));
 528 reg_def XMM26h( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(7));
 529 reg_def XMM26i( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(8));
 530 reg_def XMM26j( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(9));
 531 reg_def XMM26k( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(10));
 532 reg_def XMM26l( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(11));
 533 reg_def XMM26m( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(12));
 534 reg_def XMM26n( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(13));
 535 reg_def XMM26o( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(14));
 536 reg_def XMM26p( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(15));
 537 
  538 reg_def XMM27 ( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg());
      reg_def XMM27b( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(1));
 539 reg_def XMM27c( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(2));
 540 reg_def XMM27d( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(3));
 541 reg_def XMM27e( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(4));
 542 reg_def XMM27f( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(5));
 543 reg_def XMM27g( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(6));
 544 reg_def XMM27h( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(7));
 545 reg_def XMM27i( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(8));
 546 reg_def XMM27j( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(9));
 547 reg_def XMM27k( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(10));
 548 reg_def XMM27l( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(11));
 549 reg_def XMM27m( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(12));
 550 reg_def XMM27n( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(13));
 551 reg_def XMM27o( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(14));
 552 reg_def XMM27p( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(15));
 553 
 554 reg_def XMM28 ( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg());
 555 reg_def XMM28b( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(1));
 556 reg_def XMM28c( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(2));
 557 reg_def XMM28d( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(3));
 558 reg_def XMM28e( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(4));
 559 reg_def XMM28f( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(5));
 560 reg_def XMM28g( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(6));
 561 reg_def XMM28h( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(7));
 562 reg_def XMM28i( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(8));
 563 reg_def XMM28j( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(9));
 564 reg_def XMM28k( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(10));
 565 reg_def XMM28l( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(11));
 566 reg_def XMM28m( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(12));
 567 reg_def XMM28n( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(13));
 568 reg_def XMM28o( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(14));
 569 reg_def XMM28p( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(15));
 570 
 571 reg_def XMM29 ( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg());
 572 reg_def XMM29b( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(1));
 573 reg_def XMM29c( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(2));
 574 reg_def XMM29d( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(3));
 575 reg_def XMM29e( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(4));
 576 reg_def XMM29f( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(5));
 577 reg_def XMM29g( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(6));
 578 reg_def XMM29h( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(7));
 579 reg_def XMM29i( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(8));
 580 reg_def XMM29j( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(9));
 581 reg_def XMM29k( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(10));
 582 reg_def XMM29l( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(11));
 583 reg_def XMM29m( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(12));
 584 reg_def XMM29n( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(13));
 585 reg_def XMM29o( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(14));
 586 reg_def XMM29p( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(15));
 587 
 588 reg_def XMM30 ( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg());
 589 reg_def XMM30b( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(1));
 590 reg_def XMM30c( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(2));
 591 reg_def XMM30d( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(3));
 592 reg_def XMM30e( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(4));
 593 reg_def XMM30f( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(5));
 594 reg_def XMM30g( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(6));
 595 reg_def XMM30h( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(7));
 596 reg_def XMM30i( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(8));
 597 reg_def XMM30j( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(9));
 598 reg_def XMM30k( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(10));
 599 reg_def XMM30l( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(11));
 600 reg_def XMM30m( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(12));
 601 reg_def XMM30n( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(13));
 602 reg_def XMM30o( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(14));
 603 reg_def XMM30p( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(15));
 604 
 605 reg_def XMM31 ( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg());
 606 reg_def XMM31b( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(1));
 607 reg_def XMM31c( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(2));
 608 reg_def XMM31d( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(3));
 609 reg_def XMM31e( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(4));
 610 reg_def XMM31f( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(5));
 611 reg_def XMM31g( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(6));
  612 reg_def XMM31h( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(7));
 613 reg_def XMM31i( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(8));
 614 reg_def XMM31j( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(9));
 615 reg_def XMM31k( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(10));
 616 reg_def XMM31l( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(11));
 617 reg_def XMM31m( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(12));
 618 reg_def XMM31n( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(13));
 619 reg_def XMM31o( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(14));
 620 reg_def XMM31p( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(15));
 621 
 622 #else // _WIN64
 623 
 624 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
 625 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
 626 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
 627 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
 628 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
 629 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
 630 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
 631 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
 632 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
 633 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
 634 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
 635 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
 636 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
 637 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
 638 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
 639 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
 640 
 641 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
 642 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
 643 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
 644 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
 645 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
 646 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
 647 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
 648 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
 649 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
 650 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
 651 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
 652 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
 653 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
 654 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
 655 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
 656 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
 657 
 658 #ifdef _LP64
 659 
 660 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
 661 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
 662 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
 663 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
 664 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
 665 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
 666 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
 667 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
 668 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
 669 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
 670 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
 671 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
 672 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
 673 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
 674 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
 675 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
 676 
 677 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
 678 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
 679 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
 680 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
 681 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
 682 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
 683 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
 684 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
 685 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
 686 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
 687 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
 688 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
 689 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
 690 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
 691 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
 692 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
 693 
 694 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 695 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
 696 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
 697 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
 698 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
 699 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
 700 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
 701 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
 702 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
 703 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
 704 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
 705 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
 706 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
 707 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
 708 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
 709 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
 710 
 711 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 712 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
 713 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
 714 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
 715 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
 716 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
 717 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
 718 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
 719 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
 720 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
 721 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
 722 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
 723 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
 724 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
 725 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
 726 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
 727 
 728 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 729 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
 730 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
 731 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
 732 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
 733 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
 734 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
 735 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
 736 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
 737 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
 738 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
 739 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
 740 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
 741 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
 742 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
 743 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
 744 
 745 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 746 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
 747 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
 748 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
 749 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
 750 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
 751 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
 752 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
 753 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
 754 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
 755 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
 756 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
 757 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
 758 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
 759 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
 760 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
 761 
 762 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 763 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
 764 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
 765 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
 766 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
 767 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
 768 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
 769 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
 770 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
 771 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
 772 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
 773 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
 774 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
 775 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
 776 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
 777 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
 778 
 779 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 780 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
 781 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
 782 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
 783 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
 784 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
 785 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
 786 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
 787 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
 788 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
 789 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
 790 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
 791 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
 792 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
 793 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
 794 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
 795 
 796 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
 797 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
 798 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
 799 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
 800 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
 801 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
 802 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
 803 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
 804 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
 805 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
 806 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
 807 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
 808 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
 809 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
 810 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
 811 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
 812 
 813 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
 814 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
 815 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
 816 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
 817 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
 818 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
 819 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
 820 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
 821 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
 822 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
 823 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
 824 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
 825 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
 826 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
 827 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
 828 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
 829 
 830 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
 831 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
 832 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
 833 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
 834 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
 835 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
 836 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
 837 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
 838 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
 839 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
 840 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
 841 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
 842 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
 843 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
 844 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
 845 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
 846 
 847 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
 848 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
 849 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
 850 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
 851 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
 852 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
 853 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
 854 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
 855 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
 856 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
 857 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
 858 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
 859 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
 860 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
 861 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
 862 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
 863 
 864 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
 865 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
 866 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
 867 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
 868 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
 869 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
 870 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
 871 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
 872 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
 873 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
 874 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
 875 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
 876 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
 877 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
 878 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
 879 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
 880 
 881 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
 882 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
 883 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
 884 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
 885 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
 886 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
 887 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
 888 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
 889 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
 890 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
 891 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
 892 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
 893 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
 894 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
 895 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
 896 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
 897 
 898 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
 899 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
 900 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
 901 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
 902 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
 903 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
 904 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
 905 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
 906 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
 907 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
 908 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
 909 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
 910 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
 911 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
 912 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
 913 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
 914 
 915 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
 916 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
 917 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
 918 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
 919 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
 920 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
 921 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
 922 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
 923 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
 924 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
 925 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
 926 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
 927 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
 928 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
 929 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
 930 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
 931 
 932 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
 933 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
 934 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
 935 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
 936 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
 937 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
 938 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
 939 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
 940 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
 941 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
 942 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
 943 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
 944 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
 945 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
 946 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
 947 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
 948 
 949 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
 950 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
 951 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
 952 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
 953 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
 954 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
 955 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
 956 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
 957 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
 958 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
 959 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
 960 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
 961 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
 962 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
 963 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
 964 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
 965 
 966 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
 967 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
 968 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
 969 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
 970 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
 971 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
 972 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
 973 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
 974 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
 975 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
 976 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
 977 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
 978 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
 979 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
 980 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
 981 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
 982 
 983 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
 984 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
 985 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
 986 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
 987 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
 988 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
 989 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
 990 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
 991 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
 992 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
 993 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
 994 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
 995 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
 996 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
 997 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
 998 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
 999 
1000 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
1001 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
1002 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
1003 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
1004 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
1005 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
1006 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
1007 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
1008 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
1009 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
1010 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
1011 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
1012 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
1013 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
1014 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
1015 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
1016 
1017 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
1018 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
1019 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
1020 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
1021 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
1022 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
1023 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
1024 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
1025 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
1026 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
1027 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
1028 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
1029 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
1030 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
1031 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
1032 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
1033 
1034 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
1035 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
1036 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
1037 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
1038 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
1039 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
1040 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
1041 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
1042 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
1043 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
1044 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
1045 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
1046 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
1047 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
1048 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
1049 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
1050 
1051 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
1052 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
1053 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
1054 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
1055 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
1056 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
1057 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
1058 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
1059 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
1060 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
1061 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
1062 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
1063 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
1064 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
1065 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
1066 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
1067 
1068 #endif // _LP64
1069 
1070 #endif // _WIN64
1071 
1072 #ifdef _LP64
1073 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
1074 #else
1075 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
1076 #endif // _LP64
1077 
1078 alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
1079                    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
1080                    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
1081                    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
1082                    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
1083                    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
1084                    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
1085                    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
1086 #ifdef _LP64
1087                   ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
1088                    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
1089                    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1090                    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1091                    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1092                    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1093                    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1094                    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
1095                   ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1096                    XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1097                    XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1098                    XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1099                    XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1100                    XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1101                    XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1102                    XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1103                    XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1104                    XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1105                    XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1106                    XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1107                    XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1108                    XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1109                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1110                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
1111 #endif
1112                       );
1113 
1114 // flags allocation class should be last.
1115 alloc_class chunk3(RFLAGS);
1116 
1117 // Singleton class for condition codes
1118 reg_class int_flags(RFLAGS);
1119 
1120 // Class for pre evex float registers
1121 reg_class float_reg_legacy(XMM0,
1122                     XMM1,
1123                     XMM2,
1124                     XMM3,
1125                     XMM4,
1126                     XMM5,
1127                     XMM6,
1128                     XMM7
1129 #ifdef _LP64
1130                    ,XMM8,
1131                     XMM9,
1132                     XMM10,
1133                     XMM11,
1134                     XMM12,
1135                     XMM13,
1136                     XMM14,
1137                     XMM15
1138 #endif
1139                     );
1140 
1141 // Class for evex float registers
1142 reg_class float_reg_evex(XMM0,
1143                     XMM1,
1144                     XMM2,
1145                     XMM3,
1146                     XMM4,
1147                     XMM5,
1148                     XMM6,
1149                     XMM7
1150 #ifdef _LP64
1151                    ,XMM8,
1152                     XMM9,
1153                     XMM10,
1154                     XMM11,
1155                     XMM12,
1156                     XMM13,
1157                     XMM14,
1158                     XMM15,
1159                     XMM16,
1160                     XMM17,
1161                     XMM18,
1162                     XMM19,
1163                     XMM20,
1164                     XMM21,
1165                     XMM22,
1166                     XMM23,
1167                     XMM24,
1168                     XMM25,
1169                     XMM26,
1170                     XMM27,
1171                     XMM28,
1172                     XMM29,
1173                     XMM30,
1174                     XMM31
1175 #endif
1176                     );
1177 
1178 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1179 
1180 // Class for pre evex double registers
1181 reg_class double_reg_legacy(XMM0,  XMM0b,
1182                      XMM1,  XMM1b,
1183                      XMM2,  XMM2b,
1184                      XMM3,  XMM3b,
1185                      XMM4,  XMM4b,
1186                      XMM5,  XMM5b,
1187                      XMM6,  XMM6b,
1188                      XMM7,  XMM7b
1189 #ifdef _LP64
1190                     ,XMM8,  XMM8b,
1191                      XMM9,  XMM9b,
1192                      XMM10, XMM10b,
1193                      XMM11, XMM11b,
1194                      XMM12, XMM12b,
1195                      XMM13, XMM13b,
1196                      XMM14, XMM14b,
1197                      XMM15, XMM15b
1198 #endif
1199                      );
1200 
1201 // Class for evex double registers
1202 reg_class double_reg_evex(XMM0,  XMM0b,
1203                      XMM1,  XMM1b,
1204                      XMM2,  XMM2b,
1205                      XMM3,  XMM3b,
1206                      XMM4,  XMM4b,
1207                      XMM5,  XMM5b,
1208                      XMM6,  XMM6b,
1209                      XMM7,  XMM7b
1210 #ifdef _LP64
1211                     ,XMM8,  XMM8b,
1212                      XMM9,  XMM9b,
1213                      XMM10, XMM10b,
1214                      XMM11, XMM11b,
1215                      XMM12, XMM12b,
1216                      XMM13, XMM13b,
1217                      XMM14, XMM14b,
1218                      XMM15, XMM15b,
1219                      XMM16, XMM16b,
1220                      XMM17, XMM17b,
1221                      XMM18, XMM18b,
1222                      XMM19, XMM19b,
1223                      XMM20, XMM20b,
1224                      XMM21, XMM21b,
1225                      XMM22, XMM22b,
1226                      XMM23, XMM23b,
1227                      XMM24, XMM24b,
1228                      XMM25, XMM25b,
1229                      XMM26, XMM26b,
1230                      XMM27, XMM27b,
1231                      XMM28, XMM28b,
1232                      XMM29, XMM29b,
1233                      XMM30, XMM30b,
1234                      XMM31, XMM31b
1235 #endif
1236                      );
1237 
1238 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1239 
1240 // Class for pre evex 32bit vector registers
1241 reg_class vectors_reg_legacy(XMM0,
1242                       XMM1,
1243                       XMM2,
1244                       XMM3,
1245                       XMM4,
1246                       XMM5,
1247                       XMM6,
1248                       XMM7
1249 #ifdef _LP64
1250                      ,XMM8,
1251                       XMM9,
1252                       XMM10,
1253                       XMM11,
1254                       XMM12,
1255                       XMM13,
1256                       XMM14,
1257                       XMM15
1258 #endif
1259                       );
1260 
1261 // Class for evex 32bit vector registers
1262 reg_class vectors_reg_evex(XMM0,
1263                       XMM1,
1264                       XMM2,
1265                       XMM3,
1266                       XMM4,
1267                       XMM5,
1268                       XMM6,
1269                       XMM7
1270 #ifdef _LP64
1271                      ,XMM8,
1272                       XMM9,
1273                       XMM10,
1274                       XMM11,
1275                       XMM12,
1276                       XMM13,
1277                       XMM14,
1278                       XMM15,
1279                       XMM16,
1280                       XMM17,
1281                       XMM18,
1282                       XMM19,
1283                       XMM20,
1284                       XMM21,
1285                       XMM22,
1286                       XMM23,
1287                       XMM24,
1288                       XMM25,
1289                       XMM26,
1290                       XMM27,
1291                       XMM28,
1292                       XMM29,
1293                       XMM30,
1294                       XMM31
1295 #endif
1296                       );
1297 
1298 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1299 
// Class for pre evex 64bit vector registers
1301 reg_class vectord_reg_legacy(XMM0,  XMM0b,
1302                       XMM1,  XMM1b,
1303                       XMM2,  XMM2b,
1304                       XMM3,  XMM3b,
1305                       XMM4,  XMM4b,
1306                       XMM5,  XMM5b,
1307                       XMM6,  XMM6b,
1308                       XMM7,  XMM7b
1309 #ifdef _LP64
1310                      ,XMM8,  XMM8b,
1311                       XMM9,  XMM9b,
1312                       XMM10, XMM10b,
1313                       XMM11, XMM11b,
1314                       XMM12, XMM12b,
1315                       XMM13, XMM13b,
1316                       XMM14, XMM14b,
1317                       XMM15, XMM15b
1318 #endif
1319                       );
1320 
// Class for evex 64bit vector registers
1322 reg_class vectord_reg_evex(XMM0,  XMM0b,
1323                       XMM1,  XMM1b,
1324                       XMM2,  XMM2b,
1325                       XMM3,  XMM3b,
1326                       XMM4,  XMM4b,
1327                       XMM5,  XMM5b,
1328                       XMM6,  XMM6b,
1329                       XMM7,  XMM7b
1330 #ifdef _LP64
1331                      ,XMM8,  XMM8b,
1332                       XMM9,  XMM9b,
1333                       XMM10, XMM10b,
1334                       XMM11, XMM11b,
1335                       XMM12, XMM12b,
1336                       XMM13, XMM13b,
1337                       XMM14, XMM14b,
1338                       XMM15, XMM15b,
1339                       XMM16, XMM16b,
1340                       XMM17, XMM17b,
1341                       XMM18, XMM18b,
1342                       XMM19, XMM19b,
1343                       XMM20, XMM20b,
1344                       XMM21, XMM21b,
1345                       XMM22, XMM22b,
1346                       XMM23, XMM23b,
1347                       XMM24, XMM24b,
1348                       XMM25, XMM25b,
1349                       XMM26, XMM26b,
1350                       XMM27, XMM27b,
1351                       XMM28, XMM28b,
1352                       XMM29, XMM29b,
1353                       XMM30, XMM30b,
1354                       XMM31, XMM31b
1355 #endif
1356                       );
1357 
1358 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1359 
// Class for pre evex 128bit vector registers
1361 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
1362                       XMM1,  XMM1b,  XMM1c,  XMM1d,
1363                       XMM2,  XMM2b,  XMM2c,  XMM2d,
1364                       XMM3,  XMM3b,  XMM3c,  XMM3d,
1365                       XMM4,  XMM4b,  XMM4c,  XMM4d,
1366                       XMM5,  XMM5b,  XMM5c,  XMM5d,
1367                       XMM6,  XMM6b,  XMM6c,  XMM6d,
1368                       XMM7,  XMM7b,  XMM7c,  XMM7d
1369 #ifdef _LP64
1370                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,
1371                       XMM9,  XMM9b,  XMM9c,  XMM9d,
1372                       XMM10, XMM10b, XMM10c, XMM10d,
1373                       XMM11, XMM11b, XMM11c, XMM11d,
1374                       XMM12, XMM12b, XMM12c, XMM12d,
1375                       XMM13, XMM13b, XMM13c, XMM13d,
1376                       XMM14, XMM14b, XMM14c, XMM14d,
1377                       XMM15, XMM15b, XMM15c, XMM15d
1378 #endif
1379                       );
1380 
// Class for evex 128bit vector registers
1382 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
1383                       XMM1,  XMM1b,  XMM1c,  XMM1d,
1384                       XMM2,  XMM2b,  XMM2c,  XMM2d,
1385                       XMM3,  XMM3b,  XMM3c,  XMM3d,
1386                       XMM4,  XMM4b,  XMM4c,  XMM4d,
1387                       XMM5,  XMM5b,  XMM5c,  XMM5d,
1388                       XMM6,  XMM6b,  XMM6c,  XMM6d,
1389                       XMM7,  XMM7b,  XMM7c,  XMM7d
1390 #ifdef _LP64
1391                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,
1392                       XMM9,  XMM9b,  XMM9c,  XMM9d,
1393                       XMM10, XMM10b, XMM10c, XMM10d,
1394                       XMM11, XMM11b, XMM11c, XMM11d,
1395                       XMM12, XMM12b, XMM12c, XMM12d,
1396                       XMM13, XMM13b, XMM13c, XMM13d,
1397                       XMM14, XMM14b, XMM14c, XMM14d,
1398                       XMM15, XMM15b, XMM15c, XMM15d,
1399                       XMM16, XMM16b, XMM16c, XMM16d,
1400                       XMM17, XMM17b, XMM17c, XMM17d,
1401                       XMM18, XMM18b, XMM18c, XMM18d,
1402                       XMM19, XMM19b, XMM19c, XMM19d,
1403                       XMM20, XMM20b, XMM20c, XMM20d,
1404                       XMM21, XMM21b, XMM21c, XMM21d,
1405                       XMM22, XMM22b, XMM22c, XMM22d,
1406                       XMM23, XMM23b, XMM23c, XMM23d,
1407                       XMM24, XMM24b, XMM24c, XMM24d,
1408                       XMM25, XMM25b, XMM25c, XMM25d,
1409                       XMM26, XMM26b, XMM26c, XMM26d,
1410                       XMM27, XMM27b, XMM27c, XMM27d,
1411                       XMM28, XMM28b, XMM28c, XMM28d,
1412                       XMM29, XMM29b, XMM29c, XMM29d,
1413                       XMM30, XMM30b, XMM30c, XMM30d,
1414                       XMM31, XMM31b, XMM31c, XMM31d
1415 #endif
1416                       );
1417 
1418 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1419 
// Class for pre evex 256bit vector registers
1421 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
1422                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
1423                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
1424                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
1425                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
1426                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
1427                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
1428                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
1429 #ifdef _LP64
1430                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
1431                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
1432                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1433                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1434                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1435                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1436                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1437                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
1438 #endif
1439                       );
1440 
// Class for evex 256bit vector registers
1442 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
1443                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
1444                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
1445                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
1446                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
1447                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
1448                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
1449                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
1450 #ifdef _LP64
1451                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
1452                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
1453                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1454                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1455                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1456                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1457                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1458                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1459                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1460                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1461                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1462                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1463                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1464                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1465                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1466                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1467                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1468                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1469                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1470                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1471                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1472                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1473                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1474                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
1475 #endif
1476                       );
1477 
1478 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1479 
1480 // Class for all 512bit vector registers
1481 reg_class vectorz_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
1482                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
1483                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
1484                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
1485                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
1486                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
1487                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
1488                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
1489 #ifdef _LP64
1490                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
1491                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
1492                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1493                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1494                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1495                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1496                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1497                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
1498                      ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1499                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1500                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1501                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1502                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1503                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1504                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1505                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1506                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1507                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1508                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1509                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1510                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1511                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1512                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1513                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
1514 #endif
1515                       );
1516 
1517 %}
1518 
1519 
1520 //----------SOURCE BLOCK-------------------------------------------------------
1521 // This is a block of C++ code which provides values, functions, and
1522 // definitions necessary in the rest of the architecture description
1523 
1524 source_hpp %{
1525 // Header information of the source block.
1526 // Method declarations/definitions which are used outside
1527 // the ad-scope can conveniently be defined here.
1528 //
1529 // To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} freely as needed.
1531 
1532 class NativeJump;
1533 
1534 class CallStubImpl {
1535 
1536   //--------------------------------------------------------------
1537   //---<  Used for optimization in Compile::shorten_branches  >---
1538   //--------------------------------------------------------------
1539 
1540  public:
1541   // Size of call trampoline stub.
1542   static uint size_call_trampoline() {
1543     return 0; // no call trampolines on this platform
1544   }
1545 
1546   // number of relocations needed by a call trampoline stub
1547   static uint reloc_call_trampoline() {
1548     return 0; // no call trampolines on this platform
1549   }
1550 };
1551 
1552 class HandlerImpl {
1553 
1554  public:
1555 
1556   static int emit_exception_handler(CodeBuffer &cbuf);
1557   static int emit_deopt_handler(CodeBuffer& cbuf);
1558 
1559   static uint size_exception_handler() {
1560     // NativeCall instruction size is the same as NativeJump.
1561     // exception handler starts out as jump and can be patched to
    // a call by deoptimization.  (4932387)
1563     // Note that this value is also credited (in output.cpp) to
1564     // the size of the code section.
1565     return NativeJump::instruction_size;
1566   }
1567 
1568 #ifdef _LP64
1569   static uint size_deopt_handler() {
1570     // three 5 byte instructions
1571     return 15;
1572   }
1573 #else
1574   static uint size_deopt_handler() {
1575     // NativeCall instruction size is the same as NativeJump.
1576     // exception handler starts out as jump and can be patched to
    // a call by deoptimization.  (4932387)
1578     // Note that this value is also credited (in output.cpp) to
1579     // the size of the code section.
1580     return 5 + NativeJump::instruction_size; // pushl(); jmp;
1581   }
1582 #endif
1583 };
1584 
1585 %} // end source_hpp
1586 
1587 source %{
1588 
1589 // Emit exception handler code.
1590 // Stuff framesize into a register and call a VM stub routine.
1591 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
1592 
1593   // Note that the code buffer's insts_mark is always relative to insts.
1594   // That's why we must use the macroassembler to generate a handler.
1595   MacroAssembler _masm(&cbuf);
1596   address base = __ start_a_stub(size_exception_handler());
1597   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1598   int offset = __ offset();
1599   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1600   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1601   __ end_a_stub();
1602   return offset;
1603 }
1604 
1605 // Emit deopt handler code.
1606 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
1607 
1608   // Note that the code buffer's insts_mark is always relative to insts.
1609   // That's why we must use the macroassembler to generate a handler.
1610   MacroAssembler _masm(&cbuf);
1611   address base = __ start_a_stub(size_deopt_handler());
1612   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1613   int offset = __ offset();
1614 
1615 #ifdef _LP64
1616   address the_pc = (address) __ pc();
1617   Label next;
1618   // push a "the_pc" on the stack without destroying any registers
1619   // as they all may be live.
1620 
1621   // push address of "next"
1622   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1623   __ bind(next);
1624   // adjust it so it matches "the_pc"
1625   __ subptr(Address(rsp, 0), __ offset() - offset);
1626 #else
1627   InternalAddress here(__ pc());
1628   __ pushptr(here.addr());
1629 #endif
1630 
1631   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1632   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1633   __ end_a_stub();
1634   return offset;
1635 }
1636 
1637 
1638 //=============================================================================
1639 
1640   // Float masks come from different places depending on platform.
1641 #ifdef _LP64
1642   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
1643   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
1644   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
1645   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
1646 #else
1647   static address float_signmask()  { return (address)float_signmask_pool; }
1648   static address float_signflip()  { return (address)float_signflip_pool; }
1649   static address double_signmask() { return (address)double_signmask_pool; }
1650   static address double_signflip() { return (address)double_signflip_pool; }
1651 #endif
1652 
1653 
1654 const bool Matcher::match_rule_supported(int opcode) {
1655   if (!has_match_rule(opcode))
1656     return false;
1657 
1658   switch (opcode) {
1659     case Op_PopCountI:
1660     case Op_PopCountL:
1661       if (!UsePopCountInstruction)
1662         return false;
1663     break;
1664     case Op_MulVI:
1665       if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
1666         return false;
1667     break;
1668     case Op_MulVL:
1669     case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq())
1671         return false;
1672     case Op_AddReductionVL:
1673       if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
1674         return false;
1675     case Op_AddReductionVI:
1676       if (UseSSE < 3) // requires at least SSE3
1677         return false;
1678     case Op_MulReductionVI:
1679       if (UseSSE < 4) // requires at least SSE4
1680         return false;
1681     case Op_AddReductionVF:
1682     case Op_AddReductionVD:
1683     case Op_MulReductionVF:
1684     case Op_MulReductionVD:
1685       if (UseSSE < 1) // requires at least SSE
1686         return false;
1687     break;
1688     case Op_CompareAndSwapL:
1689 #ifdef _LP64
1690     case Op_CompareAndSwapP:
1691 #endif
1692       if (!VM_Version::supports_cx8())
1693         return false;
1694     break;
1695   }
1696 
1697   return true;  // Per default match rules are supported.
1698 }
1699 
1700 // Max vector size in bytes. 0 if not supported.
1701 const int Matcher::vector_width_in_bytes(BasicType bt) {
1702   assert(is_java_primitive(bt), "only primitive type vectors");
1703   if (UseSSE < 2) return 0;
1704   // SSE2 supports 128bit vectors for all types.
1705   // AVX2 supports 256bit vectors for all types.
  // AVX512/EVEX supports 512bit vectors for all types.
1707   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
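  // Illustrative values: UseAVX == 2 gives (1 << 2) * 8 = 32 bytes, UseAVX == 3 gives 64 bytes.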
1708   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
1709   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
1710     size = (UseAVX > 2) ? 64 : 32;
1711   // Use flag to limit vector size.
1712   size = MIN2(size,(int)MaxVectorSize);
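  // e.g. running with -XX:MaxVectorSize=16 caps vectors at 16 bytes even on AVX2 hardware.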
1713   // Minimum 2 values in vector (or 4 for bytes).
1714   switch (bt) {
1715   case T_DOUBLE:
1716   case T_LONG:
1717     if (size < 16) return 0;
1718   case T_FLOAT:
1719   case T_INT:
1720     if (size < 8) return 0;
1721   case T_BOOLEAN:
1722   case T_BYTE:
1723   case T_CHAR:
1724   case T_SHORT:
1725     if (size < 4) return 0;
1726     break;
1727   default:
1728     ShouldNotReachHere();
1729   }
1730   return size;
1731 }
1732 
// Limit on vector size (number of elements) loaded into a vector register.
1734 const int Matcher::max_vector_size(const BasicType bt) {
1735   return vector_width_in_bytes(bt)/type2aelembytes(bt);
1736 }
1737 const int Matcher::min_vector_size(const BasicType bt) {
1738   int max_size = max_vector_size(bt);
1739   // Min size which can be loaded into vector is 4 bytes.
1740   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
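  // e.g. byte elements need at least 4 per vector, wider element types need at least 2.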
1741   return MIN2(size,max_size);
1742 }
1743 
// Vector ideal reg corresponding to specified size in bytes
1745 const int Matcher::vector_ideal_reg(int size) {
1746   assert(MaxVectorSize >= size, "");
1747   switch(size) {
1748     case  4: return Op_VecS;
1749     case  8: return Op_VecD;
1750     case 16: return Op_VecX;
1751     case 32: return Op_VecY;
1752     case 64: return Op_VecZ;
1753   }
1754   ShouldNotReachHere();
1755   return 0;
1756 }
1757 
1758 // Only lowest bits of xmm reg are used for vector shift count.
1759 const int Matcher::vector_shift_count_ideal_reg(int size) {
1760   return Op_VecS;
1761 }
1762 
1763 // x86 supports misaligned vectors store/load.
1764 const bool Matcher::misaligned_vectors_ok() {
1765   return !AlignVector; // can be changed by flag
1766 }
1767 
1768 // x86 AES instructions are compatible with SunJCE expanded
1769 // keys, hence we do not need to pass the original key to stubs
1770 const bool Matcher::pass_original_key_for_aes() {
1771   return false;
1772 }
1773 
1774 // Helper methods for MachSpillCopyNode::implementation().
1775 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
1776                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
1777   // In 64-bit VM size calculation is very complex. Emitting instructions
1778   // into scratch buffer is used to get size in 64-bit VM.
1779   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1780   assert(ireg == Op_VecS || // 32bit vector
1781          (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
1782          (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
1783          "no non-adjacent vector moves" );
1784   if (cbuf) {
1785     MacroAssembler _masm(cbuf);
1786     int offset = __ offset();
1787     switch (ireg) {
1788     case Op_VecS: // copy whole register
1789     case Op_VecD:
1790     case Op_VecX:
1791       __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1792       break;
1793     case Op_VecY:
1794       __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1795       break;
1796     case Op_VecZ:
1797       __ evmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
1798       break;
1799     default:
1800       ShouldNotReachHere();
1801     }
1802     int size = __ offset() - offset;
1803 #ifdef ASSERT
1804     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
1806 #endif
1807     return size;
1808 #ifndef PRODUCT
1809   } else if (!do_size) {
1810     switch (ireg) {
1811     case Op_VecS:
1812     case Op_VecD:
1813     case Op_VecX:
1814       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1815       break;
1816     case Op_VecY:
1817     case Op_VecZ:
1818       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1819       break;
1820     default:
1821       ShouldNotReachHere();
1822     }
1823 #endif
1824   }
1825   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
1826   return (UseAVX > 2) ? 6 : 4;
1827 }
1828 
1829 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
1830                             int stack_offset, int reg, uint ireg, outputStream* st) {
1831   // In 64-bit VM size calculation is very complex. Emitting instructions
1832   // into scratch buffer is used to get size in 64-bit VM.
1833   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1834   if (cbuf) {
1835     MacroAssembler _masm(cbuf);
1836     int offset = __ offset();
1837     if (is_load) {
1838       switch (ireg) {
1839       case Op_VecS:
1840         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1841         break;
1842       case Op_VecD:
1843         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1844         break;
1845       case Op_VecX:
1846         __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1847         break;
1848       case Op_VecY:
1849         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1850         break;
1851       case Op_VecZ:
1852         __ evmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
1853         break;
1854       default:
1855         ShouldNotReachHere();
1856       }
1857     } else { // store
1858       switch (ireg) {
1859       case Op_VecS:
1860         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1861         break;
1862       case Op_VecD:
1863         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1864         break;
1865       case Op_VecX:
1866         __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1867         break;
1868       case Op_VecY:
1869         __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1870         break;
1871       case Op_VecZ:
1872         __ evmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
1873         break;
1874       default:
1875         ShouldNotReachHere();
1876       }
1877     }
1878     int size = __ offset() - offset;
1879 #ifdef ASSERT
1880     int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
1881     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
1883 #endif
1884     return size;
1885 #ifndef PRODUCT
1886   } else if (!do_size) {
1887     if (is_load) {
1888       switch (ireg) {
1889       case Op_VecS:
1890         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1891         break;
1892       case Op_VecD:
1893         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1894         break;
1895        case Op_VecX:
1896         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1897         break;
1898       case Op_VecY:
1899       case Op_VecZ:
1900         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1901         break;
1902       default:
1903         ShouldNotReachHere();
1904       }
1905     } else { // store
1906       switch (ireg) {
1907       case Op_VecS:
1908         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1909         break;
1910       case Op_VecD:
1911         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1912         break;
1913        case Op_VecX:
1914         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1915         break;
1916       case Op_VecY:
1917       case Op_VecZ:
1918         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1919         break;
1920       default:
1921         ShouldNotReachHere();
1922       }
1923     }
1924 #endif
1925   }
1926   int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
1927   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
1928   return 5+offset_size;
1929 }
1930 
1931 static inline jfloat replicate4_imm(int con, int width) {
1932   // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
1933   assert(width == 1 || width == 2, "only byte or short types here");
1934   int bit_width = width * 8;
1935   jint val = con;
1936   val &= (1 << bit_width) - 1;  // mask off sign bits
1937   while(bit_width < 32) {
1938     val |= (val << bit_width);
1939     bit_width <<= 1;
1940   }
1941   jfloat fval = *((jfloat*) &val);  // coerce to float type
1942   return fval;
1943 }
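     // Example (illustrative values): replicate4_imm(0xAB, 1) masks the byte to
     // 0xAB, then doubles it to 0xABAB and to 0xABABABAB; the 32-bit pattern is
     // returned reinterpreted as a jfloat so it can serve as a constant-table entry.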
1944 
1945 static inline jdouble replicate8_imm(int con, int width) {
1946   // Load a constant of "width" (in bytes) and replicate it to fill 64 bits.
1947   assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
1948   int bit_width = width * 8;
1949   jlong val = con;
1950   val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
1951   while(bit_width < 64) {
1952     val |= (val << bit_width);
1953     bit_width <<= 1;
1954   }
1955   jdouble dval = *((jdouble*) &val);  // coerce to double type
1956   return dval;
1957 }
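     // Example (illustrative values): replicate8_imm(0x1234, 2) masks the short to
     // 0x1234 and doubles it up to 0x1234123412341234; the 64-bit pattern is
     // returned reinterpreted as a jdouble for a single constant-table entry.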
1958 
1959 #ifndef PRODUCT
1960   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
1961     st->print("nop \t# %d bytes pad for loops and calls", _count);
1962   }
1963 #endif
1964 
1965   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
1966     MacroAssembler _masm(&cbuf);
1967     __ nop(_count);
1968   }
1969 
1970   uint MachNopNode::size(PhaseRegAlloc*) const {
1971     return _count;
1972   }
1973 
1974 #ifndef PRODUCT
1975   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
1976     st->print("# breakpoint");
1977   }
1978 #endif
1979 
1980   void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
1981     MacroAssembler _masm(&cbuf);
1982     __ int3();
1983   }
1984 
1985   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
1986     return MachNode::size(ra_);
1987   }
1988 
1989 %}
1990 
1991 encode %{
1992 
1993   enc_class preserve_SP %{
1994     debug_only(int off0 = cbuf.insts_size());
1995     MacroAssembler _masm(&cbuf);
1996     // RBP is preserved across all calls, even compiled calls.
1997     // Use it to preserve RSP in places where the callee might change the SP.
1998     __ movptr(rbp_mh_SP_save, rsp);
1999     debug_only(int off1 = cbuf.insts_size());
2000     assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
2001   %}
2002 
2003   enc_class restore_SP %{
2004     MacroAssembler _masm(&cbuf);
2005     __ movptr(rsp, rbp_mh_SP_save);
2006   %}
2007 
2008   enc_class call_epilog %{
2009     if (VerifyStackAtCalls) {
2010       // Check that stack depth is unchanged: find magic cookie on stack
2011       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
2012       MacroAssembler _masm(&cbuf);
2013       Label L;
2014       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
2015       __ jccb(Assembler::equal, L);
2016       // Die if stack mismatch
2017       __ int3();
2018       __ bind(L);
2019     }
2020   %}
2021 
2022 %}
2023 
2024 
2025 //----------OPERANDS-----------------------------------------------------------
2026 // Operand definitions must precede instruction definitions for correct parsing
2027 // in the ADLC because operands constitute user defined types which are used in
2028 // instruction definitions.
2029 
2030 // This operand applies only to EVEX-enabled targets, so there is a single version.
2031 operand vecZ() %{
2032   constraint(ALLOC_IN_RC(vectorz_reg));
2033   match(VecZ);
2034 
2035   format %{ %}
2036   interface(REG_INTER);
2037 %}
2038 
2039 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
2040 
2041 // ============================================================================
2042 
2043 instruct ShouldNotReachHere() %{
2044   match(Halt);
2045   format %{ "int3\t# ShouldNotReachHere" %}
2046   ins_encode %{
2047     __ int3();
2048   %}
2049   ins_pipe(pipe_slow);
2050 %}
2051 
2052 // ============================================================================
2053 
2054 instruct addF_reg(regF dst, regF src) %{
2055   predicate((UseSSE>=1) && (UseAVX == 0));
2056   match(Set dst (AddF dst src));
2057 
2058   format %{ "addss   $dst, $src" %}
2059   ins_cost(150);
2060   ins_encode %{
2061     __ addss($dst$$XMMRegister, $src$$XMMRegister);
2062   %}
2063   ins_pipe(pipe_slow);
2064 %}
2065 
2066 instruct addF_mem(regF dst, memory src) %{
2067   predicate((UseSSE>=1) && (UseAVX == 0));
2068   match(Set dst (AddF dst (LoadF src)));
2069 
2070   format %{ "addss   $dst, $src" %}
2071   ins_cost(150);
2072   ins_encode %{
2073     __ addss($dst$$XMMRegister, $src$$Address);
2074   %}
2075   ins_pipe(pipe_slow);
2076 %}
2077 
2078 instruct addF_imm(regF dst, immF con) %{
2079   predicate((UseSSE>=1) && (UseAVX == 0));
2080   match(Set dst (AddF dst con));
2081   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2082   ins_cost(150);
2083   ins_encode %{
2084     __ addss($dst$$XMMRegister, $constantaddress($con));
2085   %}
2086   ins_pipe(pipe_slow);
2087 %}
2088 
2089 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
2090   predicate(UseAVX > 0);
2091   match(Set dst (AddF src1 src2));
2092 
2093   format %{ "vaddss  $dst, $src1, $src2" %}
2094   ins_cost(150);
2095   ins_encode %{
2096     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2097   %}
2098   ins_pipe(pipe_slow);
2099 %}
2100 
2101 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
2102   predicate(UseAVX > 0);
2103   match(Set dst (AddF src1 (LoadF src2)));
2104 
2105   format %{ "vaddss  $dst, $src1, $src2" %}
2106   ins_cost(150);
2107   ins_encode %{
2108     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2109   %}
2110   ins_pipe(pipe_slow);
2111 %}
2112 
2113 instruct addF_reg_imm(regF dst, regF src, immF con) %{
2114   predicate(UseAVX > 0);
2115   match(Set dst (AddF src con));
2116 
2117   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2118   ins_cost(150);
2119   ins_encode %{
2120     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2121   %}
2122   ins_pipe(pipe_slow);
2123 %}
2124 
2125 instruct addD_reg(regD dst, regD src) %{
2126   predicate((UseSSE>=2) && (UseAVX == 0));
2127   match(Set dst (AddD dst src));
2128 
2129   format %{ "addsd   $dst, $src" %}
2130   ins_cost(150);
2131   ins_encode %{
2132     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
2133   %}
2134   ins_pipe(pipe_slow);
2135 %}
2136 
2137 instruct addD_mem(regD dst, memory src) %{
2138   predicate((UseSSE>=2) && (UseAVX == 0));
2139   match(Set dst (AddD dst (LoadD src)));
2140 
2141   format %{ "addsd   $dst, $src" %}
2142   ins_cost(150);
2143   ins_encode %{
2144     __ addsd($dst$$XMMRegister, $src$$Address);
2145   %}
2146   ins_pipe(pipe_slow);
2147 %}
2148 
2149 instruct addD_imm(regD dst, immD con) %{
2150   predicate((UseSSE>=2) && (UseAVX == 0));
2151   match(Set dst (AddD dst con));
2152   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2153   ins_cost(150);
2154   ins_encode %{
2155     __ addsd($dst$$XMMRegister, $constantaddress($con));
2156   %}
2157   ins_pipe(pipe_slow);
2158 %}
2159 
2160 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
2161   predicate(UseAVX > 0);
2162   match(Set dst (AddD src1 src2));
2163 
2164   format %{ "vaddsd  $dst, $src1, $src2" %}
2165   ins_cost(150);
2166   ins_encode %{
2167     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2168   %}
2169   ins_pipe(pipe_slow);
2170 %}
2171 
2172 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
2173   predicate(UseAVX > 0);
2174   match(Set dst (AddD src1 (LoadD src2)));
2175 
2176   format %{ "vaddsd  $dst, $src1, $src2" %}
2177   ins_cost(150);
2178   ins_encode %{
2179     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2180   %}
2181   ins_pipe(pipe_slow);
2182 %}
2183 
2184 instruct addD_reg_imm(regD dst, regD src, immD con) %{
2185   predicate(UseAVX > 0);
2186   match(Set dst (AddD src con));
2187 
2188   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2189   ins_cost(150);
2190   ins_encode %{
2191     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2192   %}
2193   ins_pipe(pipe_slow);
2194 %}
2195 
2196 instruct subF_reg(regF dst, regF src) %{
2197   predicate((UseSSE>=1) && (UseAVX == 0));
2198   match(Set dst (SubF dst src));
2199 
2200   format %{ "subss   $dst, $src" %}
2201   ins_cost(150);
2202   ins_encode %{
2203     __ subss($dst$$XMMRegister, $src$$XMMRegister);
2204   %}
2205   ins_pipe(pipe_slow);
2206 %}
2207 
2208 instruct subF_mem(regF dst, memory src) %{
2209   predicate((UseSSE>=1) && (UseAVX == 0));
2210   match(Set dst (SubF dst (LoadF src)));
2211 
2212   format %{ "subss   $dst, $src" %}
2213   ins_cost(150);
2214   ins_encode %{
2215     __ subss($dst$$XMMRegister, $src$$Address);
2216   %}
2217   ins_pipe(pipe_slow);
2218 %}
2219 
2220 instruct subF_imm(regF dst, immF con) %{
2221   predicate((UseSSE>=1) && (UseAVX == 0));
2222   match(Set dst (SubF dst con));
2223   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2224   ins_cost(150);
2225   ins_encode %{
2226     __ subss($dst$$XMMRegister, $constantaddress($con));
2227   %}
2228   ins_pipe(pipe_slow);
2229 %}
2230 
2231 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
2232   predicate(UseAVX > 0);
2233   match(Set dst (SubF src1 src2));
2234 
2235   format %{ "vsubss  $dst, $src1, $src2" %}
2236   ins_cost(150);
2237   ins_encode %{
2238     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2239   %}
2240   ins_pipe(pipe_slow);
2241 %}
2242 
2243 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
2244   predicate(UseAVX > 0);
2245   match(Set dst (SubF src1 (LoadF src2)));
2246 
2247   format %{ "vsubss  $dst, $src1, $src2" %}
2248   ins_cost(150);
2249   ins_encode %{
2250     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2251   %}
2252   ins_pipe(pipe_slow);
2253 %}
2254 
2255 instruct subF_reg_imm(regF dst, regF src, immF con) %{
2256   predicate(UseAVX > 0);
2257   match(Set dst (SubF src con));
2258 
2259   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2260   ins_cost(150);
2261   ins_encode %{
2262     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2263   %}
2264   ins_pipe(pipe_slow);
2265 %}
2266 
2267 instruct subD_reg(regD dst, regD src) %{
2268   predicate((UseSSE>=2) && (UseAVX == 0));
2269   match(Set dst (SubD dst src));
2270 
2271   format %{ "subsd   $dst, $src" %}
2272   ins_cost(150);
2273   ins_encode %{
2274     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
2275   %}
2276   ins_pipe(pipe_slow);
2277 %}
2278 
2279 instruct subD_mem(regD dst, memory src) %{
2280   predicate((UseSSE>=2) && (UseAVX == 0));
2281   match(Set dst (SubD dst (LoadD src)));
2282 
2283   format %{ "subsd   $dst, $src" %}
2284   ins_cost(150);
2285   ins_encode %{
2286     __ subsd($dst$$XMMRegister, $src$$Address);
2287   %}
2288   ins_pipe(pipe_slow);
2289 %}
2290 
2291 instruct subD_imm(regD dst, immD con) %{
2292   predicate((UseSSE>=2) && (UseAVX == 0));
2293   match(Set dst (SubD dst con));
2294   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2295   ins_cost(150);
2296   ins_encode %{
2297     __ subsd($dst$$XMMRegister, $constantaddress($con));
2298   %}
2299   ins_pipe(pipe_slow);
2300 %}
2301 
2302 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
2303   predicate(UseAVX > 0);
2304   match(Set dst (SubD src1 src2));
2305 
2306   format %{ "vsubsd  $dst, $src1, $src2" %}
2307   ins_cost(150);
2308   ins_encode %{
2309     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2310   %}
2311   ins_pipe(pipe_slow);
2312 %}
2313 
2314 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
2315   predicate(UseAVX > 0);
2316   match(Set dst (SubD src1 (LoadD src2)));
2317 
2318   format %{ "vsubsd  $dst, $src1, $src2" %}
2319   ins_cost(150);
2320   ins_encode %{
2321     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2322   %}
2323   ins_pipe(pipe_slow);
2324 %}
2325 
2326 instruct subD_reg_imm(regD dst, regD src, immD con) %{
2327   predicate(UseAVX > 0);
2328   match(Set dst (SubD src con));
2329 
2330   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2331   ins_cost(150);
2332   ins_encode %{
2333     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2334   %}
2335   ins_pipe(pipe_slow);
2336 %}
2337 
2338 instruct mulF_reg(regF dst, regF src) %{
2339   predicate((UseSSE>=1) && (UseAVX == 0));
2340   match(Set dst (MulF dst src));
2341 
2342   format %{ "mulss   $dst, $src" %}
2343   ins_cost(150);
2344   ins_encode %{
2345     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
2346   %}
2347   ins_pipe(pipe_slow);
2348 %}
2349 
2350 instruct mulF_mem(regF dst, memory src) %{
2351   predicate((UseSSE>=1) && (UseAVX == 0));
2352   match(Set dst (MulF dst (LoadF src)));
2353 
2354   format %{ "mulss   $dst, $src" %}
2355   ins_cost(150);
2356   ins_encode %{
2357     __ mulss($dst$$XMMRegister, $src$$Address);
2358   %}
2359   ins_pipe(pipe_slow);
2360 %}
2361 
2362 instruct mulF_imm(regF dst, immF con) %{
2363   predicate((UseSSE>=1) && (UseAVX == 0));
2364   match(Set dst (MulF dst con));
2365   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2366   ins_cost(150);
2367   ins_encode %{
2368     __ mulss($dst$$XMMRegister, $constantaddress($con));
2369   %}
2370   ins_pipe(pipe_slow);
2371 %}
2372 
2373 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
2374   predicate(UseAVX > 0);
2375   match(Set dst (MulF src1 src2));
2376 
2377   format %{ "vmulss  $dst, $src1, $src2" %}
2378   ins_cost(150);
2379   ins_encode %{
2380     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2381   %}
2382   ins_pipe(pipe_slow);
2383 %}
2384 
2385 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
2386   predicate(UseAVX > 0);
2387   match(Set dst (MulF src1 (LoadF src2)));
2388 
2389   format %{ "vmulss  $dst, $src1, $src2" %}
2390   ins_cost(150);
2391   ins_encode %{
2392     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2393   %}
2394   ins_pipe(pipe_slow);
2395 %}
2396 
2397 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
2398   predicate(UseAVX > 0);
2399   match(Set dst (MulF src con));
2400 
2401   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2402   ins_cost(150);
2403   ins_encode %{
2404     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2405   %}
2406   ins_pipe(pipe_slow);
2407 %}
2408 
2409 instruct mulD_reg(regD dst, regD src) %{
2410   predicate((UseSSE>=2) && (UseAVX == 0));
2411   match(Set dst (MulD dst src));
2412 
2413   format %{ "mulsd   $dst, $src" %}
2414   ins_cost(150);
2415   ins_encode %{
2416     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
2417   %}
2418   ins_pipe(pipe_slow);
2419 %}
2420 
2421 instruct mulD_mem(regD dst, memory src) %{
2422   predicate((UseSSE>=2) && (UseAVX == 0));
2423   match(Set dst (MulD dst (LoadD src)));
2424 
2425   format %{ "mulsd   $dst, $src" %}
2426   ins_cost(150);
2427   ins_encode %{
2428     __ mulsd($dst$$XMMRegister, $src$$Address);
2429   %}
2430   ins_pipe(pipe_slow);
2431 %}
2432 
2433 instruct mulD_imm(regD dst, immD con) %{
2434   predicate((UseSSE>=2) && (UseAVX == 0));
2435   match(Set dst (MulD dst con));
2436   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2437   ins_cost(150);
2438   ins_encode %{
2439     __ mulsd($dst$$XMMRegister, $constantaddress($con));
2440   %}
2441   ins_pipe(pipe_slow);
2442 %}
2443 
2444 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
2445   predicate(UseAVX > 0);
2446   match(Set dst (MulD src1 src2));
2447 
2448   format %{ "vmulsd  $dst, $src1, $src2" %}
2449   ins_cost(150);
2450   ins_encode %{
2451     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2452   %}
2453   ins_pipe(pipe_slow);
2454 %}
2455 
2456 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
2457   predicate(UseAVX > 0);
2458   match(Set dst (MulD src1 (LoadD src2)));
2459 
2460   format %{ "vmulsd  $dst, $src1, $src2" %}
2461   ins_cost(150);
2462   ins_encode %{
2463     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2464   %}
2465   ins_pipe(pipe_slow);
2466 %}
2467 
2468 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
2469   predicate(UseAVX > 0);
2470   match(Set dst (MulD src con));
2471 
2472   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2473   ins_cost(150);
2474   ins_encode %{
2475     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2476   %}
2477   ins_pipe(pipe_slow);
2478 %}
2479 
2480 instruct divF_reg(regF dst, regF src) %{
2481   predicate((UseSSE>=1) && (UseAVX == 0));
2482   match(Set dst (DivF dst src));
2483 
2484   format %{ "divss   $dst, $src" %}
2485   ins_cost(150);
2486   ins_encode %{
2487     __ divss($dst$$XMMRegister, $src$$XMMRegister);
2488   %}
2489   ins_pipe(pipe_slow);
2490 %}
2491 
2492 instruct divF_mem(regF dst, memory src) %{
2493   predicate((UseSSE>=1) && (UseAVX == 0));
2494   match(Set dst (DivF dst (LoadF src)));
2495 
2496   format %{ "divss   $dst, $src" %}
2497   ins_cost(150);
2498   ins_encode %{
2499     __ divss($dst$$XMMRegister, $src$$Address);
2500   %}
2501   ins_pipe(pipe_slow);
2502 %}
2503 
2504 instruct divF_imm(regF dst, immF con) %{
2505   predicate((UseSSE>=1) && (UseAVX == 0));
2506   match(Set dst (DivF dst con));
2507   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2508   ins_cost(150);
2509   ins_encode %{
2510     __ divss($dst$$XMMRegister, $constantaddress($con));
2511   %}
2512   ins_pipe(pipe_slow);
2513 %}
2514 
2515 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
2516   predicate(UseAVX > 0);
2517   match(Set dst (DivF src1 src2));
2518 
2519   format %{ "vdivss  $dst, $src1, $src2" %}
2520   ins_cost(150);
2521   ins_encode %{
2522     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2523   %}
2524   ins_pipe(pipe_slow);
2525 %}
2526 
2527 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
2528   predicate(UseAVX > 0);
2529   match(Set dst (DivF src1 (LoadF src2)));
2530 
2531   format %{ "vdivss  $dst, $src1, $src2" %}
2532   ins_cost(150);
2533   ins_encode %{
2534     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2535   %}
2536   ins_pipe(pipe_slow);
2537 %}
2538 
2539 instruct divF_reg_imm(regF dst, regF src, immF con) %{
2540   predicate(UseAVX > 0);
2541   match(Set dst (DivF src con));
2542 
2543   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2544   ins_cost(150);
2545   ins_encode %{
2546     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2547   %}
2548   ins_pipe(pipe_slow);
2549 %}
2550 
2551 instruct divD_reg(regD dst, regD src) %{
2552   predicate((UseSSE>=2) && (UseAVX == 0));
2553   match(Set dst (DivD dst src));
2554 
2555   format %{ "divsd   $dst, $src" %}
2556   ins_cost(150);
2557   ins_encode %{
2558     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
2559   %}
2560   ins_pipe(pipe_slow);
2561 %}
2562 
2563 instruct divD_mem(regD dst, memory src) %{
2564   predicate((UseSSE>=2) && (UseAVX == 0));
2565   match(Set dst (DivD dst (LoadD src)));
2566 
2567   format %{ "divsd   $dst, $src" %}
2568   ins_cost(150);
2569   ins_encode %{
2570     __ divsd($dst$$XMMRegister, $src$$Address);
2571   %}
2572   ins_pipe(pipe_slow);
2573 %}
2574 
2575 instruct divD_imm(regD dst, immD con) %{
2576   predicate((UseSSE>=2) && (UseAVX == 0));
2577   match(Set dst (DivD dst con));
2578   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2579   ins_cost(150);
2580   ins_encode %{
2581     __ divsd($dst$$XMMRegister, $constantaddress($con));
2582   %}
2583   ins_pipe(pipe_slow);
2584 %}
2585 
2586 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
2587   predicate(UseAVX > 0);
2588   match(Set dst (DivD src1 src2));
2589 
2590   format %{ "vdivsd  $dst, $src1, $src2" %}
2591   ins_cost(150);
2592   ins_encode %{
2593     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2594   %}
2595   ins_pipe(pipe_slow);
2596 %}
2597 
2598 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
2599   predicate(UseAVX > 0);
2600   match(Set dst (DivD src1 (LoadD src2)));
2601 
2602   format %{ "vdivsd  $dst, $src1, $src2" %}
2603   ins_cost(150);
2604   ins_encode %{
2605     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2606   %}
2607   ins_pipe(pipe_slow);
2608 %}
2609 
2610 instruct divD_reg_imm(regD dst, regD src, immD con) %{
2611   predicate(UseAVX > 0);
2612   match(Set dst (DivD src con));
2613 
2614   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2615   ins_cost(150);
2616   ins_encode %{
2617     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2618   %}
2619   ins_pipe(pipe_slow);
2620 %}
2621 
2622 instruct absF_reg(regF dst) %{
2623   predicate((UseSSE>=1) && (UseAVX == 0));
2624   match(Set dst (AbsF dst));
2625   ins_cost(150);
2626   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
2627   ins_encode %{
2628     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
2629   %}
2630   ins_pipe(pipe_slow);
2631 %}
2632 
2633 instruct absF_reg_reg(regF dst, regF src) %{
2634   predicate(UseAVX > 0);
2635   match(Set dst (AbsF src));
2636   ins_cost(150);
2637   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
2638   ins_encode %{
2639     int vector_len = 0;
2640     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
2641               ExternalAddress(float_signmask()), vector_len);
2642   %}
2643   ins_pipe(pipe_slow);
2644 %}
2645 
2646 instruct absD_reg(regD dst) %{
2647   predicate((UseSSE>=2) && (UseAVX == 0));
2648   match(Set dst (AbsD dst));
2649   ins_cost(150);
2650   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
2651             "# abs double by sign masking" %}
2652   ins_encode %{
2653     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
2654   %}
2655   ins_pipe(pipe_slow);
2656 %}
2657 
2658 instruct absD_reg_reg(regD dst, regD src) %{
2659   predicate(UseAVX > 0);
2660   match(Set dst (AbsD src));
2661   ins_cost(150);
2662   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
2663             "# abs double by sign masking" %}
2664   ins_encode %{
2665     int vector_len = 0;
2666     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
2667               ExternalAddress(double_signmask()), vector_len);
2668   %}
2669   ins_pipe(pipe_slow);
2670 %}
2671 
2672 instruct negF_reg(regF dst) %{
2673   predicate((UseSSE>=1) && (UseAVX == 0));
2674   match(Set dst (NegF dst));
2675   ins_cost(150);
2676   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
2677   ins_encode %{
2678     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
2679   %}
2680   ins_pipe(pipe_slow);
2681 %}
2682 
2683 instruct negF_reg_reg(regF dst, regF src) %{
2684   predicate(UseAVX > 0);
2685   match(Set dst (NegF src));
2686   ins_cost(150);
2687   format %{ "vxorps  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
2688   ins_encode %{
2689     int vector_len = 0;
2690     __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
2691               ExternalAddress(float_signflip()), vector_len);
2692   %}
2693   ins_pipe(pipe_slow);
2694 %}
2695 
2696 instruct negD_reg(regD dst) %{
2697   predicate((UseSSE>=2) && (UseAVX == 0));
2698   match(Set dst (NegD dst));
2699   ins_cost(150);
2700   format %{ "xorpd   $dst, [0x8000000000000000]\t"
2701             "# neg double by sign flipping" %}
2702   ins_encode %{
2703     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
2704   %}
2705   ins_pipe(pipe_slow);
2706 %}
2707 
2708 instruct negD_reg_reg(regD dst, regD src) %{
2709   predicate(UseAVX > 0);
2710   match(Set dst (NegD src));
2711   ins_cost(150);
2712   format %{ "vxorpd  $dst, $src, [0x8000000000000000]\t"
2713             "# neg double by sign flipping" %}
2714   ins_encode %{
2715     int vector_len = 0;
2716     __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
2717               ExternalAddress(double_signflip()), vector_len);
2718   %}
2719   ins_pipe(pipe_slow);
2720 %}
2721 
2722 instruct sqrtF_reg(regF dst, regF src) %{
2723   predicate(UseSSE>=1);
2724   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
2725 
2726   format %{ "sqrtss  $dst, $src" %}
2727   ins_cost(150);
2728   ins_encode %{
2729     __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
2730   %}
2731   ins_pipe(pipe_slow);
2732 %}
2733 
2734 instruct sqrtF_mem(regF dst, memory src) %{
2735   predicate(UseSSE>=1);
2736   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
2737 
2738   format %{ "sqrtss  $dst, $src" %}
2739   ins_cost(150);
2740   ins_encode %{
2741     __ sqrtss($dst$$XMMRegister, $src$$Address);
2742   %}
2743   ins_pipe(pipe_slow);
2744 %}
2745 
2746 instruct sqrtF_imm(regF dst, immF con) %{
2747   predicate(UseSSE>=1);
2748   match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
2749   format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2750   ins_cost(150);
2751   ins_encode %{
2752     __ sqrtss($dst$$XMMRegister, $constantaddress($con));
2753   %}
2754   ins_pipe(pipe_slow);
2755 %}
2756 
2757 instruct sqrtD_reg(regD dst, regD src) %{
2758   predicate(UseSSE>=2);
2759   match(Set dst (SqrtD src));
2760 
2761   format %{ "sqrtsd  $dst, $src" %}
2762   ins_cost(150);
2763   ins_encode %{
2764     __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
2765   %}
2766   ins_pipe(pipe_slow);
2767 %}
2768 
2769 instruct sqrtD_mem(regD dst, memory src) %{
2770   predicate(UseSSE>=2);
2771   match(Set dst (SqrtD (LoadD src)));
2772 
2773   format %{ "sqrtsd  $dst, $src" %}
2774   ins_cost(150);
2775   ins_encode %{
2776     __ sqrtsd($dst$$XMMRegister, $src$$Address);
2777   %}
2778   ins_pipe(pipe_slow);
2779 %}
2780 
2781 instruct sqrtD_imm(regD dst, immD con) %{
2782   predicate(UseSSE>=2);
2783   match(Set dst (SqrtD con));
2784   format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2785   ins_cost(150);
2786   ins_encode %{
2787     __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
2788   %}
2789   ins_pipe(pipe_slow);
2790 %}
2791 
2792 // ====================VECTOR INSTRUCTIONS=====================================
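     // The vector operand classes used below correspond to fixed widths:
     //   vecS = 4 bytes, vecD = 8 bytes, vecX = 16 bytes (XMM),
     //   vecY = 32 bytes (YMM), vecZ = 64 bytes (ZMM, EVEX only).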
2793 
2794 // Load vectors (4 bytes long)
2795 instruct loadV4(vecS dst, memory mem) %{
2796   predicate(n->as_LoadVector()->memory_size() == 4);
2797   match(Set dst (LoadVector mem));
2798   ins_cost(125);
2799   format %{ "movd    $dst,$mem\t! load vector (4 bytes)" %}
2800   ins_encode %{
2801     __ movdl($dst$$XMMRegister, $mem$$Address);
2802   %}
2803   ins_pipe( pipe_slow );
2804 %}
2805 
2806 // Load vectors (8 bytes long)
2807 instruct loadV8(vecD dst, memory mem) %{
2808   predicate(n->as_LoadVector()->memory_size() == 8);
2809   match(Set dst (LoadVector mem));
2810   ins_cost(125);
2811   format %{ "movq    $dst,$mem\t! load vector (8 bytes)" %}
2812   ins_encode %{
2813     __ movq($dst$$XMMRegister, $mem$$Address);
2814   %}
2815   ins_pipe( pipe_slow );
2816 %}
2817 
2818 // Load vectors (16 bytes long)
2819 instruct loadV16(vecX dst, memory mem) %{
2820   predicate(n->as_LoadVector()->memory_size() == 16);
2821   match(Set dst (LoadVector mem));
2822   ins_cost(125);
2823   format %{ "movdqu  $dst,$mem\t! load vector (16 bytes)" %}
2824   ins_encode %{
2825     __ movdqu($dst$$XMMRegister, $mem$$Address);
2826   %}
2827   ins_pipe( pipe_slow );
2828 %}
2829 
2830 // Load vectors (32 bytes long)
2831 instruct loadV32(vecY dst, memory mem) %{
2832   predicate(n->as_LoadVector()->memory_size() == 32);
2833   match(Set dst (LoadVector mem));
2834   ins_cost(125);
2835   format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
2836   ins_encode %{
2837     __ vmovdqu($dst$$XMMRegister, $mem$$Address);
2838   %}
2839   ins_pipe( pipe_slow );
2840 %}
2841 
2842 // Load vectors (64 bytes long)
2843 instruct loadV64(vecZ dst, memory mem) %{
2844   predicate(n->as_LoadVector()->memory_size() == 64);
2845   match(Set dst (LoadVector mem));
2846   ins_cost(125);
2847   format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %}
2848   ins_encode %{
2849     int vector_len = 2;
2850     __ evmovdqu($dst$$XMMRegister, $mem$$Address, vector_len);
2851   %}
2852   ins_pipe( pipe_slow );
2853 %}
2854 
2855 // Store vectors
2856 instruct storeV4(memory mem, vecS src) %{
2857   predicate(n->as_StoreVector()->memory_size() == 4);
2858   match(Set mem (StoreVector mem src));
2859   ins_cost(145);
2860   format %{ "movd    $mem,$src\t! store vector (4 bytes)" %}
2861   ins_encode %{
2862     __ movdl($mem$$Address, $src$$XMMRegister);
2863   %}
2864   ins_pipe( pipe_slow );
2865 %}
2866 
2867 instruct storeV8(memory mem, vecD src) %{
2868   predicate(n->as_StoreVector()->memory_size() == 8);
2869   match(Set mem (StoreVector mem src));
2870   ins_cost(145);
2871   format %{ "movq    $mem,$src\t! store vector (8 bytes)" %}
2872   ins_encode %{
2873     __ movq($mem$$Address, $src$$XMMRegister);
2874   %}
2875   ins_pipe( pipe_slow );
2876 %}
2877 
2878 instruct storeV16(memory mem, vecX src) %{
2879   predicate(n->as_StoreVector()->memory_size() == 16);
2880   match(Set mem (StoreVector mem src));
2881   ins_cost(145);
2882   format %{ "movdqu  $mem,$src\t! store vector (16 bytes)" %}
2883   ins_encode %{
2884     __ movdqu($mem$$Address, $src$$XMMRegister);
2885   %}
2886   ins_pipe( pipe_slow );
2887 %}
2888 
2889 instruct storeV32(memory mem, vecY src) %{
2890   predicate(n->as_StoreVector()->memory_size() == 32);
2891   match(Set mem (StoreVector mem src));
2892   ins_cost(145);
2893   format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
2894   ins_encode %{
2895     __ vmovdqu($mem$$Address, $src$$XMMRegister);
2896   %}
2897   ins_pipe( pipe_slow );
2898 %}
2899 
2900 instruct storeV64(memory mem, vecZ src) %{
2901   predicate(n->as_StoreVector()->memory_size() == 64);
2902   match(Set mem (StoreVector mem src));
2903   ins_cost(145);
2904   format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %}
2905   ins_encode %{
2906     int vector_len = 2;
2907     __ evmovdqu($mem$$Address, $src$$XMMRegister, vector_len);
2908   %}
2909   ins_pipe( pipe_slow );
2910 %}
2911 
2912 // Replicate byte scalar to be vector
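     // General lowering pattern: movd copies the scalar from the general register
     // into the low 32 bits of the XMM register, punpcklbw/pshuflw smear the byte
     // across the low 64 bits, punpcklqdq fills the full 128 bits, and
     // vinserti128h / vinserti64x4h duplicate that lane into the upper halves of
     // the YMM / ZMM destination.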
2913 instruct Repl4B(vecS dst, rRegI src) %{
2914   predicate(n->as_Vector()->length() == 4);
2915   match(Set dst (ReplicateB src));
2916   format %{ "movd    $dst,$src\n\t"
2917             "punpcklbw $dst,$dst\n\t"
2918             "pshuflw $dst,$dst,0x00\t! replicate4B" %}
2919   ins_encode %{
2920     __ movdl($dst$$XMMRegister, $src$$Register);
2921     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
2922     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2923   %}
2924   ins_pipe( pipe_slow );
2925 %}
2926 
2927 instruct Repl8B(vecD dst, rRegI src) %{
2928   predicate(n->as_Vector()->length() == 8);
2929   match(Set dst (ReplicateB src));
2930   format %{ "movd    $dst,$src\n\t"
2931             "punpcklbw $dst,$dst\n\t"
2932             "pshuflw $dst,$dst,0x00\t! replicate8B" %}
2933   ins_encode %{
2934     __ movdl($dst$$XMMRegister, $src$$Register);
2935     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
2936     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2937   %}
2938   ins_pipe( pipe_slow );
2939 %}
2940 
2941 instruct Repl16B(vecX dst, rRegI src) %{
2942   predicate(n->as_Vector()->length() == 16);
2943   match(Set dst (ReplicateB src));
2944   format %{ "movd    $dst,$src\n\t"
2945             "punpcklbw $dst,$dst\n\t"
2946             "pshuflw $dst,$dst,0x00\n\t"
2947             "punpcklqdq $dst,$dst\t! replicate16B" %}
2948   ins_encode %{
2949     __ movdl($dst$$XMMRegister, $src$$Register);
2950     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
2951     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2952     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2953   %}
2954   ins_pipe( pipe_slow );
2955 %}
2956 
2957 instruct Repl32B(vecY dst, rRegI src) %{
2958   predicate(n->as_Vector()->length() == 32);
2959   match(Set dst (ReplicateB src));
2960   format %{ "movd    $dst,$src\n\t"
2961             "punpcklbw $dst,$dst\n\t"
2962             "pshuflw $dst,$dst,0x00\n\t"
2963             "punpcklqdq $dst,$dst\n\t"
2964             "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
2965   ins_encode %{
2966     __ movdl($dst$$XMMRegister, $src$$Register);
2967     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
2968     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2969     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2970     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2971   %}
2972   ins_pipe( pipe_slow );
2973 %}
2974 
2975 instruct Repl64B(vecZ dst, rRegI src) %{
2976   predicate(n->as_Vector()->length() == 64);
2977   match(Set dst (ReplicateB src));
2978   format %{ "movd    $dst,$src\n\t"
2979             "punpcklbw $dst,$dst\n\t"
2980             "pshuflw $dst,$dst,0x00\n\t"
2981             "punpcklqdq $dst,$dst\n\t"
2982             "vinserti128h $dst,$dst,$dst\t! lower replicate32B\n\t"
2983             "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate32B" %}
2984   ins_encode %{
2985     __ movdl($dst$$XMMRegister, $src$$Register);
2986     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
2987     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2988     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2989     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2990     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2991   %}
2992   ins_pipe( pipe_slow );
2993 %}
2994 
2995 // Replicate byte scalar immediate to be vector by loading from const table.
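     // replicate4_imm/replicate8_imm (defined above) pre-compute the broadcast bit
     // pattern at compile time, so a single constant-table load (plus lane widening
     // for the larger operand classes) replaces the register shuffle sequence.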
2996 instruct Repl4B_imm(vecS dst, immI con) %{
2997   predicate(n->as_Vector()->length() == 4);
2998   match(Set dst (ReplicateB con));
2999   format %{ "movdl   $dst,[$constantaddress]\t! replicate4B($con)" %}
3000   ins_encode %{
3001     __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
3002   %}
3003   ins_pipe( pipe_slow );
3004 %}
3005 
3006 instruct Repl8B_imm(vecD dst, immI con) %{
3007   predicate(n->as_Vector()->length() == 8);
3008   match(Set dst (ReplicateB con));
3009   format %{ "movq    $dst,[$constantaddress]\t! replicate8B($con)" %}
3010   ins_encode %{
3011     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3012   %}
3013   ins_pipe( pipe_slow );
3014 %}
3015 
3016 instruct Repl16B_imm(vecX dst, immI con) %{
3017   predicate(n->as_Vector()->length() == 16);
3018   match(Set dst (ReplicateB con));
3019   format %{ "movq    $dst,[$constantaddress]\n\t"
3020             "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
3021   ins_encode %{
3022     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3023     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3024   %}
3025   ins_pipe( pipe_slow );
3026 %}
3027 
3028 instruct Repl32B_imm(vecY dst, immI con) %{
3029   predicate(n->as_Vector()->length() == 32);
3030   match(Set dst (ReplicateB con));
3031   format %{ "movq    $dst,[$constantaddress]\n\t"
3032             "punpcklqdq $dst,$dst\n\t"
3033             "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
3034   ins_encode %{
3035     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3036     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3037     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3038   %}
3039   ins_pipe( pipe_slow );
3040 %}
3041 
3042 instruct Repl64B_imm(vecZ dst, immI con) %{
3043   predicate(n->as_Vector()->length() == 64);
3044   match(Set dst (ReplicateB con));
3045   format %{ "movq    $dst,[$constantaddress]\n\t"
3046             "punpcklqdq $dst,$dst\n\t"
3047             "vinserti128h $dst,$dst,$dst\t! lower replicate32B($con)\n\t"
3048             "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate32B($con)" %}
3049   ins_encode %{
3050     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3051     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3052     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3053     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3054   %}
3055   ins_pipe( pipe_slow );
3056 %}
3057 
3058 // Replicate byte scalar zero to be vector
3059 instruct Repl4B_zero(vecS dst, immI0 zero) %{
3060   predicate(n->as_Vector()->length() == 4);
3061   match(Set dst (ReplicateB zero));
3062   format %{ "pxor    $dst,$dst\t! replicate4B zero" %}
3063   ins_encode %{
3064     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3065   %}
3066   ins_pipe( fpu_reg_reg );
3067 %}
3068 
3069 instruct Repl8B_zero(vecD dst, immI0 zero) %{
3070   predicate(n->as_Vector()->length() == 8);
3071   match(Set dst (ReplicateB zero));
3072   format %{ "pxor    $dst,$dst\t! replicate8B zero" %}
3073   ins_encode %{
3074     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3075   %}
3076   ins_pipe( fpu_reg_reg );
3077 %}
3078 
3079 instruct Repl16B_zero(vecX dst, immI0 zero) %{
3080   predicate(n->as_Vector()->length() == 16);
3081   match(Set dst (ReplicateB zero));
3082   format %{ "pxor    $dst,$dst\t! replicate16B zero" %}
3083   ins_encode %{
3084     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3085   %}
3086   ins_pipe( fpu_reg_reg );
3087 %}
3088 
3089 instruct Repl32B_zero(vecY dst, immI0 zero) %{
3090   predicate(n->as_Vector()->length() == 32);
3091   match(Set dst (ReplicateB zero));
3092   format %{ "vpxor   $dst,$dst,$dst\t! replicate32B zero" %}
3093   ins_encode %{
3094     // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
3095     int vector_len = 1;
3096     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3097   %}
3098   ins_pipe( fpu_reg_reg );
3099 %}
3100 
3101 instruct Repl64B_zero(vecZ dst, immI0 zero) %{
3102   predicate(n->as_Vector()->length() == 64);
3103   match(Set dst (ReplicateB zero));
3104   format %{ "vpxor   $dst k0,$dst,$dst\t! replicate64B zero" %}
3105   ins_encode %{
3106     // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
3107     int vector_len = 2;
3108     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3109   %}
3110   ins_pipe( fpu_reg_reg );
3111 %}
3112 
3113 // Replicate char/short (2 byte) scalar to be vector
3114 instruct Repl2S(vecS dst, rRegI src) %{
3115   predicate(n->as_Vector()->length() == 2);
3116   match(Set dst (ReplicateS src));
3117   format %{ "movd    $dst,$src\n\t"
3118             "pshuflw $dst,$dst,0x00\t! replicate2S" %}
3119   ins_encode %{
3120     __ movdl($dst$$XMMRegister, $src$$Register);
3121     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3122   %}
3123   ins_pipe( fpu_reg_reg );
3124 %}
3125 
3126 instruct Repl4S(vecD dst, rRegI src) %{
3127   predicate(n->as_Vector()->length() == 4);
3128   match(Set dst (ReplicateS src));
3129   format %{ "movd    $dst,$src\n\t"
3130             "pshuflw $dst,$dst,0x00\t! replicate4S" %}
3131   ins_encode %{
3132     __ movdl($dst$$XMMRegister, $src$$Register);
3133     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3134   %}
3135   ins_pipe( fpu_reg_reg );
3136 %}
3137 
3138 instruct Repl8S(vecX dst, rRegI src) %{
3139   predicate(n->as_Vector()->length() == 8);
3140   match(Set dst (ReplicateS src));
3141   format %{ "movd    $dst,$src\n\t"
3142             "pshuflw $dst,$dst,0x00\n\t"
3143             "punpcklqdq $dst,$dst\t! replicate8S" %}
3144   ins_encode %{
3145     __ movdl($dst$$XMMRegister, $src$$Register);
3146     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3147     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3148   %}
3149   ins_pipe( pipe_slow );
3150 %}
3151 
3152 instruct Repl16S(vecY dst, rRegI src) %{
3153   predicate(n->as_Vector()->length() == 16);
3154   match(Set dst (ReplicateS src));
3155   format %{ "movd    $dst,$src\n\t"
3156             "pshuflw $dst,$dst,0x00\n\t"
3157             "punpcklqdq $dst,$dst\n\t"
3158             "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
3159   ins_encode %{
3160     __ movdl($dst$$XMMRegister, $src$$Register);
3161     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3162     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3163     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3164   %}
3165   ins_pipe( pipe_slow );
3166 %}
3167 
3168 instruct Repl32S(vecZ dst, rRegI src) %{
3169   predicate(n->as_Vector()->length() == 32);
3170   match(Set dst (ReplicateS src));
3171   format %{ "movd    $dst,$src\n\t"
3172             "pshuflw $dst,$dst,0x00\n\t"
3173             "punpcklqdq $dst,$dst\n\t"
3174             "vinserti128h $dst,$dst,$dst\t! lower replicate16S\n\t"
3175             "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S" %}
3176   ins_encode %{
3177     __ movdl($dst$$XMMRegister, $src$$Register);
3178     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3179     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3180     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3181     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3182   %}
3183   ins_pipe( pipe_slow );
3184 %}
3185 
3186 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
3187 instruct Repl2S_imm(vecS dst, immI con) %{
3188   predicate(n->as_Vector()->length() == 2);
3189   match(Set dst (ReplicateS con));
3190   format %{ "movdl   $dst,[$constantaddress]\t! replicate2S($con)" %}
3191   ins_encode %{
3192     __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
3193   %}
3194   ins_pipe( fpu_reg_reg );
3195 %}
3196 
3197 instruct Repl4S_imm(vecD dst, immI con) %{
3198   predicate(n->as_Vector()->length() == 4);
3199   match(Set dst (ReplicateS con));
3200   format %{ "movq    $dst,[$constantaddress]\t! replicate4S($con)" %}
3201   ins_encode %{
3202     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3203   %}
3204   ins_pipe( fpu_reg_reg );
3205 %}
3206 
3207 instruct Repl8S_imm(vecX dst, immI con) %{
3208   predicate(n->as_Vector()->length() == 8);
3209   match(Set dst (ReplicateS con));
3210   format %{ "movq    $dst,[$constantaddress]\n\t"
3211             "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
3212   ins_encode %{
3213     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3214     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3215   %}
3216   ins_pipe( pipe_slow );
3217 %}
3218 
3219 instruct Repl16S_imm(vecY dst, immI con) %{
3220   predicate(n->as_Vector()->length() == 16);
3221   match(Set dst (ReplicateS con));
3222   format %{ "movq    $dst,[$constantaddress]\n\t"
3223             "punpcklqdq $dst,$dst\n\t"
3224             "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
3225   ins_encode %{
3226     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3227     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3228     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3229   %}
3230   ins_pipe( pipe_slow );
3231 %}
3232 
3233 instruct Repl32S_imm(vecZ dst, immI con) %{
3234   predicate(n->as_Vector()->length() == 32);
3235   match(Set dst (ReplicateS con));
3236   format %{ "movq    $dst,[$constantaddress]\n\t"
3237             "punpcklqdq $dst,$dst\n\t"
3238             "vinserti128h $dst,$dst,$dst\t! lower replicate16S($con)\n\t"
3239             "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S($con)" %}
3240   ins_encode %{
3241     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3242     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3243     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3244     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3245   %}
3246   ins_pipe( pipe_slow );
3247 %}
3248 
3249 // Replicate char/short (2 byte) scalar zero to be vector
3250 instruct Repl2S_zero(vecS dst, immI0 zero) %{
3251   predicate(n->as_Vector()->length() == 2);
3252   match(Set dst (ReplicateS zero));
3253   format %{ "pxor    $dst,$dst\t! replicate2S zero" %}
3254   ins_encode %{
3255     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3256   %}
3257   ins_pipe( fpu_reg_reg );
3258 %}
3259 
3260 instruct Repl4S_zero(vecD dst, immI0 zero) %{
3261   predicate(n->as_Vector()->length() == 4);
3262   match(Set dst (ReplicateS zero));
3263   format %{ "pxor    $dst,$dst\t! replicate4S zero" %}
3264   ins_encode %{
3265     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3266   %}
3267   ins_pipe( fpu_reg_reg );
3268 %}
3269 
3270 instruct Repl8S_zero(vecX dst, immI0 zero) %{
3271   predicate(n->as_Vector()->length() == 8);
3272   match(Set dst (ReplicateS zero));
3273   format %{ "pxor    $dst,$dst\t! replicate8S zero" %}
3274   ins_encode %{
3275     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3276   %}
3277   ins_pipe( fpu_reg_reg );
3278 %}
3279 
3280 instruct Repl16S_zero(vecY dst, immI0 zero) %{
3281   predicate(n->as_Vector()->length() == 16);
3282   match(Set dst (ReplicateS zero));
3283   format %{ "vpxor   $dst,$dst,$dst\t! replicate16S zero" %}
3284   ins_encode %{
3285     // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
3286     int vector_len = 1;
3287     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3288   %}
3289   ins_pipe( fpu_reg_reg );
3290 %}
3291 
3292 instruct Repl32S_zero(vecZ dst, immI0 zero) %{
3293   predicate(n->as_Vector()->length() == 32);
3294   match(Set dst (ReplicateS zero));
3295   format %{ "vpxor   $dst k0,$dst,$dst\t! replicate32S zero" %}
3296   ins_encode %{
3297     // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
3298     int vector_len = 2;
3299     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3300   %}
3301   ins_pipe( fpu_reg_reg );
3302 %}
3303 
3304 // Replicate integer (4 byte) scalar to be vector
3305 instruct Repl2I(vecD dst, rRegI src) %{
3306   predicate(n->as_Vector()->length() == 2);
3307   match(Set dst (ReplicateI src));
3308   format %{ "movd    $dst,$src\n\t"
3309             "pshufd  $dst,$dst,0x00\t! replicate2I" %}
3310   ins_encode %{
3311     __ movdl($dst$$XMMRegister, $src$$Register);
3312     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3313   %}
3314   ins_pipe( fpu_reg_reg );
3315 %}
3316 
3317 instruct Repl4I(vecX dst, rRegI src) %{
3318   predicate(n->as_Vector()->length() == 4);
3319   match(Set dst (ReplicateI src));
3320   format %{ "movd    $dst,$src\n\t"
3321             "pshufd  $dst,$dst,0x00\t! replicate4I" %}
3322   ins_encode %{
3323     __ movdl($dst$$XMMRegister, $src$$Register);
3324     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3325   %}
3326   ins_pipe( pipe_slow );
3327 %}
3328 
3329 instruct Repl8I(vecY dst, rRegI src) %{
3330   predicate(n->as_Vector()->length() == 8);
3331   match(Set dst (ReplicateI src));
3332   format %{ "movd    $dst,$src\n\t"
3333             "pshufd  $dst,$dst,0x00\n\t"
3334             "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
3335   ins_encode %{
3336     __ movdl($dst$$XMMRegister, $src$$Register);
3337     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3338     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3339   %}
3340   ins_pipe( pipe_slow );
3341 %}
3342 
3343 instruct Repl16I(vecZ dst, rRegI src) %{
3344   predicate(n->as_Vector()->length() == 16);
3345   match(Set dst (ReplicateI src));
3346   format %{ "movd    $dst,$src\n\t"
3347             "pshufd  $dst,$dst,0x00\n\t"
3348             "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t"
3349             "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %}
3350   ins_encode %{
3351     __ movdl($dst$$XMMRegister, $src$$Register);
3352     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3353     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3354     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3355   %}
3356   ins_pipe( pipe_slow );
3357 %}
3358 
3359 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
3360 instruct Repl2I_imm(vecD dst, immI con) %{
3361   predicate(n->as_Vector()->length() == 2);
3362   match(Set dst (ReplicateI con));
3363   format %{ "movq    $dst,[$constantaddress]\t! replicate2I($con)" %}
3364   ins_encode %{
3365     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
3366   %}
3367   ins_pipe( fpu_reg_reg );
3368 %}
3369 
3370 instruct Repl4I_imm(vecX dst, immI con) %{
3371   predicate(n->as_Vector()->length() == 4);
3372   match(Set dst (ReplicateI con));
3373   format %{ "movq    $dst,[$constantaddress]\t! replicate4I($con)\n\t"
3374             "punpcklqdq $dst,$dst" %}
3375   ins_encode %{
3376     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
3377     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3378   %}
3379   ins_pipe( pipe_slow );
3380 %}
3381 
3382 instruct Repl8I_imm(vecY dst, immI con) %{
3383   predicate(n->as_Vector()->length() == 8);
3384   match(Set dst (ReplicateI con));
3385   format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
3386             "punpcklqdq $dst,$dst\n\t"
3387             "vinserti128h $dst,$dst,$dst" %}
3388   ins_encode %{
3389     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
3390     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3391     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3392   %}
3393   ins_pipe( pipe_slow );
3394 %}
3395 
3396 instruct Repl16I_imm(vecZ dst, immI con) %{
3397   predicate(n->as_Vector()->length() == 16);
3398   match(Set dst (ReplicateI con));
3399   format %{ "movq    $dst,[$constantaddress]\t! replicate16I($con)\n\t"
3400             "punpcklqdq $dst,$dst\n\t"
3401             "vinserti128h $dst,$dst,$dst\n\t"
3402             "vinserti64x4h $dst k0,$dst,$dst" %}
3403   ins_encode %{
3404     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
3405     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3406     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3407     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3408   %}
3409   ins_pipe( pipe_slow );
3410 %}
3411 
3412 // An integer can be loaded into an xmm register directly from memory.
3413 instruct Repl2I_mem(vecD dst, memory mem) %{
3414   predicate(n->as_Vector()->length() == 2);
3415   match(Set dst (ReplicateI (LoadI mem)));
3416   format %{ "movd    $dst,$mem\n\t"
3417             "pshufd  $dst,$dst,0x00\t! replicate2I" %}
3418   ins_encode %{
3419     __ movdl($dst$$XMMRegister, $mem$$Address);
3420     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3421   %}
3422   ins_pipe( fpu_reg_reg );
3423 %}
3424 
3425 instruct Repl4I_mem(vecX dst, memory mem) %{
3426   predicate(n->as_Vector()->length() == 4);
3427   match(Set dst (ReplicateI (LoadI mem)));
3428   format %{ "movd    $dst,$mem\n\t"
3429             "pshufd  $dst,$dst,0x00\t! replicate4I" %}
3430   ins_encode %{
3431     __ movdl($dst$$XMMRegister, $mem$$Address);
3432     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3433   %}
3434   ins_pipe( pipe_slow );
3435 %}
3436 
3437 instruct Repl8I_mem(vecY dst, memory mem) %{
3438   predicate(n->as_Vector()->length() == 8);
3439   match(Set dst (ReplicateI (LoadI mem)));
3440   format %{ "movd    $dst,$mem\n\t"
3441             "pshufd  $dst,$dst,0x00\n\t"
3442             "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
3443   ins_encode %{
3444     __ movdl($dst$$XMMRegister, $mem$$Address);
3445     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3446     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3447   %}
3448   ins_pipe( pipe_slow );
3449 %}
3450 
3451 instruct Repl16I_mem(vecZ dst, memory mem) %{
3452   predicate(n->as_Vector()->length() == 16);
3453   match(Set dst (ReplicateI (LoadI mem)));
3454   format %{ "movd    $dst,$mem\n\t"
3455             "pshufd  $dst,$dst,0x00\n\t"
3456             "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t"
3457             "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %}
3458   ins_encode %{
3459     __ movdl($dst$$XMMRegister, $mem$$Address);
3460     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3461     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3462     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3463   %}
3464   ins_pipe( pipe_slow );
3465 %}
3466 
3467 // Replicate integer (4 byte) scalar zero to be vector
3468 instruct Repl2I_zero(vecD dst, immI0 zero) %{
3469   predicate(n->as_Vector()->length() == 2);
3470   match(Set dst (ReplicateI zero));
3471   format %{ "pxor    $dst,$dst\t! replicate2I zero" %}
3472   ins_encode %{
3473     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3474   %}
3475   ins_pipe( fpu_reg_reg );
3476 %}
3477 
3478 instruct Repl4I_zero(vecX dst, immI0 zero) %{
3479   predicate(n->as_Vector()->length() == 4);
3480   match(Set dst (ReplicateI zero));
3481   format %{ "pxor    $dst,$dst\t! replicate4I zero" %}
3482   ins_encode %{
3483     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3484   %}
3485   ins_pipe( fpu_reg_reg );
3486 %}
3487 
3488 instruct Repl8I_zero(vecY dst, immI0 zero) %{
3489   predicate(n->as_Vector()->length() == 8);
3490   match(Set dst (ReplicateI zero));
3491   format %{ "vpxor   $dst,$dst,$dst\t! replicate8I zero" %}
3492   ins_encode %{
3493     // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
3494     int vector_len = 1;
3495     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3496   %}
3497   ins_pipe( fpu_reg_reg );
3498 %}
3499 
3500 instruct Repl16I_zero(vecZ dst, immI0 zero) %{
3501   predicate(n->as_Vector()->length() == 16);
3502   match(Set dst (ReplicateI zero));
3503   format %{ "vpxor   $dst k0,$dst,$dst\t! replicate16I zero" %}
3504   ins_encode %{
3505     // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
3506     int vector_len = 2;
3507     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3508   %}
3509   ins_pipe( fpu_reg_reg );
3510 %}
3511 
3512 // Replicate long (8 byte) scalar to be vector
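     // On LP64 the long fits in one general register, so a single movdq moves it
     // into the XMM register; in 32-bit mode the value lives in a register pair and
     // the lo/hi halves are moved with two movdl's and merged with punpckldq.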
3513 #ifdef _LP64
3514 instruct Repl2L(vecX dst, rRegL src) %{
3515   predicate(n->as_Vector()->length() == 2);
3516   match(Set dst (ReplicateL src));
3517   format %{ "movdq   $dst,$src\n\t"
3518             "punpcklqdq $dst,$dst\t! replicate2L" %}
3519   ins_encode %{
3520     __ movdq($dst$$XMMRegister, $src$$Register);
3521     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3522   %}
3523   ins_pipe( pipe_slow );
3524 %}
3525 
3526 instruct Repl4L(vecY dst, rRegL src) %{
3527   predicate(n->as_Vector()->length() == 4);
3528   match(Set dst (ReplicateL src));
3529   format %{ "movdq   $dst,$src\n\t"
3530             "punpcklqdq $dst,$dst\n\t"
3531             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
3532   ins_encode %{
3533     __ movdq($dst$$XMMRegister, $src$$Register);
3534     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3535     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3536   %}
3537   ins_pipe( pipe_slow );
3538 %}
3539 
3540 instruct Repl8L(vecZ dst, rRegL src) %{
3541   predicate(n->as_Vector()->length() == 8);
3542   match(Set dst (ReplicateL src));
3543   format %{ "movdq   $dst,$src\n\t"
3544             "punpcklqdq $dst,$dst\n\t"
3545             "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t"
3546             "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %}
3547   ins_encode %{
3548     __ movdq($dst$$XMMRegister, $src$$Register);
3549     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3550     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3551     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3552   %}
3553   ins_pipe( pipe_slow );
3554 %}
3555 #else // _LP64
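// On 32-bit, a long value lives in a GPR pair, so the encodings below move the
// lo and hi halves into xmm lanes separately (movdl), merge them into one
// quadword with punpckldq, and then broadcast that quadword with punpcklqdq.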
3556 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
3557   predicate(n->as_Vector()->length() == 2);
3558   match(Set dst (ReplicateL src));
3559   effect(TEMP dst, USE src, TEMP tmp);
3560   format %{ "movdl   $dst,$src.lo\n\t"
3561             "movdl   $tmp,$src.hi\n\t"
3562             "punpckldq $dst,$tmp\n\t"
3563             "punpcklqdq $dst,$dst\t! replicate2L"%}
3564   ins_encode %{
3565     __ movdl($dst$$XMMRegister, $src$$Register);
3566     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3567     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3568     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3569   %}
3570   ins_pipe( pipe_slow );
3571 %}
3572 
3573 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
3574   predicate(n->as_Vector()->length() == 4);
3575   match(Set dst (ReplicateL src));
3576   effect(TEMP dst, USE src, TEMP tmp);
3577   format %{ "movdl   $dst,$src.lo\n\t"
3578             "movdl   $tmp,$src.hi\n\t"
3579             "punpckldq $dst,$tmp\n\t"
3580             "punpcklqdq $dst,$dst\n\t"
3581             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
3582   ins_encode %{
3583     __ movdl($dst$$XMMRegister, $src$$Register);
3584     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3585     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3586     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3587     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3588   %}
3589   ins_pipe( pipe_slow );
3590 %}
3591 
3592 instruct Repl8L(vecZ dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 8);
3594   match(Set dst (ReplicateL src));
3595   effect(TEMP dst, USE src, TEMP tmp);
3596   format %{ "movdl   $dst,$src.lo\n\t"
3597             "movdl   $tmp,$src.hi\n\t"
3598             "punpckldq $dst,$tmp\n\t"
3599             "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t"
3600             "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %}
3601   ins_encode %{
3602     __ movdl($dst$$XMMRegister, $src$$Register);
3603     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3604     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3605     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3606     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3607     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3608   %}
3609   ins_pipe( pipe_slow );
3610 %}
3611 #endif // _LP64
3612 
3613 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
3614 instruct Repl2L_imm(vecX dst, immL con) %{
3615   predicate(n->as_Vector()->length() == 2);
3616   match(Set dst (ReplicateL con));
3617   format %{ "movq    $dst,[$constantaddress]\n\t"
3618             "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
3619   ins_encode %{
3620     __ movq($dst$$XMMRegister, $constantaddress($con));
3621     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3622   %}
3623   ins_pipe( pipe_slow );
3624 %}
3625 
3626 instruct Repl4L_imm(vecY dst, immL con) %{
3627   predicate(n->as_Vector()->length() == 4);
3628   match(Set dst (ReplicateL con));
3629   format %{ "movq    $dst,[$constantaddress]\n\t"
3630             "punpcklqdq $dst,$dst\n\t"
3631             "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
3632   ins_encode %{
3633     __ movq($dst$$XMMRegister, $constantaddress($con));
3634     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3635     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3636   %}
3637   ins_pipe( pipe_slow );
3638 %}
3639 
3640 instruct Repl8L_imm(vecZ dst, immL con) %{
3641   predicate(n->as_Vector()->length() == 8);
3642   match(Set dst (ReplicateL con));
3643   format %{ "movq    $dst,[$constantaddress]\n\t"
3644             "punpcklqdq $dst,$dst\n\t"
3645             "vinserti128h $dst,$dst,$dst\t! lower replicate4L($con)\n\t"
3646             "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L($con)" %}
3647   ins_encode %{
3648     __ movq($dst$$XMMRegister, $constantaddress($con));
3649     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3650     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3651     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3652   %}
3653   ins_pipe( pipe_slow );
3654 %}
3655 
// The long can be loaded into the xmm register directly from memory.
3657 instruct Repl2L_mem(vecX dst, memory mem) %{
3658   predicate(n->as_Vector()->length() == 2);
3659   match(Set dst (ReplicateL (LoadL mem)));
3660   format %{ "movq    $dst,$mem\n\t"
3661             "punpcklqdq $dst,$dst\t! replicate2L" %}
3662   ins_encode %{
3663     __ movq($dst$$XMMRegister, $mem$$Address);
3664     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3665   %}
3666   ins_pipe( pipe_slow );
3667 %}
3668 
3669 instruct Repl4L_mem(vecY dst, memory mem) %{
3670   predicate(n->as_Vector()->length() == 4);
3671   match(Set dst (ReplicateL (LoadL mem)));
3672   format %{ "movq    $dst,$mem\n\t"
3673             "punpcklqdq $dst,$dst\n\t"
3674             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
3675   ins_encode %{
3676     __ movq($dst$$XMMRegister, $mem$$Address);
3677     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3678     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3679   %}
3680   ins_pipe( pipe_slow );
3681 %}
3682 
3683 instruct Repl8L_mem(vecZ dst, memory mem) %{
3684   predicate(n->as_Vector()->length() == 8);
3685   match(Set dst (ReplicateL (LoadL mem)));
3686   format %{ "movq    $dst,$mem\n\t"
3687             "punpcklqdq $dst,$dst\n\t"
3688             "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t"
3689             "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %}
3690   ins_encode %{
3691     __ movq($dst$$XMMRegister, $mem$$Address);
3692     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3693     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3694     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3695   %}
3696   ins_pipe( pipe_slow );
3697 %}
3698 
3699 // Replicate long (8 byte) scalar zero to be vector
3700 instruct Repl2L_zero(vecX dst, immL0 zero) %{
3701   predicate(n->as_Vector()->length() == 2);
3702   match(Set dst (ReplicateL zero));
3703   format %{ "pxor    $dst,$dst\t! replicate2L zero" %}
3704   ins_encode %{
3705     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3706   %}
3707   ins_pipe( fpu_reg_reg );
3708 %}
3709 
3710 instruct Repl4L_zero(vecY dst, immL0 zero) %{
3711   predicate(n->as_Vector()->length() == 4);
3712   match(Set dst (ReplicateL zero));
3713   format %{ "vpxor   $dst,$dst,$dst\t! replicate4L zero" %}
3714   ins_encode %{
3715     // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
3716     int vector_len = 1;
3717     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3718   %}
3719   ins_pipe( fpu_reg_reg );
3720 %}
3721 
3722 instruct Repl8L_zero(vecZ dst, immL0 zero) %{
3723   predicate(n->as_Vector()->length() == 8);
3724   match(Set dst (ReplicateL zero));
3725   format %{ "vpxor   $dst k0,$dst,$dst\t! replicate8L zero" %}
3726   ins_encode %{
3727     // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
3728     int vector_len = 2;
3729     __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3730   %}
3731   ins_pipe( fpu_reg_reg );
3732 %}
3733 
3734 // Replicate float (4 byte) scalar to be vector
3735 instruct Repl2F(vecD dst, regF src) %{
3736   predicate(n->as_Vector()->length() == 2);
3737   match(Set dst (ReplicateF src));
3738   format %{ "pshufd  $dst,$dst,0x00\t! replicate2F" %}
3739   ins_encode %{
3740     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3741   %}
3742   ins_pipe( fpu_reg_reg );
3743 %}
3744 
3745 instruct Repl4F(vecX dst, regF src) %{
3746   predicate(n->as_Vector()->length() == 4);
3747   match(Set dst (ReplicateF src));
3748   format %{ "pshufd  $dst,$dst,0x00\t! replicate4F" %}
3749   ins_encode %{
3750     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3751   %}
3752   ins_pipe( pipe_slow );
3753 %}
3754 
3755 instruct Repl8F(vecY dst, regF src) %{
3756   predicate(n->as_Vector()->length() == 8);
3757   match(Set dst (ReplicateF src));
3758   format %{ "pshufd  $dst,$src,0x00\n\t"
3759             "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
3760   ins_encode %{
3761     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3762     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3763   %}
3764   ins_pipe( pipe_slow );
3765 %}
3766 
3767 instruct Repl16F(vecZ dst, regF src) %{
3768   predicate(n->as_Vector()->length() == 16);
3769   match(Set dst (ReplicateF src));
3770   format %{ "pshufd  $dst,$src,0x00\n\t"
3771             "vinsertf128h $dst,$dst,$dst\t! lower replicate8F\n\t"
3772             "vinsertf64x4h $dst k0,$dst,$dst\t! lower replicate8F" %}
3773   ins_encode %{
3774     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3775     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3776     __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3777   %}
3778   ins_pipe( pipe_slow );
3779 %}
3780 
3781 // Replicate float (4 byte) scalar zero to be vector
3782 instruct Repl2F_zero(vecD dst, immF0 zero) %{
3783   predicate(n->as_Vector()->length() == 2);
3784   match(Set dst (ReplicateF zero));
3785   format %{ "xorps   $dst,$dst\t! replicate2F zero" %}
3786   ins_encode %{
3787     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
3788   %}
3789   ins_pipe( fpu_reg_reg );
3790 %}
3791 
3792 instruct Repl4F_zero(vecX dst, immF0 zero) %{
3793   predicate(n->as_Vector()->length() == 4);
3794   match(Set dst (ReplicateF zero));
3795   format %{ "xorps   $dst,$dst\t! replicate4F zero" %}
3796   ins_encode %{
3797     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
3798   %}
3799   ins_pipe( fpu_reg_reg );
3800 %}
3801 
3802 instruct Repl8F_zero(vecY dst, immF0 zero) %{
3803   predicate(n->as_Vector()->length() == 8);
3804   match(Set dst (ReplicateF zero));
3805   format %{ "vxorps  $dst,$dst,$dst\t! replicate8F zero" %}
3806   ins_encode %{
3807     int vector_len = 1;
3808     __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3809   %}
3810   ins_pipe( fpu_reg_reg );
3811 %}
3812 
3813 instruct Repl16F_zero(vecZ dst, immF0 zero) %{
3814   predicate(n->as_Vector()->length() == 16);
3815   match(Set dst (ReplicateF zero));
3816   format %{ "vxorps  $dst k0,$dst,$dst\t! replicate16F zero" %}
3817   ins_encode %{
3818     int vector_len = 2;
3819     __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3820   %}
3821   ins_pipe( fpu_reg_reg );
3822 %}
3823 
3824 // Replicate double (8 bytes) scalar to be vector
3825 instruct Repl2D(vecX dst, regD src) %{
3826   predicate(n->as_Vector()->length() == 2);
3827   match(Set dst (ReplicateD src));
3828   format %{ "pshufd  $dst,$src,0x44\t! replicate2D" %}
3829   ins_encode %{
3830     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
3831   %}
3832   ins_pipe( pipe_slow );
3833 %}
3834 
3835 instruct Repl4D(vecY dst, regD src) %{
3836   predicate(n->as_Vector()->length() == 4);
3837   match(Set dst (ReplicateD src));
3838   format %{ "pshufd  $dst,$src,0x44\n\t"
3839             "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
3840   ins_encode %{
3841     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
3842     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3843   %}
3844   ins_pipe( pipe_slow );
3845 %}
3846 
3847 instruct Repl8D(vecZ dst, regD src) %{
3848   predicate(n->as_Vector()->length() == 8);
3849   match(Set dst (ReplicateD src));
3850   format %{ "pshufd  $dst,$src,0x44\n\t"
3851             "vinsertf128h $dst,$dst,$dst\t! lower replicate4D\n\t"
3852             "vinsertf64x4h $dst k0,$dst,$dst\t! upper replicate4D" %}
3853   ins_encode %{
3854     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
3855     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3856     __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3857   %}
3858   ins_pipe( pipe_slow );
3859 %}
3860 
3861 // Replicate double (8 byte) scalar zero to be vector
3862 instruct Repl2D_zero(vecX dst, immD0 zero) %{
3863   predicate(n->as_Vector()->length() == 2);
3864   match(Set dst (ReplicateD zero));
3865   format %{ "xorpd   $dst,$dst\t! replicate2D zero" %}
3866   ins_encode %{
3867     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
3868   %}
3869   ins_pipe( fpu_reg_reg );
3870 %}
3871 
3872 instruct Repl4D_zero(vecY dst, immD0 zero) %{
3873   predicate(n->as_Vector()->length() == 4);
3874   match(Set dst (ReplicateD zero));
3875   format %{ "vxorpd  $dst,$dst,$dst,vect256\t! replicate4D zero" %}
3876   ins_encode %{
3877     int vector_len = 1;
3878     __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3879   %}
3880   ins_pipe( fpu_reg_reg );
3881 %}
3882 
3883 instruct Repl8D_zero(vecZ dst, immD0 zero) %{
3884   predicate(n->as_Vector()->length() == 8);
3885   match(Set dst (ReplicateD zero));
3886   format %{ "vxorpd  $dst k0,$dst,$dst,vect512\t! replicate8D zero" %}
3887   ins_encode %{
3888     int vector_len = 2;
3889     __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3890   %}
3891   ins_pipe( fpu_reg_reg );
3892 %}
3893 
3894 // ====================REDUCTION ARITHMETIC=======================================
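// Each reduction instruct below folds the lanes of vector src2 into the scalar
// accumulator src1 and leaves the scalar result in dst.  As a rough scalar
// sketch (illustration only, not matcher input) AddReductionVI computes:
//
//   int result = src1;
//   for (int i = 0; i < num_lanes; i++) {
//     result += src2[i];               // MulReductionVI would use *= instead
//   }
//   dst = result;
//
// The encodings below get there either with horizontal adds (phaddd/vphaddd)
// or by repeatedly folding the upper half of the vector onto the lower half
// (vextract*/pshufd) before combining with src1.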
3895 
3896 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
3897   predicate(UseSSE > 2 && UseAVX == 0);
3898   match(Set dst (AddReductionVI src1 src2));
3899   effect(TEMP tmp2, TEMP tmp);
3900   format %{ "movdqu  $tmp2,$src2\n\t"
3901             "phaddd  $tmp2,$tmp2\n\t"
3902             "movd    $tmp,$src1\n\t"
3903             "paddd   $tmp,$tmp2\n\t"
3904             "movd    $dst,$tmp\t! add reduction2I" %}
3905   ins_encode %{
3906     __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
3907     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
3908     __ movdl($tmp$$XMMRegister, $src1$$Register);
3909     __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
3910     __ movdl($dst$$Register, $tmp$$XMMRegister);
3911   %}
3912   ins_pipe( pipe_slow );
3913 %}
3914 
3915 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
3916   predicate(UseAVX > 0 && UseAVX < 3);
3917   match(Set dst (AddReductionVI src1 src2));
3918   effect(TEMP tmp, TEMP tmp2);
3919   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
3920             "movd     $tmp2,$src1\n\t"
3921             "vpaddd   $tmp2,$tmp2,$tmp\n\t"
3922             "movd     $dst,$tmp2\t! add reduction2I" %}
3923   ins_encode %{
3924     int vector_len = 0;
3925     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
3926     __ movdl($tmp2$$XMMRegister, $src1$$Register);
3927     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
3928     __ movdl($dst$$Register, $tmp2$$XMMRegister);
3929   %}
3930   ins_pipe( pipe_slow );
3931 %}
3932 
3933 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
3934   predicate(UseAVX > 2);
3935   match(Set dst (AddReductionVI src1 src2));
3936   effect(TEMP tmp, TEMP tmp2);
3937   format %{ "pshufd  $tmp2,$src2,0x1\n\t"
3938             "vpaddd  $tmp,$src2,$tmp2\n\t"
3939             "movd    $tmp2,$src1\n\t"
3940             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
3941             "movd    $dst,$tmp2\t! add reduction2I" %}
3942   ins_encode %{
3943     int vector_len = 0;
3944     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
3945     __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
3946     __ movdl($tmp2$$XMMRegister, $src1$$Register);
3947     __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
3948     __ movdl($dst$$Register, $tmp2$$XMMRegister);
3949   %}
3950   ins_pipe( pipe_slow );
3951 %}
3952 
3953 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
3954   predicate(UseSSE > 2 && UseAVX == 0);
3955   match(Set dst (AddReductionVI src1 src2));
3956   effect(TEMP tmp2, TEMP tmp);
3957   format %{ "movdqu  $tmp2,$src2\n\t"
3958             "phaddd  $tmp2,$tmp2\n\t"
3959             "phaddd  $tmp2,$tmp2\n\t"
3960             "movd    $tmp,$src1\n\t"
3961             "paddd   $tmp,$tmp2\n\t"
3962             "movd    $dst,$tmp\t! add reduction4I" %}
3963   ins_encode %{
3964     __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
3965     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
3966     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
3967     __ movdl($tmp$$XMMRegister, $src1$$Register);
3968     __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
3969     __ movdl($dst$$Register, $tmp$$XMMRegister);
3970   %}
3971   ins_pipe( pipe_slow );
3972 %}
3973 
3974 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
3975   predicate(UseAVX > 0 && UseAVX < 3);
3976   match(Set dst (AddReductionVI src1 src2));
3977   effect(TEMP tmp, TEMP tmp2);
3978   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
3979             "vphaddd  $tmp,$tmp,$tmp2\n\t"
3980             "movd     $tmp2,$src1\n\t"
3981             "vpaddd   $tmp2,$tmp2,$tmp\n\t"
3982             "movd     $dst,$tmp2\t! add reduction4I" %}
3983   ins_encode %{
3984     int vector_len = 0;
3985     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
3986     __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
3987     __ movdl($tmp2$$XMMRegister, $src1$$Register);
3988     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
3989     __ movdl($dst$$Register, $tmp2$$XMMRegister);
3990   %}
3991   ins_pipe( pipe_slow );
3992 %}
3993 
3994 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
3995   predicate(UseAVX > 2);
3996   match(Set dst (AddReductionVI src1 src2));
3997   effect(TEMP tmp, TEMP tmp2);
3998   format %{ "pshufd  $tmp2,$src2,0xE\n\t"
3999             "vpaddd  $tmp,$src2,$tmp2\n\t"
4000             "pshufd  $tmp2,$tmp,0x1\n\t"
4001             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4002             "movd    $tmp2,$src1\n\t"
4003             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
4004             "movd    $dst,$tmp2\t! add reduction4I" %}
4005   ins_encode %{
4006     int vector_len = 0;
4007     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
4008     __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4009     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4010     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4011     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4012     __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4013     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4014   %}
4015   ins_pipe( pipe_slow );
4016 %}
4017 
4018 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
4019   predicate(UseAVX > 0 && UseAVX < 3);
4020   match(Set dst (AddReductionVI src1 src2));
4021   effect(TEMP tmp, TEMP tmp2);
4022   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
4023             "vphaddd  $tmp,$tmp,$tmp2\n\t"
4024             "vextracti128  $tmp2,$tmp\n\t"
4025             "vpaddd   $tmp,$tmp,$tmp2\n\t"
4026             "movd     $tmp2,$src1\n\t"
4027             "vpaddd   $tmp2,$tmp2,$tmp\n\t"
4028             "movd     $dst,$tmp2\t! add reduction8I" %}
4029   ins_encode %{
4030     int vector_len = 1;
4031     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
4032     __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4033     __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
4034     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4035     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4036     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4037     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4038   %}
4039   ins_pipe( pipe_slow );
4040 %}
4041 
4042 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
4043   predicate(UseAVX > 2);
4044   match(Set dst (AddReductionVI src1 src2));
4045   effect(TEMP tmp, TEMP tmp2);
4046   format %{ "vextracti128  $tmp,$src2\n\t"
4047             "vpaddd  $tmp,$tmp,$src2\n\t"
4048             "pshufd  $tmp2,$tmp,0xE\n\t"
4049             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4050             "pshufd  $tmp2,$tmp,0x1\n\t"
4051             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4052             "movd    $tmp2,$src1\n\t"
4053             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
4054             "movd    $dst,$tmp2\t! add reduction8I" %}
4055   ins_encode %{
4056     int vector_len = 0;
4057     __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
4058     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
4059     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
4060     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4061     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4062     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4063     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4064     __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4065     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4066   %}
4067   ins_pipe( pipe_slow );
4068 %}
4069 
4070 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
4071   predicate(UseAVX > 2);
4072   match(Set dst (AddReductionVI src1 src2));
4073   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4074   format %{ "vextracti64x4  $tmp3,$src2\n\t"
4075             "vpaddd  $tmp3,$tmp3,$src2\n\t"
4076             "vextracti128   $tmp,$tmp3\n\t"
4077             "vpaddd  $tmp,$tmp,$tmp3\n\t"
4078             "pshufd  $tmp2,$tmp,0xE\n\t"
4079             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4080             "pshufd  $tmp2,$tmp,0x1\n\t"
4081             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4082             "movd    $tmp2,$src1\n\t"
4083             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
4084             "movd    $dst,$tmp2\t! mul reduction16I" %}
4085   ins_encode %{
4086     __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
4087     __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
4088     __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
4089     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
4090     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
4091     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4092     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4093     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4094     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4095     __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4096     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4097   %}
4098   ins_pipe( pipe_slow );
4099 %}
4100 
4101 #ifdef _LP64
4102 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
4103   predicate(UseAVX > 2);
4104   match(Set dst (AddReductionVL src1 src2));
4105   effect(TEMP tmp, TEMP tmp2);
4106   format %{ "pshufd  $tmp2,$src2,0xE\n\t"
4107             "vpaddq  $tmp,$src2,$tmp2\n\t"
4108             "movdq   $tmp2,$src1\n\t"
4109             "vpaddq  $tmp2,$tmp,$tmp2\n\t"
4110             "movdq   $dst,$tmp2\t! add reduction2L" %}
4111   ins_encode %{
4112     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
4113     __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
4114     __ movdq($tmp2$$XMMRegister, $src1$$Register);
4115     __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4116     __ movdq($dst$$Register, $tmp2$$XMMRegister);
4117   %}
4118   ins_pipe( pipe_slow );
4119 %}
4120 
4121 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
4122   predicate(UseAVX > 2);
4123   match(Set dst (AddReductionVL src1 src2));
4124   effect(TEMP tmp, TEMP tmp2);
4125   format %{ "vextracti64x2  $tmp,$src2, 0x1\n\t"
4126             "vpaddq  $tmp2,$tmp,$src2\n\t"
4127             "pshufd  $tmp,$tmp2,0xE\n\t"
4128             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4129             "movdq   $tmp,$src1\n\t"
4130             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4131             "movdq   $dst,$tmp2\t! add reduction4L" %}
4132   ins_encode %{
4133     __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
4134     __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
4135     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
4136     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4137     __ movdq($tmp$$XMMRegister, $src1$$Register);
4138     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4139     __ movdq($dst$$Register, $tmp2$$XMMRegister);
4140   %}
4141   ins_pipe( pipe_slow );
4142 %}
4143 
4144 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
4145   predicate(UseAVX > 2);
4146   match(Set dst (AddReductionVL src1 src2));
4147   effect(TEMP tmp, TEMP tmp2);
4148   format %{ "vextracti64x4  $tmp2,$src2\n\t"
4149             "vpaddq  $tmp2,$tmp2,$src2\n\t"
4150             "vextracti128   $tmp,$tmp2\n\t"
4151             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4152             "pshufd  $tmp,$tmp2,0xE\n\t"
4153             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4154             "movdq   $tmp,$src1\n\t"
4155             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4156             "movdq   $dst,$tmp2\t! add reduction8L" %}
4157   ins_encode %{
4158     __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
4159     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
4160     __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
4161     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4162     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
4163     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4164     __ movdq($tmp$$XMMRegister, $src1$$Register);
4165     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4166     __ movdq($dst$$Register, $tmp2$$XMMRegister);
4167   %}
4168   ins_pipe( pipe_slow );
4169 %}
4170 #endif
4171 
4172 instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
4173   predicate(UseSSE >= 1 && UseAVX == 0);
4174   match(Set dst (AddReductionVF src1 src2));
4175   effect(TEMP tmp, TEMP tmp2);
4176   format %{ "movdqu  $tmp,$src1\n\t"
4177             "addss   $tmp,$src2\n\t"
4178             "pshufd  $tmp2,$src2,0x01\n\t"
4179             "addss   $tmp,$tmp2\n\t"
4180             "movdqu  $dst,$tmp\t! add reduction2F" %}
4181   ins_encode %{
4182     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
4183     __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
4184     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
4185     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
4186     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
4187   %}
4188   ins_pipe( pipe_slow );
4189 %}
4190 
4191 instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
4192   predicate(UseAVX > 0);
4193   match(Set dst (AddReductionVF src1 src2));
4194   effect(TEMP tmp2, TEMP tmp);
4195   format %{ "vaddss  $tmp2,$src1,$src2\n\t"
4196             "pshufd  $tmp,$src2,0x01\n\t"
4197             "vaddss  $dst,$tmp2,$tmp\t! add reduction2F" %}
4198   ins_encode %{
4199     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4200     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4201     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4202   %}
4203   ins_pipe( pipe_slow );
4204 %}
4205 
4206 instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
4207   predicate(UseSSE >= 1 && UseAVX == 0);
4208   match(Set dst (AddReductionVF src1 src2));
4209   effect(TEMP tmp, TEMP tmp2);
4210   format %{ "movdqu  $tmp,$src1\n\t"
4211             "addss   $tmp,$src2\n\t"
4212             "pshufd  $tmp2,$src2,0x01\n\t"
4213             "addss   $tmp,$tmp2\n\t"
4214             "pshufd  $tmp2,$src2,0x02\n\t"
4215             "addss   $tmp,$tmp2\n\t"
4216             "pshufd  $tmp2,$src2,0x03\n\t"
4217             "addss   $tmp,$tmp2\n\t"
4218             "movdqu  $dst,$tmp\t! add reduction4F" %}
4219   ins_encode %{
4220     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
4221     __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
4222     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
4223     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
4224     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
4225     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
4226     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
4227     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
4228     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
4229   %}
4230   ins_pipe( pipe_slow );
4231 %}
4232 
4233 instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
4234   predicate(UseAVX > 0);
4235   match(Set dst (AddReductionVF src1 src2));
4236   effect(TEMP tmp, TEMP tmp2);
4237   format %{ "vaddss  $tmp2,$src1,$src2\n\t"
4238             "pshufd  $tmp,$src2,0x01\n\t"
4239             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4240             "pshufd  $tmp,$src2,0x02\n\t"
4241             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4242             "pshufd  $tmp,$src2,0x03\n\t"
4243             "vaddss  $dst,$tmp2,$tmp\t! add reduction4F" %}
4244   ins_encode %{
4245     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4246     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4247     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4248     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
4249     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4250     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
4251     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4252   %}
4253   ins_pipe( pipe_slow );
4254 %}
4255 
4256 instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
4257   predicate(UseAVX > 0);
4258   match(Set dst (AddReductionVF src1 src2));
4259   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4260   format %{ "vaddss  $tmp2,$src1,$src2\n\t"
4261             "pshufd  $tmp,$src2,0x01\n\t"
4262             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4263             "pshufd  $tmp,$src2,0x02\n\t"
4264             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4265             "pshufd  $tmp,$src2,0x03\n\t"
4266             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4267             "vextractf128  $tmp3,$src2\n\t"
4268             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
4269             "pshufd  $tmp,$tmp3,0x01\n\t"
4270             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4271             "pshufd  $tmp,$tmp3,0x02\n\t"
4272             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4273             "pshufd  $tmp,$tmp3,0x03\n\t"
4274             "vaddss  $dst,$tmp2,$tmp\t! add reduction8F" %}
4275   ins_encode %{
4276     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4277     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4278     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4279     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
4280     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4281     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
4282     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4283     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
4284     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4285     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4286     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4287     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4288     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4289     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4290     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4291   %}
4292   ins_pipe( pipe_slow );
4293 %}
4294 
4295 instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
4296   predicate(UseAVX > 2);
4297   match(Set dst (AddReductionVF src1 src2));
4298   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4299   format %{ "vaddss  $tmp2,$src1,$src2\n\t"
4300             "pshufd  $tmp,$src2,0x01\n\t"
4301             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4302             "pshufd  $tmp,$src2,0x02\n\t"
4303             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4304             "pshufd  $tmp,$src2,0x03\n\t"
4305             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4306             "vextractf64x2  $tmp3,$src2, 0x1\n\t"
4307             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
4308             "pshufd  $tmp,$tmp3,0x01\n\t"
4309             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4310             "pshufd  $tmp,$tmp3,0x02\n\t"
4311             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4312             "pshufd  $tmp,$tmp3,0x03\n\t"
4313             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4314             "vextractf64x2  $tmp3,$src2, 0x2\n\t"
4315             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
4316             "pshufd  $tmp,$tmp3,0x01\n\t"
4317             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4318             "pshufd  $tmp,$tmp3,0x02\n\t"
4319             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4320             "pshufd  $tmp,$tmp3,0x03\n\t"
4321             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4322             "vextractf64x2  $tmp3,$src2, 0x3\n\t"
4323             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
4324             "pshufd  $tmp,$tmp3,0x01\n\t"
4325             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4326             "pshufd  $tmp,$tmp3,0x02\n\t"
4327             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4328             "pshufd  $tmp,$tmp3,0x03\n\t"
4329             "vaddss  $dst,$tmp2,$tmp\t! add reduction16F" %}
4330   ins_encode %{
4331     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4332     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4333     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4334     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
4335     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4336     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
4337     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4338     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
4339     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4340     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4341     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4342     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4343     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4344     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4345     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4346     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
4347     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4348     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4349     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4350     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4351     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4352     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4353     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4354     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
4355     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4356     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4357     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4358     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4359     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4360     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4361     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4362   %}
4363   ins_pipe( pipe_slow );
4364 %}
4365 
4366 instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
4367   predicate(UseSSE >= 1 && UseAVX == 0);
4368   match(Set dst (AddReductionVD src1 src2));
4369   effect(TEMP tmp, TEMP dst);
4370   format %{ "movdqu  $tmp,$src1\n\t"
4371             "addsd   $tmp,$src2\n\t"
4372             "pshufd  $dst,$src2,0xE\n\t"
4373             "addsd   $dst,$tmp\t! add reduction2D" %}
4374   ins_encode %{
4375     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
4376     __ addsd($tmp$$XMMRegister, $src2$$XMMRegister);
4377     __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
4378     __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
4379   %}
4380   ins_pipe( pipe_slow );
4381 %}
4382 
4383 instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
4384   predicate(UseAVX > 0);
4385   match(Set dst (AddReductionVD src1 src2));
4386   effect(TEMP tmp, TEMP tmp2);
4387   format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
4388             "pshufd  $tmp,$src2,0xE\n\t"
4389             "vaddsd  $dst,$tmp2,$tmp\t! add reduction2D" %}
4390   ins_encode %{
4391     __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4392     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
4393     __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4394   %}
4395   ins_pipe( pipe_slow );
4396 %}
4397 
4398 instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
4399   predicate(UseAVX > 0);
4400   match(Set dst (AddReductionVD src1 src2));
4401   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4402   format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
4403             "pshufd  $tmp,$src2,0xE\n\t"
4404             "vaddsd  $tmp2,$tmp2,$tmp\n\t"
4405             "vextractf128  $tmp3,$src2\n\t"
4406             "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
4407             "pshufd  $tmp,$tmp3,0xE\n\t"
4408             "vaddsd  $dst,$tmp2,$tmp\t! add reduction4D" %}
4409   ins_encode %{
4410     __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4411     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
4412     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4413     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
4414     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4415     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4416     __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4417   %}
4418   ins_pipe( pipe_slow );
4419 %}
4420 
4421 instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
4422   predicate(UseAVX > 2);
4423   match(Set dst (AddReductionVD src1 src2));
4424   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4425   format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
4426             "pshufd  $tmp,$src2,0xE\n\t"
4427             "vaddsd  $tmp2,$tmp2,$tmp\n\t"
4428             "vextractf64x2  $tmp3,$src2, 0x1\n\t"
4429             "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
4430             "pshufd  $tmp,$tmp3,0xE\n\t"
4431             "vaddsd  $tmp2,$tmp2,$tmp\n\t"
4432             "vextractf64x2  $tmp3,$src2, 0x2\n\t"
4433             "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
4434             "pshufd  $tmp,$tmp3,0xE\n\t"
4435             "vaddsd  $tmp2,$tmp2,$tmp\n\t"
4436             "vextractf64x2  $tmp3,$src2, 0x3\n\t"
4437             "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
4438             "pshufd  $tmp,$tmp3,0xE\n\t"
4439             "vaddsd  $dst,$tmp2,$tmp\t! add reduction8D" %}
4440   ins_encode %{
4441     __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4442     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
4443     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4444     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
4445     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4446     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4447     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4448     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
4449     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4450     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4451     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4452     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
4453     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4454     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4455     __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4456   %}
4457   ins_pipe( pipe_slow );
4458 %}
4459 
4460 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
4461   predicate(UseSSE > 3 && UseAVX == 0);
4462   match(Set dst (MulReductionVI src1 src2));
4463   effect(TEMP tmp, TEMP tmp2);
4464   format %{ "pshufd  $tmp2,$src2,0x1\n\t"
4465             "pmulld  $tmp2,$src2\n\t"
4466             "movd    $tmp,$src1\n\t"
4467             "pmulld  $tmp2,$tmp\n\t"
4468             "movd    $dst,$tmp2\t! mul reduction2I" %}
4469   ins_encode %{
4470     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
4471     __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
4472     __ movdl($tmp$$XMMRegister, $src1$$Register);
4473     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
4474     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4475   %}
4476   ins_pipe( pipe_slow );
4477 %}
4478 
4479 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
4480   predicate(UseAVX > 0);
4481   match(Set dst (MulReductionVI src1 src2));
4482   effect(TEMP tmp, TEMP tmp2);
4483   format %{ "pshufd   $tmp2,$src2,0x1\n\t"
4484             "vpmulld  $tmp,$src2,$tmp2\n\t"
4485             "movd     $tmp2,$src1\n\t"
4486             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
4487             "movd     $dst,$tmp2\t! mul reduction2I" %}
4488   ins_encode %{
4489     int vector_len = 0;
4490     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
4491     __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4492     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4493     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4494     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4495   %}
4496   ins_pipe( pipe_slow );
4497 %}
4498 
4499 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
4500   predicate(UseSSE > 3 && UseAVX == 0);
4501   match(Set dst (MulReductionVI src1 src2));
4502   effect(TEMP tmp, TEMP tmp2);
4503   format %{ "pshufd  $tmp2,$src2,0xE\n\t"
4504             "pmulld  $tmp2,$src2\n\t"
4505             "pshufd  $tmp,$tmp2,0x1\n\t"
4506             "pmulld  $tmp2,$tmp\n\t"
4507             "movd    $tmp,$src1\n\t"
4508             "pmulld  $tmp2,$tmp\n\t"
4509             "movd    $dst,$tmp2\t! mul reduction4I" %}
4510   ins_encode %{
4511     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
4512     __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
4513     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
4514     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
4515     __ movdl($tmp$$XMMRegister, $src1$$Register);
4516     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
4517     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4518   %}
4519   ins_pipe( pipe_slow );
4520 %}
4521 
4522 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
4523   predicate(UseAVX > 0);
4524   match(Set dst (MulReductionVI src1 src2));
4525   effect(TEMP tmp, TEMP tmp2);
4526   format %{ "pshufd   $tmp2,$src2,0xE\n\t"
4527             "vpmulld  $tmp,$src2,$tmp2\n\t"
4528             "pshufd   $tmp2,$tmp,0x1\n\t"
4529             "vpmulld  $tmp,$tmp,$tmp2\n\t"
4530             "movd     $tmp2,$src1\n\t"
4531             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
4532             "movd     $dst,$tmp2\t! mul reduction4I" %}
4533   ins_encode %{
4534     int vector_len = 0;
4535     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
4536     __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4537     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4538     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4539     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4540     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4541     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4542   %}
4543   ins_pipe( pipe_slow );
4544 %}
4545 
4546 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
4547   predicate(UseAVX > 0);
4548   match(Set dst (MulReductionVI src1 src2));
4549   effect(TEMP tmp, TEMP tmp2);
4550   format %{ "vextracti128  $tmp,$src2\n\t"
4551             "vpmulld  $tmp,$tmp,$src2\n\t"
4552             "pshufd   $tmp2,$tmp,0xE\n\t"
4553             "vpmulld  $tmp,$tmp,$tmp2\n\t"
4554             "pshufd   $tmp2,$tmp,0x1\n\t"
4555             "vpmulld  $tmp,$tmp,$tmp2\n\t"
4556             "movd     $tmp2,$src1\n\t"
4557             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
4558             "movd     $dst,$tmp2\t! mul reduction8I" %}
4559   ins_encode %{
4560     int vector_len = 0;
4561     __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
4562     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
4563     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
4564     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4565     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4566     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4567     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4568     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4569     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4570   %}
4571   ins_pipe( pipe_slow );
4572 %}
4573 
4574 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
4575   predicate(UseAVX > 2);
4576   match(Set dst (MulReductionVI src1 src2));
4577   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4578   format %{ "vextracti64x4  $tmp3,$src2\n\t"
4579             "vpmulld  $tmp3,$tmp3,$src2\n\t"
4580             "vextracti128   $tmp,$tmp3\n\t"
4581             "vpmulld  $tmp,$tmp,$src2\n\t"
4582             "pshufd   $tmp2,$tmp,0xE\n\t"
4583             "vpmulld  $tmp,$tmp,$tmp2\n\t"
4584             "pshufd   $tmp2,$tmp,0x1\n\t"
4585             "vpmulld  $tmp,$tmp,$tmp2\n\t"
4586             "movd     $tmp2,$src1\n\t"
4587             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
4588             "movd     $dst,$tmp2\t! mul reduction16I" %}
4589   ins_encode %{
4590     __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
4591     __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
4592     __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
4593     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
4594     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
4595     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4596     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4597     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4598     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4599     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4600     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4601   %}
4602   ins_pipe( pipe_slow );
4603 %}
4604 
4605 #ifdef _LP64
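// vpmullq (packed 64x64->64 multiply) is an AVX-512DQ instruction, hence the
// VM_Version::supports_avx512dq() check in the predicates below.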
4606 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
4607   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
4608   match(Set dst (MulReductionVL src1 src2));
4609   effect(TEMP tmp, TEMP tmp2);
4610   format %{ "pshufd   $tmp2,$src2,0xE\n\t"
4611             "vpmullq  $tmp,$src2,$tmp2\n\t"
4612             "movdq    $tmp2,$src1\n\t"
4613             "vpmullq  $tmp2,$tmp,$tmp2\n\t"
4614             "movdq    $dst,$tmp2\t! mul reduction2L" %}
4615   ins_encode %{
4616     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
4617     __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
4618     __ movdq($tmp2$$XMMRegister, $src1$$Register);
4619     __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4620     __ movdq($dst$$Register, $tmp2$$XMMRegister);
4621   %}
4622   ins_pipe( pipe_slow );
4623 %}
4624 
4625 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
4626   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
4627   match(Set dst (MulReductionVL src1 src2));
4628   effect(TEMP tmp, TEMP tmp2);
4629   format %{ "vextracti64x2  $tmp,$src2, 0x1\n\t"
4630             "vpmullq  $tmp2,$tmp,$src2\n\t"
4631             "pshufd   $tmp,$tmp2,0xE\n\t"
4632             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
4633             "movdq    $tmp,$src1\n\t"
4634             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
4635             "movdq    $dst,$tmp2\t! mul reduction4L" %}
4636   ins_encode %{
4637     __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
4638     __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
4639     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
4640     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4641     __ movdq($tmp$$XMMRegister, $src1$$Register);
4642     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4643     __ movdq($dst$$Register, $tmp2$$XMMRegister);
4644   %}
4645   ins_pipe( pipe_slow );
4646 %}
4647 
4648 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
4649   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
4650   match(Set dst (MulReductionVL src1 src2));
4651   effect(TEMP tmp, TEMP tmp2);
4652   format %{ "vextracti64x4  $tmp2,$src2\n\t"
4653             "vpmullq  $tmp2,$tmp2,$src2\n\t"
4654             "vextracti128   $tmp,$tmp2\n\t"
4655             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
4656             "pshufd   $tmp,$tmp2,0xE\n\t"
4657             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
4658             "movdq    $tmp,$src1\n\t"
4659             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
4660             "movdq    $dst,$tmp2\t! mul reduction8L" %}
4661   ins_encode %{
4662     __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
4663     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
4664     __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
4665     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4666     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
4667     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4668     __ movdq($tmp$$XMMRegister, $src1$$Register);
4669     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4670     __ movdq($dst$$Register, $tmp2$$XMMRegister);
4671   %}
4672   ins_pipe( pipe_slow );
4673 %}
4674 #endif
4675 
4676 instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
4677   predicate(UseSSE >= 1 && UseAVX == 0);
4678   match(Set dst (MulReductionVF src1 src2));
4679   effect(TEMP tmp, TEMP tmp2);
4680   format %{ "movdqu  $tmp,$src1\n\t"
4681             "mulss   $tmp,$src2\n\t"
4682             "pshufd  $tmp2,$src2,0x01\n\t"
4683             "mulss   $tmp,$tmp2\n\t"
4684             "movdqu  $dst,$tmp\t! mul reduction2F" %}
4685   ins_encode %{
4686     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
4687     __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
4688     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
4689     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
4690     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
4691   %}
4692   ins_pipe( pipe_slow );
4693 %}
4694 
4695 instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
4696   predicate(UseAVX > 0);
4697   match(Set dst (MulReductionVF src1 src2));
4698   effect(TEMP tmp, TEMP tmp2);
4699   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
4700             "pshufd  $tmp,$src2,0x01\n\t"
4701             "vmulss  $dst,$tmp2,$tmp\t! mul reduction2F" %}
4702   ins_encode %{
4703     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4704     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4705     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4706   %}
4707   ins_pipe( pipe_slow );
4708 %}
4709 
4710 instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
4711   predicate(UseSSE >= 1 && UseAVX == 0);
4712   match(Set dst (MulReductionVF src1 src2));
4713   effect(TEMP tmp, TEMP tmp2);
4714   format %{ "movdqu  $tmp,$src1\n\t"
4715             "mulss   $tmp,$src2\n\t"
4716             "pshufd  $tmp2,$src2,0x01\n\t"
4717             "mulss   $tmp,$tmp2\n\t"
4718             "pshufd  $tmp2,$src2,0x02\n\t"
4719             "mulss   $tmp,$tmp2\n\t"
4720             "pshufd  $tmp2,$src2,0x03\n\t"
4721             "mulss   $tmp,$tmp2\n\t"
4722             "movdqu  $dst,$tmp\t! mul reduction4F" %}
4723   ins_encode %{
4724     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
4725     __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
4726     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
4727     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
4728     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
4729     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
4730     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
4731     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
4732     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
4733   %}
4734   ins_pipe( pipe_slow );
4735 %}
4736 
4737 instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
4738   predicate(UseAVX > 0);
4739   match(Set dst (MulReductionVF src1 src2));
4740   effect(TEMP tmp, TEMP tmp2);
4741   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
4742             "pshufd  $tmp,$src2,0x01\n\t"
4743             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4744             "pshufd  $tmp,$src2,0x02\n\t"
4745             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4746             "pshufd  $tmp,$src2,0x03\n\t"
4747             "vmulss  $dst,$tmp2,$tmp\t! mul reduction4F" %}
4748   ins_encode %{
4749     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4750     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4751     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4752     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
4753     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4754     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
4755     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4756   %}
4757   ins_pipe( pipe_slow );
4758 %}
4759 
4760 instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
4761   predicate(UseAVX > 0);
4762   match(Set dst (MulReductionVF src1 src2));
4763   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4764   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
4765             "pshufd  $tmp,$src2,0x01\n\t"
4766             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4767             "pshufd  $tmp,$src2,0x02\n\t"
4768             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4769             "pshufd  $tmp,$src2,0x03\n\t"
4770             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4771             "vextractf128  $tmp3,$src2\n\t"
4772             "vmulss  $tmp2,$tmp2,$tmp3\n\t"
4773             "pshufd  $tmp,$tmp3,0x01\n\t"
4774             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4775             "pshufd  $tmp,$tmp3,0x02\n\t"
4776             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4777             "pshufd  $tmp,$tmp3,0x03\n\t"
4778             "vmulss  $dst,$tmp2,$tmp\t! mul reduction8F" %}
4779   ins_encode %{
4780     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4781     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4782     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4783     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
4784     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4785     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
4786     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4787     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
4788     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4789     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4790     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4791     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4792     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4793     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4794     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4795   %}
4796   ins_pipe( pipe_slow );
4797 %}
4798 
4799 instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
4800   predicate(UseAVX > 2);
4801   match(Set dst (MulReductionVF src1 src2));
4802   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4803   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
4804             "pshufd  $tmp,$src2,0x01\n\t"
4805             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4806             "pshufd  $tmp,$src2,0x02\n\t"
4807             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4808             "pshufd  $tmp,$src2,0x03\n\t"
4809             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4810             "vextractf32x4  $tmp3,$src2, 0x1\n\t"
4811             "vmulss  $tmp2,$tmp2,$tmp3\n\t"
4812             "pshufd  $tmp,$tmp3,0x01\n\t"
4813             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4814             "pshufd  $tmp,$tmp3,0x02\n\t"
4815             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4816             "pshufd  $tmp,$tmp3,0x03\n\t"
4817             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4818             "vextractf32x4  $tmp3,$src2, 0x2\n\t"
4819             "vmulss  $tmp2,$tmp2,$tmp3\n\t"
4820             "pshufd  $tmp,$tmp3,0x01\n\t"
4821             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4822             "pshufd  $tmp,$tmp3,0x02\n\t"
4823             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4824             "pshufd  $tmp,$tmp3,0x03\n\t"
4825             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4826             "vextractf32x4  $tmp3,$src2, 0x3\n\t"
4827             "vmulss  $tmp2,$tmp2,$tmp3\n\t"
4828             "pshufd  $tmp,$tmp3,0x01\n\t"
4829             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4830             "pshufd  $tmp,$tmp3,0x02\n\t"
4831             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4832             "pshufd  $tmp,$tmp3,0x03\n\t"
4833             "vmulss  $dst,$tmp2,$tmp\t! mul reduction16F" %}
4834   ins_encode %{
4835     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4836     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4837     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4838     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
4839     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4840     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
4841     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4842     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
4843     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4844     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4845     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4846     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4847     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4848     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4849     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4850     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
4851     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4852     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4853     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4854     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4855     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4856     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4857     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4858     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
4859     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4860     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4861     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4862     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4863     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4864     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4865     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4866   %}
4867   ins_pipe( pipe_slow );
4868 %}
4869 
4870 instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
4871   predicate(UseSSE >= 1 && UseAVX == 0);
4872   match(Set dst (MulReductionVD src1 src2));
4873   effect(TEMP tmp, TEMP dst);
4874   format %{ "movdqu  $tmp,$src1\n\t"
4875             "mulsd   $tmp,$src2\n\t"
4876             "pshufd  $dst,$src2,0xE\n\t"
4877             "mulsd   $dst,$tmp\t! mul reduction2D" %}
4878   ins_encode %{
4879     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
4880     __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister);
4881     __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
4882     __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
4883   %}
4884   ins_pipe( pipe_slow );
4885 %}
4886 
4887 instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
4888   predicate(UseAVX > 0);
4889   match(Set dst (MulReductionVD src1 src2));
4890   effect(TEMP tmp, TEMP tmp2);
4891   format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
4892             "pshufd  $tmp,$src2,0xE\n\t"
4893             "vmulsd  $dst,$tmp2,$tmp\t! mul reduction2D" %}
4894   ins_encode %{
4895     __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4896     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
4897     __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4898   %}
4899   ins_pipe( pipe_slow );
4900 %}
4901 
4902 instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
4903   predicate(UseAVX > 0);
4904   match(Set dst (MulReductionVD src1 src2));
4905   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4906   format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
4907             "pshufd  $tmp,$src2,0xE\n\t"
4908             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
4909             "vextractf128  $tmp3,$src2\n\t"
4910             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
4911             "pshufd  $tmp,$tmp3,0xE\n\t"
4912             "vmulsd  $dst,$tmp2,$tmp\t! mul reduction4D" %}
4913   ins_encode %{
4914     __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4915     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
4916     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4917     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
4918     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4919     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4920     __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4921   %}
4922   ins_pipe( pipe_slow );
4923 %}
4924 
4925 instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
4926   predicate(UseAVX > 2);
4927   match(Set dst (MulReductionVD src1 src2));
4928   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4929   format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
4930             "pshufd  $tmp,$src2,0xE\n\t"
4931             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
4932             "vextractf64x2  $tmp3,$src2, 0x1\n\t"
4933             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
4934             "pshufd  $tmp,$tmp3,0xE\n\t"
4935             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
4936             "vextractf64x2  $tmp3,$src2, 0x2\n\t"
4937             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
4938             "pshufd  $tmp,$tmp3,0xE\n\t"
4939             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
4940             "vextractf64x2  $tmp3,$src2, 0x3\n\t"
4941             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
4942             "pshufd  $tmp,$tmp3,0xE\n\t"
4943             "vmulsd  $dst,$tmp2,$tmp\t! mul reduction8D" %}
4944   ins_encode %{
4945     __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4946     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
4947     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4948     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
4949     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4950     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4951     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4952     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
4953     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4954     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4955     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4956     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
4957     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4958     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4959     __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4960   %}
4961   ins_pipe( pipe_slow );
4962 %}
4963 
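// Note on the reduction patterns above: each one folds the scalar input
// ($src1) into lane 0 of the vector, then rotates every remaining lane into
// position 0 with pshufd (plus vextractf128/vextractf32x4/vextractf64x2 for
// the upper 128-bit lanes) and multiplies it in with a scalar mulss/mulsd or
// vmulss/vmulsd.  As an illustrative sketch only -- C-like pseudocode, not
// part of the generated code -- the 4F variants compute roughly:
//
//   float reduce_mul4F(float src1, const float src2[4]) {
//     float r = src1;
//     for (int i = 0; i < 4; i++) {
//       r *= src2[i];          // one scalar multiply per lane
//     }
//     return r;
//   }
//
// The wider variants (8F/16F and 2D/4D/8D) apply the same scheme across more
// lanes.
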
4964 // ====================VECTOR ARITHMETIC=======================================
4965 
4966 // --------------------------------- ADD --------------------------------------
4967 
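// Each element type below comes in up to three flavors: a two-operand SSE
// form (dst = dst op src), a three-operand AVX register form, and an AVX
// form whose second input is a LoadVector, which lets the instruction take a
// memory operand directly instead of requiring a separate vector load.  The
// vector_len argument passed to the assembler selects the encoded vector
// width and is assumed to follow the usual Assembler convention:
// 0 = 128-bit (XMM), 1 = 256-bit (YMM), 2 = 512-bit (ZMM).
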
4968 // Bytes vector add
4969 instruct vadd4B(vecS dst, vecS src) %{
4970   predicate(n->as_Vector()->length() == 4);
4971   match(Set dst (AddVB dst src));
4972   format %{ "paddb   $dst,$src\t! add packed4B" %}
4973   ins_encode %{
4974     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
4975   %}
4976   ins_pipe( pipe_slow );
4977 %}
4978 
4979 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
4980   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4981   match(Set dst (AddVB src1 src2));
4982   format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
4983   ins_encode %{
4984     int vector_len = 0;
4985     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4986   %}
4987   ins_pipe( pipe_slow );
4988 %}
4989 
4990 instruct vadd8B(vecD dst, vecD src) %{
4991   predicate(n->as_Vector()->length() == 8);
4992   match(Set dst (AddVB dst src));
4993   format %{ "paddb   $dst,$src\t! add packed8B" %}
4994   ins_encode %{
4995     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
4996   %}
4997   ins_pipe( pipe_slow );
4998 %}
4999 
5000 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
5001   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5002   match(Set dst (AddVB src1 src2));
5003   format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
5004   ins_encode %{
5005     int vector_len = 0;
5006     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5007   %}
5008   ins_pipe( pipe_slow );
5009 %}
5010 
5011 instruct vadd16B(vecX dst, vecX src) %{
5012   predicate(n->as_Vector()->length() == 16);
5013   match(Set dst (AddVB dst src));
5014   format %{ "paddb   $dst,$src\t! add packed16B" %}
5015   ins_encode %{
5016     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
5017   %}
5018   ins_pipe( pipe_slow );
5019 %}
5020 
5021 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
5022   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
5023   match(Set dst (AddVB src1 src2));
5024   format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
5025   ins_encode %{
5026     int vector_len = 0;
5027     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5028   %}
5029   ins_pipe( pipe_slow );
5030 %}
5031 
5032 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
5033   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
5034   match(Set dst (AddVB src (LoadVector mem)));
5035   format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
5036   ins_encode %{
5037     int vector_len = 0;
5038     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5039   %}
5040   ins_pipe( pipe_slow );
5041 %}
5042 
5043 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
5044   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
5045   match(Set dst (AddVB src1 src2));
5046   format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
5047   ins_encode %{
5048     int vector_len = 1;
5049     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5050   %}
5051   ins_pipe( pipe_slow );
5052 %}
5053 
5054 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
5055   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
5056   match(Set dst (AddVB src (LoadVector mem)));
5057   format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
5058   ins_encode %{
5059     int vector_len = 1;
5060     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5061   %}
5062   ins_pipe( pipe_slow );
5063 %}
5064 
5065 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
5066   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
5067   match(Set dst (AddVB src1 src2));
5068   format %{ "vpaddb  $dst,$src1,$src2\t! add packed64B" %}
5069   ins_encode %{
5070     int vector_len = 2;
5071     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5072   %}
5073   ins_pipe( pipe_slow );
5074 %}
5075 
5076 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
5077   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
5078   match(Set dst (AddVB src (LoadVector mem)));
5079   format %{ "vpaddb  $dst,$src,$mem\t! add packed64B" %}
5080   ins_encode %{
5081     int vector_len = 2;
5082     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5083   %}
5084   ins_pipe( pipe_slow );
5085 %}
5086 
5087 // Shorts/Chars vector add
5088 instruct vadd2S(vecS dst, vecS src) %{
5089   predicate(n->as_Vector()->length() == 2);
5090   match(Set dst (AddVS dst src));
5091   format %{ "paddw   $dst,$src\t! add packed2S" %}
5092   ins_encode %{
5093     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
5094   %}
5095   ins_pipe( pipe_slow );
5096 %}
5097 
5098 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
5099   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5100   match(Set dst (AddVS src1 src2));
5101   format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
5102   ins_encode %{
5103     int vector_len = 0;
5104     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5105   %}
5106   ins_pipe( pipe_slow );
5107 %}
5108 
5109 instruct vadd4S(vecD dst, vecD src) %{
5110   predicate(n->as_Vector()->length() == 4);
5111   match(Set dst (AddVS dst src));
5112   format %{ "paddw   $dst,$src\t! add packed4S" %}
5113   ins_encode %{
5114     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
5115   %}
5116   ins_pipe( pipe_slow );
5117 %}
5118 
5119 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
5120   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5121   match(Set dst (AddVS src1 src2));
5122   format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
5123   ins_encode %{
5124     int vector_len = 0;
5125     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5126   %}
5127   ins_pipe( pipe_slow );
5128 %}
5129 
5130 instruct vadd8S(vecX dst, vecX src) %{
5131   predicate(n->as_Vector()->length() == 8);
5132   match(Set dst (AddVS dst src));
5133   format %{ "paddw   $dst,$src\t! add packed8S" %}
5134   ins_encode %{
5135     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
5136   %}
5137   ins_pipe( pipe_slow );
5138 %}
5139 
5140 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
5141   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5142   match(Set dst (AddVS src1 src2));
5143   format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
5144   ins_encode %{
5145     int vector_len = 0;
5146     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5147   %}
5148   ins_pipe( pipe_slow );
5149 %}
5150 
5151 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
5152   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5153   match(Set dst (AddVS src (LoadVector mem)));
5154   format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
5155   ins_encode %{
5156     int vector_len = 0;
5157     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5158   %}
5159   ins_pipe( pipe_slow );
5160 %}
5161 
5162 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
5163   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
5164   match(Set dst (AddVS src1 src2));
5165   format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
5166   ins_encode %{
5167     int vector_len = 1;
5168     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5169   %}
5170   ins_pipe( pipe_slow );
5171 %}
5172 
5173 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
5174   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
5175   match(Set dst (AddVS src (LoadVector mem)));
5176   format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
5177   ins_encode %{
5178     int vector_len = 1;
5179     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5180   %}
5181   ins_pipe( pipe_slow );
5182 %}
5183 
5184 instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
5185   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
5186   match(Set dst (AddVS src1 src2));
5187   format %{ "vpaddw  $dst,$src1,$src2\t! add packed32S" %}
5188   ins_encode %{
5189     int vector_len = 2;
5190     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5191   %}
5192   ins_pipe( pipe_slow );
5193 %}
5194 
5195 instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
5196   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
5197   match(Set dst (AddVS src (LoadVector mem)));
5198   format %{ "vpaddw  $dst,$src,$mem\t! add packed32S" %}
5199   ins_encode %{
5200     int vector_len = 2;
5201     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5202   %}
5203   ins_pipe( pipe_slow );
5204 %}
5205 
5206 // Integers vector add
5207 instruct vadd2I(vecD dst, vecD src) %{
5208   predicate(n->as_Vector()->length() == 2);
5209   match(Set dst (AddVI dst src));
5210   format %{ "paddd   $dst,$src\t! add packed2I" %}
5211   ins_encode %{
5212     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
5213   %}
5214   ins_pipe( pipe_slow );
5215 %}
5216 
5217 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
5218   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5219   match(Set dst (AddVI src1 src2));
5220   format %{ "vpaddd  $dst,$src1,$src2\t! add packed2I" %}
5221   ins_encode %{
5222     int vector_len = 0;
5223     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5224   %}
5225   ins_pipe( pipe_slow );
5226 %}
5227 
5228 instruct vadd4I(vecX dst, vecX src) %{
5229   predicate(n->as_Vector()->length() == 4);
5230   match(Set dst (AddVI dst src));
5231   format %{ "paddd   $dst,$src\t! add packed4I" %}
5232   ins_encode %{
5233     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
5234   %}
5235   ins_pipe( pipe_slow );
5236 %}
5237 
5238 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
5239   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5240   match(Set dst (AddVI src1 src2));
5241   format %{ "vpaddd  $dst,$src1,$src2\t! add packed4I" %}
5242   ins_encode %{
5243     int vector_len = 0;
5244     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5245   %}
5246   ins_pipe( pipe_slow );
5247 %}
5248 
5249 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
5250   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5251   match(Set dst (AddVI src (LoadVector mem)));
5252   format %{ "vpaddd  $dst,$src,$mem\t! add packed4I" %}
5253   ins_encode %{
5254     int vector_len = 0;
5255     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5256   %}
5257   ins_pipe( pipe_slow );
5258 %}
5259 
5260 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
5261   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
5262   match(Set dst (AddVI src1 src2));
5263   format %{ "vpaddd  $dst,$src1,$src2\t! add packed8I" %}
5264   ins_encode %{
5265     int vector_len = 1;
5266     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5267   %}
5268   ins_pipe( pipe_slow );
5269 %}
5270 
5271 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
5272   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
5273   match(Set dst (AddVI src (LoadVector mem)));
5274   format %{ "vpaddd  $dst,$src,$mem\t! add packed8I" %}
5275   ins_encode %{
5276     int vector_len = 1;
5277     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5278   %}
5279   ins_pipe( pipe_slow );
5280 %}
5281 
5282 instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
5283   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5284   match(Set dst (AddVI src1 src2));
5285   format %{ "vpaddd  $dst,$src1,$src2\t! add packed16I" %}
5286   ins_encode %{
5287     int vector_len = 2;
5288     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5289   %}
5290   ins_pipe( pipe_slow );
5291 %}
5292 
5293 instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
5294   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5295   match(Set dst (AddVI src (LoadVector mem)));
5296   format %{ "vpaddd  $dst,$src,$mem\t! add packed16I" %}
5297   ins_encode %{
5298     int vector_len = 2;
5299     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5300   %}
5301   ins_pipe( pipe_slow );
5302 %}
5303 
5304 // Longs vector add
5305 instruct vadd2L(vecX dst, vecX src) %{
5306   predicate(n->as_Vector()->length() == 2);
5307   match(Set dst (AddVL dst src));
5308   format %{ "paddq   $dst,$src\t! add packed2L" %}
5309   ins_encode %{
5310     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
5311   %}
5312   ins_pipe( pipe_slow );
5313 %}
5314 
5315 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
5316   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5317   match(Set dst (AddVL src1 src2));
5318   format %{ "vpaddq  $dst,$src1,$src2\t! add packed2L" %}
5319   ins_encode %{
5320     int vector_len = 0;
5321     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5322   %}
5323   ins_pipe( pipe_slow );
5324 %}
5325 
5326 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
5327   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5328   match(Set dst (AddVL src (LoadVector mem)));
5329   format %{ "vpaddq  $dst,$src,$mem\t! add packed2L" %}
5330   ins_encode %{
5331     int vector_len = 0;
5332     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5333   %}
5334   ins_pipe( pipe_slow );
5335 %}
5336 
5337 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
5338   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
5339   match(Set dst (AddVL src1 src2));
5340   format %{ "vpaddq  $dst,$src1,$src2\t! add packed4L" %}
5341   ins_encode %{
5342     int vector_len = 1;
5343     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5344   %}
5345   ins_pipe( pipe_slow );
5346 %}
5347 
5348 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
5349   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
5350   match(Set dst (AddVL src (LoadVector mem)));
5351   format %{ "vpaddq  $dst,$src,$mem\t! add packed4L" %}
5352   ins_encode %{
5353     int vector_len = 1;
5354     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5355   %}
5356   ins_pipe( pipe_slow );
5357 %}
5358 
5359 instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
5360   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
5361   match(Set dst (AddVL src1 src2));
5362   format %{ "vpaddq  $dst,$src1,$src2\t! add packed8L" %}
5363   ins_encode %{
5364     int vector_len = 2;
5365     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5366   %}
5367   ins_pipe( pipe_slow );
5368 %}
5369 
5370 instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
5371   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
5372   match(Set dst (AddVL src (LoadVector mem)));
5373   format %{ "vpaddq  $dst,$src,$mem\t! add packed8L" %}
5374   ins_encode %{
5375     int vector_len = 2;
5376     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5377   %}
5378   ins_pipe( pipe_slow );
5379 %}
5380 
5381 // Floats vector add
5382 instruct vadd2F(vecD dst, vecD src) %{
5383   predicate(n->as_Vector()->length() == 2);
5384   match(Set dst (AddVF dst src));
5385   format %{ "addps   $dst,$src\t! add packed2F" %}
5386   ins_encode %{
5387     __ addps($dst$$XMMRegister, $src$$XMMRegister);
5388   %}
5389   ins_pipe( pipe_slow );
5390 %}
5391 
5392 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
5393   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5394   match(Set dst (AddVF src1 src2));
5395   format %{ "vaddps  $dst,$src1,$src2\t! add packed2F" %}
5396   ins_encode %{
5397     int vector_len = 0;
5398     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5399   %}
5400   ins_pipe( pipe_slow );
5401 %}
5402 
5403 instruct vadd4F(vecX dst, vecX src) %{
5404   predicate(n->as_Vector()->length() == 4);
5405   match(Set dst (AddVF dst src));
5406   format %{ "addps   $dst,$src\t! add packed4F" %}
5407   ins_encode %{
5408     __ addps($dst$$XMMRegister, $src$$XMMRegister);
5409   %}
5410   ins_pipe( pipe_slow );
5411 %}
5412 
5413 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
5414   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5415   match(Set dst (AddVF src1 src2));
5416   format %{ "vaddps  $dst,$src1,$src2\t! add packed4F" %}
5417   ins_encode %{
5418     int vector_len = 0;
5419     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5420   %}
5421   ins_pipe( pipe_slow );
5422 %}
5423 
5424 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
5425   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5426   match(Set dst (AddVF src (LoadVector mem)));
5427   format %{ "vaddps  $dst,$src,$mem\t! add packed4F" %}
5428   ins_encode %{
5429     int vector_len = 0;
5430     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5431   %}
5432   ins_pipe( pipe_slow );
5433 %}
5434 
5435 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
5436   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5437   match(Set dst (AddVF src1 src2));
5438   format %{ "vaddps  $dst,$src1,$src2\t! add packed8F" %}
5439   ins_encode %{
5440     int vector_len = 1;
5441     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5442   %}
5443   ins_pipe( pipe_slow );
5444 %}
5445 
5446 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
5447   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5448   match(Set dst (AddVF src (LoadVector mem)));
5449   format %{ "vaddps  $dst,$src,$mem\t! add packed8F" %}
5450   ins_encode %{
5451     int vector_len = 1;
5452     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5453   %}
5454   ins_pipe( pipe_slow );
5455 %}
5456 
5457 instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
5458   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5459   match(Set dst (AddVF src1 src2));
5460   format %{ "vaddps  $dst,$src1,$src2\t! add packed16F" %}
5461   ins_encode %{
5462     int vector_len = 2;
5463     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5464   %}
5465   ins_pipe( pipe_slow );
5466 %}
5467 
5468 instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
5469   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5470   match(Set dst (AddVF src (LoadVector mem)));
5471   format %{ "vaddps  $dst,$src,$mem\t! add packed16F" %}
5472   ins_encode %{
5473     int vector_len = 2;
5474     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5475   %}
5476   ins_pipe( pipe_slow );
5477 %}
5478 
5479 // Doubles vector add
5480 instruct vadd2D(vecX dst, vecX src) %{
5481   predicate(n->as_Vector()->length() == 2);
5482   match(Set dst (AddVD dst src));
5483   format %{ "addpd   $dst,$src\t! add packed2D" %}
5484   ins_encode %{
5485     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
5486   %}
5487   ins_pipe( pipe_slow );
5488 %}
5489 
5490 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
5491   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5492   match(Set dst (AddVD src1 src2));
5493   format %{ "vaddpd  $dst,$src1,$src2\t! add packed2D" %}
5494   ins_encode %{
5495     int vector_len = 0;
5496     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5497   %}
5498   ins_pipe( pipe_slow );
5499 %}
5500 
5501 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
5502   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5503   match(Set dst (AddVD src (LoadVector mem)));
5504   format %{ "vaddpd  $dst,$src,$mem\t! add packed2D" %}
5505   ins_encode %{
5506     int vector_len = 0;
5507     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5508   %}
5509   ins_pipe( pipe_slow );
5510 %}
5511 
5512 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
5513   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5514   match(Set dst (AddVD src1 src2));
5515   format %{ "vaddpd  $dst,$src1,$src2\t! add packed4D" %}
5516   ins_encode %{
5517     int vector_len = 1;
5518     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5519   %}
5520   ins_pipe( pipe_slow );
5521 %}
5522 
5523 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
5524   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5525   match(Set dst (AddVD src (LoadVector mem)));
5526   format %{ "vaddpd  $dst,$src,$mem\t! add packed4D" %}
5527   ins_encode %{
5528     int vector_len = 1;
5529     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5530   %}
5531   ins_pipe( pipe_slow );
5532 %}
5533 
5534 instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
5535   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
5536   match(Set dst (AddVD src1 src2));
5537   format %{ "vaddpd  $dst,$src1,$src2\t! add packed8D" %}
5538   ins_encode %{
5539     int vector_len = 2;
5540     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5541   %}
5542   ins_pipe( pipe_slow );
5543 %}
5544 
5545 instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
5546   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
5547   match(Set dst (AddVD src (LoadVector mem)));
5548   format %{ "vaddpd  $dst,$src,$mem\t! add packed8D" %}
5549   ins_encode %{
5550     int vector_len = 2;
5551     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5552   %}
5553   ins_pipe( pipe_slow );
5554 %}
5555 
5556 // --------------------------------- SUB --------------------------------------
5557 
5558 // Bytes vector sub
5559 instruct vsub4B(vecS dst, vecS src) %{
5560   predicate(n->as_Vector()->length() == 4);
5561   match(Set dst (SubVB dst src));
5562   format %{ "psubb   $dst,$src\t! sub packed4B" %}
5563   ins_encode %{
5564     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
5565   %}
5566   ins_pipe( pipe_slow );
5567 %}
5568 
5569 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
5570   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5571   match(Set dst (SubVB src1 src2));
5572   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
5573   ins_encode %{
5574     int vector_len = 0;
5575     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5576   %}
5577   ins_pipe( pipe_slow );
5578 %}
5579 
5580 instruct vsub8B(vecD dst, vecD src) %{
5581   predicate(n->as_Vector()->length() == 8);
5582   match(Set dst (SubVB dst src));
5583   format %{ "psubb   $dst,$src\t! sub packed8B" %}
5584   ins_encode %{
5585     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
5586   %}
5587   ins_pipe( pipe_slow );
5588 %}
5589 
5590 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
5591   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5592   match(Set dst (SubVB src1 src2));
5593   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
5594   ins_encode %{
5595     int vector_len = 0;
5596     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5597   %}
5598   ins_pipe( pipe_slow );
5599 %}
5600 
5601 instruct vsub16B(vecX dst, vecX src) %{
5602   predicate(n->as_Vector()->length() == 16);
5603   match(Set dst (SubVB dst src));
5604   format %{ "psubb   $dst,$src\t! sub packed16B" %}
5605   ins_encode %{
5606     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
5607   %}
5608   ins_pipe( pipe_slow );
5609 %}
5610 
5611 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
5612   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
5613   match(Set dst (SubVB src1 src2));
5614   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
5615   ins_encode %{
5616     int vector_len = 0;
5617     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5618   %}
5619   ins_pipe( pipe_slow );
5620 %}
5621 
5622 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
5623   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
5624   match(Set dst (SubVB src (LoadVector mem)));
5625   format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
5626   ins_encode %{
5627     int vector_len = 0;
5628     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5629   %}
5630   ins_pipe( pipe_slow );
5631 %}
5632 
5633 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
5634   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
5635   match(Set dst (SubVB src1 src2));
5636   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
5637   ins_encode %{
5638     int vector_len = 1;
5639     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5640   %}
5641   ins_pipe( pipe_slow );
5642 %}
5643 
5644 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
5645   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
5646   match(Set dst (SubVB src (LoadVector mem)));
5647   format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
5648   ins_encode %{
5649     int vector_len = 1;
5650     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5651   %}
5652   ins_pipe( pipe_slow );
5653 %}
5654 
5655 instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
5656   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
5657   match(Set dst (SubVB src1 src2));
5658   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed64B" %}
5659   ins_encode %{
5660     int vector_len = 2;
5661     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5662   %}
5663   ins_pipe( pipe_slow );
5664 %}
5665 
5666 instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
5667   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
5668   match(Set dst (SubVB src (LoadVector mem)));
5669   format %{ "vpsubb  $dst,$src,$mem\t! sub packed64B" %}
5670   ins_encode %{
5671     int vector_len = 2;
5672     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5673   %}
5674   ins_pipe( pipe_slow );
5675 %}
5676 
5677 // Shorts/Chars vector sub
5678 instruct vsub2S(vecS dst, vecS src) %{
5679   predicate(n->as_Vector()->length() == 2);
5680   match(Set dst (SubVS dst src));
5681   format %{ "psubw   $dst,$src\t! sub packed2S" %}
5682   ins_encode %{
5683     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
5684   %}
5685   ins_pipe( pipe_slow );
5686 %}
5687 
5688 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
5689   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5690   match(Set dst (SubVS src1 src2));
5691   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
5692   ins_encode %{
5693     int vector_len = 0;
5694     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5695   %}
5696   ins_pipe( pipe_slow );
5697 %}
5698 
5699 instruct vsub4S(vecD dst, vecD src) %{
5700   predicate(n->as_Vector()->length() == 4);
5701   match(Set dst (SubVS dst src));
5702   format %{ "psubw   $dst,$src\t! sub packed4S" %}
5703   ins_encode %{
5704     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
5705   %}
5706   ins_pipe( pipe_slow );
5707 %}
5708 
5709 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
5710   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5711   match(Set dst (SubVS src1 src2));
5712   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
5713   ins_encode %{
5714     int vector_len = 0;
5715     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5716   %}
5717   ins_pipe( pipe_slow );
5718 %}
5719 
5720 instruct vsub8S(vecX dst, vecX src) %{
5721   predicate(n->as_Vector()->length() == 8);
5722   match(Set dst (SubVS dst src));
5723   format %{ "psubw   $dst,$src\t! sub packed8S" %}
5724   ins_encode %{
5725     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
5726   %}
5727   ins_pipe( pipe_slow );
5728 %}
5729 
5730 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
5731   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5732   match(Set dst (SubVS src1 src2));
5733   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
5734   ins_encode %{
5735     int vector_len = 0;
5736     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5737   %}
5738   ins_pipe( pipe_slow );
5739 %}
5740 
5741 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
5742   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5743   match(Set dst (SubVS src (LoadVector mem)));
5744   format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
5745   ins_encode %{
5746     int vector_len = 0;
5747     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5748   %}
5749   ins_pipe( pipe_slow );
5750 %}
5751 
5752 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
5753   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
5754   match(Set dst (SubVS src1 src2));
5755   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
5756   ins_encode %{
5757     int vector_len = 1;
5758     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5759   %}
5760   ins_pipe( pipe_slow );
5761 %}
5762 
5763 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
5764   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
5765   match(Set dst (SubVS src (LoadVector mem)));
5766   format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
5767   ins_encode %{
5768     int vector_len = 1;
5769     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5770   %}
5771   ins_pipe( pipe_slow );
5772 %}
5773 
5774 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
5775   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
5776   match(Set dst (SubVS src1 src2));
5777   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed32S" %}
5778   ins_encode %{
5779     int vector_len = 2;
5780     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5781   %}
5782   ins_pipe( pipe_slow );
5783 %}
5784 
5785 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
5786   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
5787   match(Set dst (SubVS src (LoadVector mem)));
5788   format %{ "vpsubw  $dst,$src,$mem\t! sub packed32S" %}
5789   ins_encode %{
5790     int vector_len = 2;
5791     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5792   %}
5793   ins_pipe( pipe_slow );
5794 %}
5795 
5796 // Integers vector sub
5797 instruct vsub2I(vecD dst, vecD src) %{
5798   predicate(n->as_Vector()->length() == 2);
5799   match(Set dst (SubVI dst src));
5800   format %{ "psubd   $dst,$src\t! sub packed2I" %}
5801   ins_encode %{
5802     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
5803   %}
5804   ins_pipe( pipe_slow );
5805 %}
5806 
5807 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
5808   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5809   match(Set dst (SubVI src1 src2));
5810   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed2I" %}
5811   ins_encode %{
5812     int vector_len = 0;
5813     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5814   %}
5815   ins_pipe( pipe_slow );
5816 %}
5817 
5818 instruct vsub4I(vecX dst, vecX src) %{
5819   predicate(n->as_Vector()->length() == 4);
5820   match(Set dst (SubVI dst src));
5821   format %{ "psubd   $dst,$src\t! sub packed4I" %}
5822   ins_encode %{
5823     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
5824   %}
5825   ins_pipe( pipe_slow );
5826 %}
5827 
5828 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
5829   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5830   match(Set dst (SubVI src1 src2));
5831   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed4I" %}
5832   ins_encode %{
5833     int vector_len = 0;
5834     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5835   %}
5836   ins_pipe( pipe_slow );
5837 %}
5838 
5839 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
5840   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5841   match(Set dst (SubVI src (LoadVector mem)));
5842   format %{ "vpsubd  $dst,$src,$mem\t! sub packed4I" %}
5843   ins_encode %{
5844     int vector_len = 0;
5845     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5846   %}
5847   ins_pipe( pipe_slow );
5848 %}
5849 
5850 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
5851   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
5852   match(Set dst (SubVI src1 src2));
5853   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed8I" %}
5854   ins_encode %{
5855     int vector_len = 1;
5856     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5857   %}
5858   ins_pipe( pipe_slow );
5859 %}
5860 
5861 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
5862   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
5863   match(Set dst (SubVI src (LoadVector mem)));
5864   format %{ "vpsubd  $dst,$src,$mem\t! sub packed8I" %}
5865   ins_encode %{
5866     int vector_len = 1;
5867     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5868   %}
5869   ins_pipe( pipe_slow );
5870 %}
5871 
5872 instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
5873   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5874   match(Set dst (SubVI src1 src2));
5875   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed16I" %}
5876   ins_encode %{
5877     int vector_len = 2;
5878     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5879   %}
5880   ins_pipe( pipe_slow );
5881 %}
5882 
5883 instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
5884   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5885   match(Set dst (SubVI src (LoadVector mem)));
5886   format %{ "vpsubd  $dst,$src,$mem\t! sub packed16I" %}
5887   ins_encode %{
5888     int vector_len = 2;
5889     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5890   %}
5891   ins_pipe( pipe_slow );
5892 %}
5893 
5894 // Longs vector sub
5895 instruct vsub2L(vecX dst, vecX src) %{
5896   predicate(n->as_Vector()->length() == 2);
5897   match(Set dst (SubVL dst src));
5898   format %{ "psubq   $dst,$src\t! sub packed2L" %}
5899   ins_encode %{
5900     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
5901   %}
5902   ins_pipe( pipe_slow );
5903 %}
5904 
5905 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
5906   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5907   match(Set dst (SubVL src1 src2));
5908   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed2L" %}
5909   ins_encode %{
5910     int vector_len = 0;
5911     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5912   %}
5913   ins_pipe( pipe_slow );
5914 %}
5915 
5916 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
5917   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5918   match(Set dst (SubVL src (LoadVector mem)));
5919   format %{ "vpsubq  $dst,$src,$mem\t! sub packed2L" %}
5920   ins_encode %{
5921     int vector_len = 0;
5922     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5923   %}
5924   ins_pipe( pipe_slow );
5925 %}
5926 
5927 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
5928   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
5929   match(Set dst (SubVL src1 src2));
5930   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed4L" %}
5931   ins_encode %{
5932     int vector_len = 1;
5933     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5934   %}
5935   ins_pipe( pipe_slow );
5936 %}
5937 
5938 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
5939   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
5940   match(Set dst (SubVL src (LoadVector mem)));
5941   format %{ "vpsubq  $dst,$src,$mem\t! sub packed4L" %}
5942   ins_encode %{
5943     int vector_len = 1;
5944     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5945   %}
5946   ins_pipe( pipe_slow );
5947 %}
5948 
5949 instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
5950   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
5951   match(Set dst (SubVL src1 src2));
5952   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed8L" %}
5953   ins_encode %{
5954     int vector_len = 2;
5955     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5956   %}
5957   ins_pipe( pipe_slow );
5958 %}
5959 
5960 instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
5961   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
5962   match(Set dst (SubVL src (LoadVector mem)));
5963   format %{ "vpsubq  $dst,$src,$mem\t! sub packed8L" %}
5964   ins_encode %{
5965     int vector_len = 2;
5966     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5967   %}
5968   ins_pipe( pipe_slow );
5969 %}
5970 
5971 // Floats vector sub
5972 instruct vsub2F(vecD dst, vecD src) %{
5973   predicate(n->as_Vector()->length() == 2);
5974   match(Set dst (SubVF dst src));
5975   format %{ "subps   $dst,$src\t! sub packed2F" %}
5976   ins_encode %{
5977     __ subps($dst$$XMMRegister, $src$$XMMRegister);
5978   %}
5979   ins_pipe( pipe_slow );
5980 %}
5981 
5982 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
5983   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5984   match(Set dst (SubVF src1 src2));
5985   format %{ "vsubps  $dst,$src1,$src2\t! sub packed2F" %}
5986   ins_encode %{
5987     int vector_len = 0;
5988     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5989   %}
5990   ins_pipe( pipe_slow );
5991 %}
5992 
5993 instruct vsub4F(vecX dst, vecX src) %{
5994   predicate(n->as_Vector()->length() == 4);
5995   match(Set dst (SubVF dst src));
5996   format %{ "subps   $dst,$src\t! sub packed4F" %}
5997   ins_encode %{
5998     __ subps($dst$$XMMRegister, $src$$XMMRegister);
5999   %}
6000   ins_pipe( pipe_slow );
6001 %}
6002 
6003 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
6004   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6005   match(Set dst (SubVF src1 src2));
6006   format %{ "vsubps  $dst,$src1,$src2\t! sub packed4F" %}
6007   ins_encode %{
6008     int vector_len = 0;
6009     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6010   %}
6011   ins_pipe( pipe_slow );
6012 %}
6013 
6014 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
6015   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6016   match(Set dst (SubVF src (LoadVector mem)));
6017   format %{ "vsubps  $dst,$src,$mem\t! sub packed4F" %}
6018   ins_encode %{
6019     int vector_len = 0;
6020     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6021   %}
6022   ins_pipe( pipe_slow );
6023 %}
6024 
6025 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
6026   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6027   match(Set dst (SubVF src1 src2));
6028   format %{ "vsubps  $dst,$src1,$src2\t! sub packed8F" %}
6029   ins_encode %{
6030     int vector_len = 1;
6031     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6032   %}
6033   ins_pipe( pipe_slow );
6034 %}
6035 
6036 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
6037   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6038   match(Set dst (SubVF src (LoadVector mem)));
6039   format %{ "vsubps  $dst,$src,$mem\t! sub packed8F" %}
6040   ins_encode %{
6041     int vector_len = 1;
6042     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6043   %}
6044   ins_pipe( pipe_slow );
6045 %}
6046 
6047 instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
6048   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6049   match(Set dst (SubVF src1 src2));
6050   format %{ "vsubps  $dst,$src1,$src2\t! sub packed16F" %}
6051   ins_encode %{
6052     int vector_len = 2;
6053     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6054   %}
6055   ins_pipe( pipe_slow );
6056 %}
6057 
6058 instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
6059   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6060   match(Set dst (SubVF src (LoadVector mem)));
6061   format %{ "vsubps  $dst,$src,$mem\t! sub packed16F" %}
6062   ins_encode %{
6063     int vector_len = 2;
6064     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6065   %}
6066   ins_pipe( pipe_slow );
6067 %}
6068 
6069 // Doubles vector sub
6070 instruct vsub2D(vecX dst, vecX src) %{
6071   predicate(n->as_Vector()->length() == 2);
6072   match(Set dst (SubVD dst src));
6073   format %{ "subpd   $dst,$src\t! sub packed2D" %}
6074   ins_encode %{
6075     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
6076   %}
6077   ins_pipe( pipe_slow );
6078 %}
6079 
6080 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
6081   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6082   match(Set dst (SubVD src1 src2));
6083   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed2D" %}
6084   ins_encode %{
6085     int vector_len = 0;
6086     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6087   %}
6088   ins_pipe( pipe_slow );
6089 %}
6090 
6091 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
6092   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6093   match(Set dst (SubVD src (LoadVector mem)));
6094   format %{ "vsubpd  $dst,$src,$mem\t! sub packed2D" %}
6095   ins_encode %{
6096     int vector_len = 0;
6097     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6098   %}
6099   ins_pipe( pipe_slow );
6100 %}
6101 
6102 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
6103   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6104   match(Set dst (SubVD src1 src2));
6105   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed4D" %}
6106   ins_encode %{
6107     int vector_len = 1;
6108     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6109   %}
6110   ins_pipe( pipe_slow );
6111 %}
6112 
6113 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
6114   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6115   match(Set dst (SubVD src (LoadVector mem)));
6116   format %{ "vsubpd  $dst,$src,$mem\t! sub packed4D" %}
6117   ins_encode %{
6118     int vector_len = 1;
6119     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6120   %}
6121   ins_pipe( pipe_slow );
6122 %}
6123 
6124 instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
6125   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6126   match(Set dst (SubVD src1 src2));
6127   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed8D" %}
6128   ins_encode %{
6129     int vector_len = 2;
6130     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6131   %}
6132   ins_pipe( pipe_slow );
6133 %}
6134 
6135 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
6136   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6137   match(Set dst (SubVD src (LoadVector mem)));
6138   format %{ "vsubpd  $dst,$src,$mem\t! sub packed8D" %}
6139   ins_encode %{
6140     int vector_len = 2;
6141     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6142   %}
6143   ins_pipe( pipe_slow );
6144 %}
6145 
6146 // --------------------------------- MUL --------------------------------------
6147 
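// Packed multiply support varies by element size: 16-bit multiplies use
// pmullw/vpmullw, 32-bit multiplies need the SSE4.1 pmulld (hence the
// UseSSE > 3 predicates below), and x86 has no packed byte multiply, so
// byte vectors are not matched here.
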
6148 // Shorts/Chars vector mul
6149 instruct vmul2S(vecS dst, vecS src) %{
6150   predicate(n->as_Vector()->length() == 2);
6151   match(Set dst (MulVS dst src));
6152   format %{ "pmullw  $dst,$src\t! mul packed2S" %}
6153   ins_encode %{
6154     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
6155   %}
6156   ins_pipe( pipe_slow );
6157 %}
6158 
6159 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
6160   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6161   match(Set dst (MulVS src1 src2));
6162   format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
6163   ins_encode %{
6164     int vector_len = 0;
6165     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6166   %}
6167   ins_pipe( pipe_slow );
6168 %}
6169 
6170 instruct vmul4S(vecD dst, vecD src) %{
6171   predicate(n->as_Vector()->length() == 4);
6172   match(Set dst (MulVS dst src));
6173   format %{ "pmullw  $dst,$src\t! mul packed4S" %}
6174   ins_encode %{
6175     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
6176   %}
6177   ins_pipe( pipe_slow );
6178 %}
6179 
6180 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
6181   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6182   match(Set dst (MulVS src1 src2));
6183   format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
6184   ins_encode %{
6185     int vector_len = 0;
6186     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6187   %}
6188   ins_pipe( pipe_slow );
6189 %}
6190 
6191 instruct vmul8S(vecX dst, vecX src) %{
6192   predicate(n->as_Vector()->length() == 8);
6193   match(Set dst (MulVS dst src));
6194   format %{ "pmullw  $dst,$src\t! mul packed8S" %}
6195   ins_encode %{
6196     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
6197   %}
6198   ins_pipe( pipe_slow );
6199 %}
6200 
6201 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
6202   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6203   match(Set dst (MulVS src1 src2));
6204   format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
6205   ins_encode %{
6206     int vector_len = 0;
6207     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6208   %}
6209   ins_pipe( pipe_slow );
6210 %}
6211 
6212 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
6213   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6214   match(Set dst (MulVS src (LoadVector mem)));
6215   format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
6216   ins_encode %{
6217     int vector_len = 0;
6218     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6219   %}
6220   ins_pipe( pipe_slow );
6221 %}
6222 
6223 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
6224   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
6225   match(Set dst (MulVS src1 src2));
6226   format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
6227   ins_encode %{
6228     int vector_len = 1;
6229     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6230   %}
6231   ins_pipe( pipe_slow );
6232 %}
6233 
6234 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
6235   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
6236   match(Set dst (MulVS src (LoadVector mem)));
6237   format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
6238   ins_encode %{
6239     int vector_len = 1;
6240     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6241   %}
6242   ins_pipe( pipe_slow );
6243 %}
6244 
6245 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
6246   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
6247   match(Set dst (MulVS src1 src2));
6248   format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
6249   ins_encode %{
6250     int vector_len = 2;
6251     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6252   %}
6253   ins_pipe( pipe_slow );
6254 %}
6255 
6256 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
6257   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
6258   match(Set dst (MulVS src (LoadVector mem)));
6259   format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
6260   ins_encode %{
6261     int vector_len = 2;
6262     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6263   %}
6264   ins_pipe( pipe_slow );
6265 %}
6266 
6267 // Integers vector mul (sse4_1)
6268 instruct vmul2I(vecD dst, vecD src) %{
6269   predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
6270   match(Set dst (MulVI dst src));
6271   format %{ "pmulld  $dst,$src\t! mul packed2I" %}
6272   ins_encode %{
6273     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
6274   %}
6275   ins_pipe( pipe_slow );
6276 %}
6277 
6278 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
6279   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6280   match(Set dst (MulVI src1 src2));
6281   format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
6282   ins_encode %{
6283     int vector_len = 0;
6284     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6285   %}
6286   ins_pipe( pipe_slow );
6287 %}
6288 
6289 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
6290   predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
6291   match(Set dst (MulVL src1 src2));
6292   format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
6293   ins_encode %{
6294     int vector_len = 0;
6295     __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6296   %}
6297   ins_pipe( pipe_slow );
6298 %}
6299 
6300 instruct vmul4I(vecX dst, vecX src) %{
6301   predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
6302   match(Set dst (MulVI dst src));
6303   format %{ "pmulld  $dst,$src\t! mul packed4I" %}
6304   ins_encode %{
6305     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
6306   %}
6307   ins_pipe( pipe_slow );
6308 %}
6309 
6310 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
6311   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6312   match(Set dst (MulVI src1 src2));
6313   format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
6314   ins_encode %{
6315     int vector_len = 0;
6316     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6317   %}
6318   ins_pipe( pipe_slow );
6319 %}
6320 
6321 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
6322   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6323   match(Set dst (MulVI src (LoadVector mem)));
6324   format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
6325   ins_encode %{
6326     int vector_len = 0;
6327     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6328   %}
6329   ins_pipe( pipe_slow );
6330 %}
6331 
6332 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
6333   predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
6334   match(Set dst (MulVL src1 src2));
6335   format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
6336   ins_encode %{
6337     int vector_len = 1;
6338     __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6339   %}
6340   ins_pipe( pipe_slow );
6341 %}
6342 
6343 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
6344   predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
6345   match(Set dst (MulVL src (LoadVector mem)));
6346   format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
6347   ins_encode %{
6348     int vector_len = 1;
6349     __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6350   %}
6351   ins_pipe( pipe_slow );
6352 %}
6353 
6354 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
6355   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
6356   match(Set dst (MulVI src1 src2));
6357   format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
6358   ins_encode %{
6359     int vector_len = 1;
6360     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6361   %}
6362   ins_pipe( pipe_slow );
6363 %}
6364 
6365 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
6366   predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
6367   match(Set dst (MulVL src1 src2));
6368   format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
6369   ins_encode %{
6370     int vector_len = 2;
6371     __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6372   %}
6373   ins_pipe( pipe_slow );
6374 %}
6375 
6376 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
6377   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6378   match(Set dst (MulVI src1 src2));
6379   format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
6380   ins_encode %{
6381     int vector_len = 2;
6382     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6383   %}
6384   ins_pipe( pipe_slow );
6385 %}
6386 
6387 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
6388   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
6389   match(Set dst (MulVI src (LoadVector mem)));
6390   format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
6391   ins_encode %{
6392     int vector_len = 1;
6393     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6394   %}
6395   ins_pipe( pipe_slow );
6396 %}
6397 
6398 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
6399   predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
6400   match(Set dst (MulVL src (LoadVector mem)));
6401   format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
6402   ins_encode %{
6403     int vector_len = 2;
6404     __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6405   %}
6406   ins_pipe( pipe_slow );
6407 %}
6408 
6409 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
6410   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6411   match(Set dst (MulVI src (LoadVector mem)));
6412   format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
6413   ins_encode %{
6414     int vector_len = 2;
6415     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6416   %}
6417   ins_pipe( pipe_slow );
6418 %}
6419 
6420 // Floats vector mul
6421 instruct vmul2F(vecD dst, vecD src) %{
6422   predicate(n->as_Vector()->length() == 2);
6423   match(Set dst (MulVF dst src));
6424   format %{ "mulps   $dst,$src\t! mul packed2F" %}
6425   ins_encode %{
6426     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
6427   %}
6428   ins_pipe( pipe_slow );
6429 %}
6430 
6431 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
6432   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6433   match(Set dst (MulVF src1 src2));
6434   format %{ "vmulps  $dst,$src1,$src2\t! mul packed2F" %}
6435   ins_encode %{
6436     int vector_len = 0;
6437     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6438   %}
6439   ins_pipe( pipe_slow );
6440 %}
6441 
6442 instruct vmul4F(vecX dst, vecX src) %{
6443   predicate(n->as_Vector()->length() == 4);
6444   match(Set dst (MulVF dst src));
6445   format %{ "mulps   $dst,$src\t! mul packed4F" %}
6446   ins_encode %{
6447     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
6448   %}
6449   ins_pipe( pipe_slow );
6450 %}
6451 
6452 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
6453   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6454   match(Set dst (MulVF src1 src2));
6455   format %{ "vmulps  $dst,$src1,$src2\t! mul packed4F" %}
6456   ins_encode %{
6457     int vector_len = 0;
6458     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6459   %}
6460   ins_pipe( pipe_slow );
6461 %}
6462 
6463 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
6464   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6465   match(Set dst (MulVF src (LoadVector mem)));
6466   format %{ "vmulps  $dst,$src,$mem\t! mul packed4F" %}
6467   ins_encode %{
6468     int vector_len = 0;
6469     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6470   %}
6471   ins_pipe( pipe_slow );
6472 %}
6473 
6474 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
6475   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6476   match(Set dst (MulVF src1 src2));
6477   format %{ "vmulps  $dst,$src1,$src2\t! mul packed8F" %}
6478   ins_encode %{
6479     int vector_len = 1;
6480     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6481   %}
6482   ins_pipe( pipe_slow );
6483 %}
6484 
6485 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
6486   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6487   match(Set dst (MulVF src (LoadVector mem)));
6488   format %{ "vmulps  $dst,$src,$mem\t! mul packed8F" %}
6489   ins_encode %{
6490     int vector_len = 1;
6491     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6492   %}
6493   ins_pipe( pipe_slow );
6494 %}
6495 
6496 instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
6497   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6498   match(Set dst (MulVF src1 src2));
6499   format %{ "vmulps  $dst,$src1,$src2\t! mul packed16F" %}
6500   ins_encode %{
6501     int vector_len = 2;
6502     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6503   %}
6504   ins_pipe( pipe_slow );
6505 %}
6506 
6507 instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
6508   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6509   match(Set dst (MulVF src (LoadVector mem)));
6510   format %{ "vmulps  $dst,$src,$mem\t! mul packed16F" %}
6511   ins_encode %{
6512     int vector_len = 2;
6513     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6514   %}
6515   ins_pipe( pipe_slow );
6516 %}
6517 
6518 // Doubles vector mul
6519 instruct vmul2D(vecX dst, vecX src) %{
6520   predicate(n->as_Vector()->length() == 2);
6521   match(Set dst (MulVD dst src));
6522   format %{ "mulpd   $dst,$src\t! mul packed2D" %}
6523   ins_encode %{
6524     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
6525   %}
6526   ins_pipe( pipe_slow );
6527 %}
6528 
6529 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
6530   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6531   match(Set dst (MulVD src1 src2));
6532   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed2D" %}
6533   ins_encode %{
6534     int vector_len = 0;
6535     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6536   %}
6537   ins_pipe( pipe_slow );
6538 %}
6539 
6540 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
6541   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6542   match(Set dst (MulVD src (LoadVector mem)));
6543   format %{ "vmulpd  $dst,$src,$mem\t! mul packed2D" %}
6544   ins_encode %{
6545     int vector_len = 0;
6546     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6547   %}
6548   ins_pipe( pipe_slow );
6549 %}
6550 
6551 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
6552   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6553   match(Set dst (MulVD src1 src2));
6554   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed4D" %}
6555   ins_encode %{
6556     int vector_len = 1;
6557     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6558   %}
6559   ins_pipe( pipe_slow );
6560 %}
6561 
6562 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
6563   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6564   match(Set dst (MulVD src (LoadVector mem)));
6565   format %{ "vmulpd  $dst,$src,$mem\t! mul packed4D" %}
6566   ins_encode %{
6567     int vector_len = 1;
6568     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6569   %}
6570   ins_pipe( pipe_slow );
6571 %}
6572 
6573 instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
6574   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6575   match(Set dst (MulVD src1 src2));
6576   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed8D" %}
6577   ins_encode %{
6578     int vector_len = 2;
6579     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6580   %}
6581   ins_pipe( pipe_slow );
6582 %}
6583 
6584 instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
6585   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6586   match(Set dst (MulVD src (LoadVector mem)));
6587   format %{ "vmulpd  $dst,$src,$mem\t! mul packed8D" %}
6588   ins_encode %{
6589     int vector_len = 2;
6590     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6591   %}
6592   ins_pipe( pipe_slow );
6593 %}
6594 
6595 // --------------------------------- DIV --------------------------------------
6596 
6597 // Floats vector div
6598 instruct vdiv2F(vecD dst, vecD src) %{
6599   predicate(n->as_Vector()->length() == 2);
6600   match(Set dst (DivVF dst src));
6601   format %{ "divps   $dst,$src\t! div packed2F" %}
6602   ins_encode %{
6603     __ divps($dst$$XMMRegister, $src$$XMMRegister);
6604   %}
6605   ins_pipe( pipe_slow );
6606 %}
6607 
6608 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
6609   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6610   match(Set dst (DivVF src1 src2));
6611   format %{ "vdivps  $dst,$src1,$src2\t! div packed2F" %}
6612   ins_encode %{
6613     int vector_len = 0;
6614     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6615   %}
6616   ins_pipe( pipe_slow );
6617 %}
6618 
6619 instruct vdiv4F(vecX dst, vecX src) %{
6620   predicate(n->as_Vector()->length() == 4);
6621   match(Set dst (DivVF dst src));
6622   format %{ "divps   $dst,$src\t! div packed4F" %}
6623   ins_encode %{
6624     __ divps($dst$$XMMRegister, $src$$XMMRegister);
6625   %}
6626   ins_pipe( pipe_slow );
6627 %}
6628 
6629 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
6630   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6631   match(Set dst (DivVF src1 src2));
6632   format %{ "vdivps  $dst,$src1,$src2\t! div packed4F" %}
6633   ins_encode %{
6634     int vector_len = 0;
6635     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6636   %}
6637   ins_pipe( pipe_slow );
6638 %}
6639 
6640 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
6641   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6642   match(Set dst (DivVF src (LoadVector mem)));
6643   format %{ "vdivps  $dst,$src,$mem\t! div packed4F" %}
6644   ins_encode %{
6645     int vector_len = 0;
6646     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6647   %}
6648   ins_pipe( pipe_slow );
6649 %}
6650 
6651 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
6652   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6653   match(Set dst (DivVF src1 src2));
6654   format %{ "vdivps  $dst,$src1,$src2\t! div packed8F" %}
6655   ins_encode %{
6656     int vector_len = 1;
6657     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6658   %}
6659   ins_pipe( pipe_slow );
6660 %}
6661 
6662 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
6663   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6664   match(Set dst (DivVF src (LoadVector mem)));
6665   format %{ "vdivps  $dst,$src,$mem\t! div packed8F" %}
6666   ins_encode %{
6667     int vector_len = 1;
6668     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6669   %}
6670   ins_pipe( pipe_slow );
6671 %}
6672 
6673 instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
6674   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6675   match(Set dst (DivVF src1 src2));
6676   format %{ "vdivps  $dst,$src1,$src2\t! div packed16F" %}
6677   ins_encode %{
6678     int vector_len = 2;
6679     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6680   %}
6681   ins_pipe( pipe_slow );
6682 %}
6683 
6684 instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
6685   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6686   match(Set dst (DivVF src (LoadVector mem)));
6687   format %{ "vdivps  $dst,$src,$mem\t! div packed16F" %}
6688   ins_encode %{
6689     int vector_len = 2;
6690     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6691   %}
6692   ins_pipe( pipe_slow );
6693 %}
6694 
6695 // Doubles vector div
6696 instruct vdiv2D(vecX dst, vecX src) %{
6697   predicate(n->as_Vector()->length() == 2);
6698   match(Set dst (DivVD dst src));
6699   format %{ "divpd   $dst,$src\t! div packed2D" %}
6700   ins_encode %{
6701     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
6702   %}
6703   ins_pipe( pipe_slow );
6704 %}
6705 
6706 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
6707   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6708   match(Set dst (DivVD src1 src2));
6709   format %{ "vdivpd  $dst,$src1,$src2\t! div packed2D" %}
6710   ins_encode %{
6711     int vector_len = 0;
6712     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6713   %}
6714   ins_pipe( pipe_slow );
6715 %}
6716 
6717 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
6718   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6719   match(Set dst (DivVD src (LoadVector mem)));
6720   format %{ "vdivpd  $dst,$src,$mem\t! div packed2D" %}
6721   ins_encode %{
6722     int vector_len = 0;
6723     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6724   %}
6725   ins_pipe( pipe_slow );
6726 %}
6727 
6728 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
6729   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6730   match(Set dst (DivVD src1 src2));
6731   format %{ "vdivpd  $dst,$src1,$src2\t! div packed4D" %}
6732   ins_encode %{
6733     int vector_len = 1;
6734     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6735   %}
6736   ins_pipe( pipe_slow );
6737 %}
6738 
6739 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
6740   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6741   match(Set dst (DivVD src (LoadVector mem)));
6742   format %{ "vdivpd  $dst,$src,$mem\t! div packed4D" %}
6743   ins_encode %{
6744     int vector_len = 1;
6745     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6746   %}
6747   ins_pipe( pipe_slow );
6748 %}
6749 
6750 instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
6751   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6752   match(Set dst (DivVD src1 src2));
6753   format %{ "vdivpd  $dst,$src1,$src2\t! div packed8D" %}
6754   ins_encode %{
6755     int vector_len = 2;
6756     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6757   %}
6758   ins_pipe( pipe_slow );
6759 %}
6760 
6761 instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
6762   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6763   match(Set dst (DivVD src (LoadVector mem)));
6764   format %{ "vdivpd  $dst,$src,$mem\t! div packed8D" %}
6765   ins_encode %{
6766     int vector_len = 2;
6767     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6768   %}
6769   ins_pipe( pipe_slow );
6770 %}
6771 
6772 // ------------------------------ Shift ---------------------------------------
6773 
6774 // Left and right shift count vectors are the same on x86
6775 // (only lowest bits of xmm reg are used for count).
6776 instruct vshiftcnt(vecS dst, rRegI cnt) %{
6777   match(Set dst (LShiftCntV cnt));
6778   match(Set dst (RShiftCntV cnt));
6779   format %{ "movd    $dst,$cnt\t! load shift count" %}
6780   ins_encode %{
6781     __ movdl($dst$$XMMRegister, $cnt$$Register);
6782   %}
6783   ins_pipe( pipe_slow );
6784 %}
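// A hedged illustration (not from this file): the Java sketch below has a
// loop-invariant shift count, so the superword transform can materialize the
// count once through the rule above and reuse it for the packed shifts that
// follow. The method name, arrays and count 'n' are hypothetical.
//
//   static void shiftAll(int[] a, int[] b, int n) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] = b[i] << n;   // vectorized form loads 'n' into an xmm reg once
//     }
//   }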
6785 
6786 // ------------------------------ LeftShift -----------------------------------
6787 
6788 // Shorts/Chars vector left shift
6789 instruct vsll2S(vecS dst, vecS shift) %{
6790   predicate(n->as_Vector()->length() == 2);
6791   match(Set dst (LShiftVS dst shift));
6792   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
6793   ins_encode %{
6794     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
6795   %}
6796   ins_pipe( pipe_slow );
6797 %}
6798 
6799 instruct vsll2S_imm(vecS dst, immI8 shift) %{
6800   predicate(n->as_Vector()->length() == 2);
6801   match(Set dst (LShiftVS dst shift));
6802   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
6803   ins_encode %{
6804     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
6805   %}
6806   ins_pipe( pipe_slow );
6807 %}
6808 
6809 instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
6810   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6811   match(Set dst (LShiftVS src shift));
6812   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
6813   ins_encode %{
6814     int vector_len = 0;
6815     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
6816   %}
6817   ins_pipe( pipe_slow );
6818 %}
6819 
6820 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
6821   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6822   match(Set dst (LShiftVS src shift));
6823   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
6824   ins_encode %{
6825     int vector_len = 0;
6826     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
6827   %}
6828   ins_pipe( pipe_slow );
6829 %}
6830 
6831 instruct vsll4S(vecD dst, vecS shift) %{
6832   predicate(n->as_Vector()->length() == 4);
6833   match(Set dst (LShiftVS dst shift));
6834   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
6835   ins_encode %{
6836     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
6837   %}
6838   ins_pipe( pipe_slow );
6839 %}
6840 
6841 instruct vsll4S_imm(vecD dst, immI8 shift) %{
6842   predicate(n->as_Vector()->length() == 4);
6843   match(Set dst (LShiftVS dst shift));
6844   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
6845   ins_encode %{
6846     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
6847   %}
6848   ins_pipe( pipe_slow );
6849 %}
6850 
6851 instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
6852   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6853   match(Set dst (LShiftVS src shift));
6854   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
6855   ins_encode %{
6856     int vector_len = 0;
6857     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
6858   %}
6859   ins_pipe( pipe_slow );
6860 %}
6861 
6862 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
6863   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6864   match(Set dst (LShiftVS src shift));
6865   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
6866   ins_encode %{
6867     int vector_len = 0;
6868     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
6869   %}
6870   ins_pipe( pipe_slow );
6871 %}
6872 
6873 instruct vsll8S(vecX dst, vecS shift) %{
6874   predicate(n->as_Vector()->length() == 8);
6875   match(Set dst (LShiftVS dst shift));
6876   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
6877   ins_encode %{
6878     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
6879   %}
6880   ins_pipe( pipe_slow );
6881 %}
6882 
6883 instruct vsll8S_imm(vecX dst, immI8 shift) %{
6884   predicate(n->as_Vector()->length() == 8);
6885   match(Set dst (LShiftVS dst shift));
6886   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
6887   ins_encode %{
6888     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
6889   %}
6890   ins_pipe( pipe_slow );
6891 %}
6892 
6893 instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
6894   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6895   match(Set dst (LShiftVS src shift));
6896   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
6897   ins_encode %{
6898     int vector_len = 0;
6899     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
6900   %}
6901   ins_pipe( pipe_slow );
6902 %}
6903 
6904 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
6905   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6906   match(Set dst (LShiftVS src shift));
6907   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
6908   ins_encode %{
6909     int vector_len = 0;
6910     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
6911   %}
6912   ins_pipe( pipe_slow );
6913 %}
6914 
6915 instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
6916   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
6917   match(Set dst (LShiftVS src shift));
6918   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
6919   ins_encode %{
6920     int vector_len = 1;
6921     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
6922   %}
6923   ins_pipe( pipe_slow );
6924 %}
6925 
6926 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
6927   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
6928   match(Set dst (LShiftVS src shift));
6929   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
6930   ins_encode %{
6931     int vector_len = 1;
6932     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
6933   %}
6934   ins_pipe( pipe_slow );
6935 %}
6936 
6937 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
6938   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
6939   match(Set dst (LShiftVS src shift));
6940   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed32S" %}
6941   ins_encode %{
6942     int vector_len = 2;
6943     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
6944   %}
6945   ins_pipe( pipe_slow );
6946 %}
6947 
6948 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
6949   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
6950   match(Set dst (LShiftVS src shift));
6951   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed32S" %}
6952   ins_encode %{
6953     int vector_len = 2;
6954     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
6955   %}
6956   ins_pipe( pipe_slow );
6957 %}
6958 
6959 // Integers vector left shift
6960 instruct vsll2I(vecD dst, vecS shift) %{
6961   predicate(n->as_Vector()->length() == 2);
6962   match(Set dst (LShiftVI dst shift));
6963   format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
6964   ins_encode %{
6965     __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
6966   %}
6967   ins_pipe( pipe_slow );
6968 %}
6969 
6970 instruct vsll2I_imm(vecD dst, immI8 shift) %{
6971   predicate(n->as_Vector()->length() == 2);
6972   match(Set dst (LShiftVI dst shift));
6973   format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
6974   ins_encode %{
6975     __ pslld($dst$$XMMRegister, (int)$shift$$constant);
6976   %}
6977   ins_pipe( pipe_slow );
6978 %}
6979 
6980 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
6981   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6982   match(Set dst (LShiftVI src shift));
6983   format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
6984   ins_encode %{
6985     int vector_len = 0;
6986     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
6987   %}
6988   ins_pipe( pipe_slow );
6989 %}
6990 
6991 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
6992   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6993   match(Set dst (LShiftVI src shift));
6994   format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
6995   ins_encode %{
6996     int vector_len = 0;
6997     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
6998   %}
6999   ins_pipe( pipe_slow );
7000 %}
7001 
7002 instruct vsll4I(vecX dst, vecS shift) %{
7003   predicate(n->as_Vector()->length() == 4);
7004   match(Set dst (LShiftVI dst shift));
7005   format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
7006   ins_encode %{
7007     __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
7008   %}
7009   ins_pipe( pipe_slow );
7010 %}
7011 
7012 instruct vsll4I_imm(vecX dst, immI8 shift) %{
7013   predicate(n->as_Vector()->length() == 4);
7014   match(Set dst (LShiftVI dst shift));
7015   format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
7016   ins_encode %{
7017     __ pslld($dst$$XMMRegister, (int)$shift$$constant);
7018   %}
7019   ins_pipe( pipe_slow );
7020 %}
7021 
7022 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
7023   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7024   match(Set dst (LShiftVI src shift));
7025   format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
7026   ins_encode %{
7027     int vector_len = 0;
7028     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7029   %}
7030   ins_pipe( pipe_slow );
7031 %}
7032 
7033 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
7034   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7035   match(Set dst (LShiftVI src shift));
7036   format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
7037   ins_encode %{
7038     int vector_len = 0;
7039     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7040   %}
7041   ins_pipe( pipe_slow );
7042 %}
7043 
7044 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
7045   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
7046   match(Set dst (LShiftVI src shift));
7047   format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
7048   ins_encode %{
7049     int vector_len = 1;
7050     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7051   %}
7052   ins_pipe( pipe_slow );
7053 %}
7054 
7055 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
7056   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
7057   match(Set dst (LShiftVI src shift));
7058   format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
7059   ins_encode %{
7060     int vector_len = 1;
7061     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7062   %}
7063   ins_pipe( pipe_slow );
7064 %}
7065 
7066 instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
7067   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7068   match(Set dst (LShiftVI src shift));
7069   format %{ "vpslld  $dst,$src,$shift\t! left shift packed16I" %}
7070   ins_encode %{
7071     int vector_len = 2;
7072     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7073   %}
7074   ins_pipe( pipe_slow );
7075 %}
7076 
7077 instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
7078   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7079   match(Set dst (LShiftVI src shift));
7080   format %{ "vpslld  $dst,$src,$shift\t! left shift packed16I" %}
7081   ins_encode %{
7082     int vector_len = 2;
7083     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7084   %}
7085   ins_pipe( pipe_slow );
7086 %}
7087 
7088 // Longs vector left shift
7089 instruct vsll2L(vecX dst, vecS shift) %{
7090   predicate(n->as_Vector()->length() == 2);
7091   match(Set dst (LShiftVL dst shift));
7092   format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
7093   ins_encode %{
7094     __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
7095   %}
7096   ins_pipe( pipe_slow );
7097 %}
7098 
7099 instruct vsll2L_imm(vecX dst, immI8 shift) %{
7100   predicate(n->as_Vector()->length() == 2);
7101   match(Set dst (LShiftVL dst shift));
7102   format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
7103   ins_encode %{
7104     __ psllq($dst$$XMMRegister, (int)$shift$$constant);
7105   %}
7106   ins_pipe( pipe_slow );
7107 %}
7108 
7109 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
7110   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7111   match(Set dst (LShiftVL src shift));
7112   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
7113   ins_encode %{
7114     int vector_len = 0;
7115     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7116   %}
7117   ins_pipe( pipe_slow );
7118 %}
7119 
7120 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
7121   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7122   match(Set dst (LShiftVL src shift));
7123   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
7124   ins_encode %{
7125     int vector_len = 0;
7126     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7127   %}
7128   ins_pipe( pipe_slow );
7129 %}
7130 
7131 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
7132   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
7133   match(Set dst (LShiftVL src shift));
7134   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
7135   ins_encode %{
7136     int vector_len = 1;
7137     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7138   %}
7139   ins_pipe( pipe_slow );
7140 %}
7141 
7142 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
7143   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
7144   match(Set dst (LShiftVL src shift));
7145   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
7146   ins_encode %{
7147     int vector_len = 1;
7148     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7149   %}
7150   ins_pipe( pipe_slow );
7151 %}
7152 
7153 instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
7154   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7155   match(Set dst (LShiftVL src shift));
7156   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed8L" %}
7157   ins_encode %{
7158     int vector_len = 2;
7159     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7160   %}
7161   ins_pipe( pipe_slow );
7162 %}
7163 
7164 instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
7165   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7166   match(Set dst (LShiftVL src shift));
7167   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed8L" %}
7168   ins_encode %{
7169     int vector_len = 2;
7170     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7171   %}
7172   ins_pipe( pipe_slow );
7173 %}
7174 
7175 // ----------------------- LogicalRightShift -----------------------------------
7176 
7177 // Shorts vector logical right shift produces an incorrect Java result
7178 // for negative data because Java code converts short values into ints with
7179 // sign extension before the shift. But char vectors are fine since chars are
7180 // unsigned values.
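// A worked Java sketch of the mismatch (illustrative values, not taken from
// this file): narrowing after the 32-bit shift pulls former sign-extension
// bits back into the short lane, which a 16-bit packed shift cannot reproduce.
//
//   short s = -2;                 // lane bits 0xFFFE
//   short r = (short)(s >>> 1);   // Java: sign-extend to int, shift, narrow -> 0xFFFF (-1)
//                                 // a 16-bit psrlw on 0xFFFE would give 0x7FFF (32767)
//   char  c = 0xFFFE;
//   char  d = (char)(c >>> 1);    // chars zero-extend, so 0x7FFF matches psrlw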
7181 
7182 instruct vsrl2S(vecS dst, vecS shift) %{
7183   predicate(n->as_Vector()->length() == 2);
7184   match(Set dst (URShiftVS dst shift));
7185   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
7186   ins_encode %{
7187     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
7188   %}
7189   ins_pipe( pipe_slow );
7190 %}
7191 
7192 instruct vsrl2S_imm(vecS dst, immI8 shift) %{
7193   predicate(n->as_Vector()->length() == 2);
7194   match(Set dst (URShiftVS dst shift));
7195   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
7196   ins_encode %{
7197     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
7198   %}
7199   ins_pipe( pipe_slow );
7200 %}
7201 
7202 instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
7203   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7204   match(Set dst (URShiftVS src shift));
7205   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
7206   ins_encode %{
7207     int vector_len = 0;
7208     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7209   %}
7210   ins_pipe( pipe_slow );
7211 %}
7212 
7213 instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
7214   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7215   match(Set dst (URShiftVS src shift));
7216   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
7217   ins_encode %{
7218     int vector_len = 0;
7219     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7220   %}
7221   ins_pipe( pipe_slow );
7222 %}
7223 
7224 instruct vsrl4S(vecD dst, vecS shift) %{
7225   predicate(n->as_Vector()->length() == 4);
7226   match(Set dst (URShiftVS dst shift));
7227   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
7228   ins_encode %{
7229     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
7230   %}
7231   ins_pipe( pipe_slow );
7232 %}
7233 
7234 instruct vsrl4S_imm(vecD dst, immI8 shift) %{
7235   predicate(n->as_Vector()->length() == 4);
7236   match(Set dst (URShiftVS dst shift));
7237   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
7238   ins_encode %{
7239     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
7240   %}
7241   ins_pipe( pipe_slow );
7242 %}
7243 
7244 instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
7245   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7246   match(Set dst (URShiftVS src shift));
7247   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
7248   ins_encode %{
7249     int vector_len = 0;
7250     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7251   %}
7252   ins_pipe( pipe_slow );
7253 %}
7254 
7255 instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
7256   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7257   match(Set dst (URShiftVS src shift));
7258   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
7259   ins_encode %{
7260     int vector_len = 0;
7261     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7262   %}
7263   ins_pipe( pipe_slow );
7264 %}
7265 
7266 instruct vsrl8S(vecX dst, vecS shift) %{
7267   predicate(n->as_Vector()->length() == 8);
7268   match(Set dst (URShiftVS dst shift));
7269   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
7270   ins_encode %{
7271     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
7272   %}
7273   ins_pipe( pipe_slow );
7274 %}
7275 
7276 instruct vsrl8S_imm(vecX dst, immI8 shift) %{
7277   predicate(n->as_Vector()->length() == 8);
7278   match(Set dst (URShiftVS dst shift));
7279   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
7280   ins_encode %{
7281     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
7282   %}
7283   ins_pipe( pipe_slow );
7284 %}
7285 
7286 instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
7287   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7288   match(Set dst (URShiftVS src shift));
7289   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
7290   ins_encode %{
7291     int vector_len = 0;
7292     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7293   %}
7294   ins_pipe( pipe_slow );
7295 %}
7296 
7297 instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
7298   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7299   match(Set dst (URShiftVS src shift));
7300   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
7301   ins_encode %{
7302     int vector_len = 0;
7303     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7304   %}
7305   ins_pipe( pipe_slow );
7306 %}
7307 
7308 instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
7309   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
7310   match(Set dst (URShiftVS src shift));
7311   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
7312   ins_encode %{
7313     int vector_len = 1;
7314     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7315   %}
7316   ins_pipe( pipe_slow );
7317 %}
7318 
7319 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
7320   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
7321   match(Set dst (URShiftVS src shift));
7322   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
7323   ins_encode %{
7324     int vector_len = 1;
7325     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7326   %}
7327   ins_pipe( pipe_slow );
7328 %}
7329 
7330 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
7331   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
7332   match(Set dst (URShiftVS src shift));
7333   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed32S" %}
7334   ins_encode %{
7335     int vector_len = 2;
7336     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7337   %}
7338   ins_pipe( pipe_slow );
7339 %}
7340 
7341 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
7342   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
7343   match(Set dst (URShiftVS src shift));
7344   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed32S" %}
7345   ins_encode %{
7346     int vector_len = 2;
7347     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7348   %}
7349   ins_pipe( pipe_slow );
7350 %}
7351 
7352 // Integers vector logical right shift
7353 instruct vsrl2I(vecD dst, vecS shift) %{
7354   predicate(n->as_Vector()->length() == 2);
7355   match(Set dst (URShiftVI dst shift));
7356   format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
7357   ins_encode %{
7358     __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
7359   %}
7360   ins_pipe( pipe_slow );
7361 %}
7362 
7363 instruct vsrl2I_imm(vecD dst, immI8 shift) %{
7364   predicate(n->as_Vector()->length() == 2);
7365   match(Set dst (URShiftVI dst shift));
7366   format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
7367   ins_encode %{
7368     __ psrld($dst$$XMMRegister, (int)$shift$$constant);
7369   %}
7370   ins_pipe( pipe_slow );
7371 %}
7372 
7373 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
7374   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7375   match(Set dst (URShiftVI src shift));
7376   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
7377   ins_encode %{
7378     int vector_len = 0;
7379     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7380   %}
7381   ins_pipe( pipe_slow );
7382 %}
7383 
7384 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
7385   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7386   match(Set dst (URShiftVI src shift));
7387   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
7388   ins_encode %{
7389     int vector_len = 0;
7390     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7391   %}
7392   ins_pipe( pipe_slow );
7393 %}
7394 
7395 instruct vsrl4I(vecX dst, vecS shift) %{
7396   predicate(n->as_Vector()->length() == 4);
7397   match(Set dst (URShiftVI dst shift));
7398   format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
7399   ins_encode %{
7400     __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
7401   %}
7402   ins_pipe( pipe_slow );
7403 %}
7404 
7405 instruct vsrl4I_imm(vecX dst, immI8 shift) %{
7406   predicate(n->as_Vector()->length() == 4);
7407   match(Set dst (URShiftVI dst shift));
7408   format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
7409   ins_encode %{
7410     __ psrld($dst$$XMMRegister, (int)$shift$$constant);
7411   %}
7412   ins_pipe( pipe_slow );
7413 %}
7414 
7415 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
7416   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7417   match(Set dst (URShiftVI src shift));
7418   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
7419   ins_encode %{
7420     int vector_len = 0;
7421     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7422   %}
7423   ins_pipe( pipe_slow );
7424 %}
7425 
7426 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
7427   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7428   match(Set dst (URShiftVI src shift));
7429   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
7430   ins_encode %{
7431     int vector_len = 0;
7432     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7433   %}
7434   ins_pipe( pipe_slow );
7435 %}
7436 
7437 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
7438   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
7439   match(Set dst (URShiftVI src shift));
7440   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
7441   ins_encode %{
7442     int vector_len = 1;
7443     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7444   %}
7445   ins_pipe( pipe_slow );
7446 %}
7447 
7448 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
7449   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
7450   match(Set dst (URShiftVI src shift));
7451   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
7452   ins_encode %{
7453     int vector_len = 1;
7454     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7455   %}
7456   ins_pipe( pipe_slow );
7457 %}
7458 
7459 instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
7460   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7461   match(Set dst (URShiftVI src shift));
7462   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed16I" %}
7463   ins_encode %{
7464     int vector_len = 2;
7465     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7466   %}
7467   ins_pipe( pipe_slow );
7468 %}
7469 
7470 instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
7471   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7472   match(Set dst (URShiftVI src shift));
7473   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed16I" %}
7474   ins_encode %{
7475     int vector_len = 2;
7476     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7477   %}
7478   ins_pipe( pipe_slow );
7479 %}
7480 
7481 // Longs vector logical right shift
7482 instruct vsrl2L(vecX dst, vecS shift) %{
7483   predicate(n->as_Vector()->length() == 2);
7484   match(Set dst (URShiftVL dst shift));
7485   format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
7486   ins_encode %{
7487     __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
7488   %}
7489   ins_pipe( pipe_slow );
7490 %}
7491 
7492 instruct vsrl2L_imm(vecX dst, immI8 shift) %{
7493   predicate(n->as_Vector()->length() == 2);
7494   match(Set dst (URShiftVL dst shift));
7495   format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
7496   ins_encode %{
7497     __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
7498   %}
7499   ins_pipe( pipe_slow );
7500 %}
7501 
7502 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
7503   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7504   match(Set dst (URShiftVL src shift));
7505   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
7506   ins_encode %{
7507     int vector_len = 0;
7508     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7509   %}
7510   ins_pipe( pipe_slow );
7511 %}
7512 
7513 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
7514   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7515   match(Set dst (URShiftVL src shift));
7516   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
7517   ins_encode %{
7518     int vector_len = 0;
7519     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7520   %}
7521   ins_pipe( pipe_slow );
7522 %}
7523 
7524 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
7525   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
7526   match(Set dst (URShiftVL src shift));
7527   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
7528   ins_encode %{
7529     int vector_len = 1;
7530     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7531   %}
7532   ins_pipe( pipe_slow );
7533 %}
7534 
7535 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
7536   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
7537   match(Set dst (URShiftVL src shift));
7538   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
7539   ins_encode %{
7540     int vector_len = 1;
7541     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7542   %}
7543   ins_pipe( pipe_slow );
7544 %}
7545 
7546 instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
7547   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7548   match(Set dst (URShiftVL src shift));
7549   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed8L" %}
7550   ins_encode %{
7551     int vector_len = 2;
7552     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7553   %}
7554   ins_pipe( pipe_slow );
7555 %}
7556 
7557 instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
7558   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7559   match(Set dst (URShiftVL src shift));
7560   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed8L" %}
7561   ins_encode %{
7562     int vector_len = 2;
7563     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7564   %}
7565   ins_pipe( pipe_slow );
7566 %}
7567 
7568 // ------------------- ArithmeticRightShift -----------------------------------
7569 
7570 // Shorts/Chars vector arithmetic right shift
7571 instruct vsra2S(vecS dst, vecS shift) %{
7572   predicate(n->as_Vector()->length() == 2);
7573   match(Set dst (RShiftVS dst shift));
7574   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
7575   ins_encode %{
7576     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
7577   %}
7578   ins_pipe( pipe_slow );
7579 %}
7580 
7581 instruct vsra2S_imm(vecS dst, immI8 shift) %{
7582   predicate(n->as_Vector()->length() == 2);
7583   match(Set dst (RShiftVS dst shift));
7584   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
7585   ins_encode %{
7586     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
7587   %}
7588   ins_pipe( pipe_slow );
7589 %}
7590 
7591 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
7592   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7593   match(Set dst (RShiftVS src shift));
7594   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
7595   ins_encode %{
7596     int vector_len = 0;
7597     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7598   %}
7599   ins_pipe( pipe_slow );
7600 %}
7601 
7602 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
7603   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7604   match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no longs vector arithmetic right shift instructions.
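// (Note: the x86 ISA has no packed 64-bit arithmetic right shift below AVX-512;
// vpsraq exists only with EVEX encoding, so RShiftVL is not matched by this file.)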


// --------------------------------- AND --------------------------------------

instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand    $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand    $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand    $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand   $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand   $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand   $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por     $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por     $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por     $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor    $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor    $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor    $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}