1 //
   2 // Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Common Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
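     //
     // For example, the first slot of XMM0 below is declared as
     //
     //   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
     //
     // i.e. a register that is save-on-call in both the allocator's and the
     // C calling convention's view, is spilled as a float (Op_RegF), has
     // hardware encoding 0, and is backed by the first 32-bit VMReg slot of xmm0.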
  61 
  62 // XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
  63 // Word a in each register holds a Float, words ab hold a Double.
  64 // The whole registers are used in SSE4.2 version intrinsics,
  65 // array copy stubs and superword operations (see UseSSE42Intrinsics,
  66 // UseXMMForArrayCopy and UseSuperWord flags).
  67 // For pre-EVEX enabled architectures:
  68 //      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
  69 // For EVEX-enabled architectures:
  70 //      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
  71 //
  72 // Linux ABI:   No register preserved across function calls
  73 //              XMM0-XMM7 might hold parameters
  74 // Windows ABI: XMM6-XMM31 preserved across function calls
  75 //              XMM0-XMM3 might hold parameters
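     //
     // Slot arithmetic (illustrative): each register below is carved into 16
     // consecutive 32-bit VMReg slots, since 16 x 32 bits = 512 bits.  Slot (a)
     // alone holds a Float, slots (a)-(b) a Double, slots (a)-(d) the 128-bit
     // XMM view, slots (a)-(h) the 256-bit YMM view, and slots (a)-(p) the
     // full 512-bit ZMM view.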
  76 
  77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
  78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
  79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
  80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
  81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
  82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
  83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
  84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
  85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
  86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
  87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
  88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
  89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
  90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
  91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
  92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
  93 
  94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
  95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
  96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
  97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
  98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
  99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
 104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
 110 
 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
 127 
 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
 144 
 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
 161 
 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
 178 
 179 #ifdef _WIN64
 180 
 181 reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
 182 reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
 183 reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
 184 reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
 185 reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
 186 reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
 187 reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
 188 reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
 189 reg_def XMM6i( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(8));
 190 reg_def XMM6j( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(9));
 191 reg_def XMM6k( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(10));
 192 reg_def XMM6l( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(11));
 193 reg_def XMM6m( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(12));
 194 reg_def XMM6n( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(13));
 195 reg_def XMM6o( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(14));
 196 reg_def XMM6p( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(15));
 197 
 198 reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
 199 reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
 200 reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
 201 reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
 202 reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
 203 reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
 204 reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
 205 reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
 206 reg_def XMM7i( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(8));
 207 reg_def XMM7j( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(9));
 208 reg_def XMM7k( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(10));
 209 reg_def XMM7l( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(11));
 210 reg_def XMM7m( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(12));
 211 reg_def XMM7n( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(13));
 212 reg_def XMM7o( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(14));
 213 reg_def XMM7p( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(15));
 214 
 215 reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
 216 reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
 217 reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
 218 reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
 219 reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
 220 reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
 221 reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
 222 reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
 223 reg_def XMM8i( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(8));
 224 reg_def XMM8j( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(9));
 225 reg_def XMM8k( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(10));
 226 reg_def XMM8l( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(11));
 227 reg_def XMM8m( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(12));
 228 reg_def XMM8n( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(13));
 229 reg_def XMM8o( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(14));
 230 reg_def XMM8p( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(15));
 231 
 232 reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
 233 reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
 234 reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
 235 reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
 236 reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
 237 reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
 238 reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
 239 reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
 240 reg_def XMM9i( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(8));
 241 reg_def XMM9j( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(9));
 242 reg_def XMM9k( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(10));
 243 reg_def XMM9l( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(11));
 244 reg_def XMM9m( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(12));
 245 reg_def XMM9n( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(13));
 246 reg_def XMM9o( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(14));
 247 reg_def XMM9p( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(15));
 248 
 249 reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 250 reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
 251 reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
 252 reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
 253 reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
 254 reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
 255 reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
 256 reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
 257 reg_def XMM10i( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(8));
 258 reg_def XMM10j( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(9));
 259 reg_def XMM10k( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(10));
 260 reg_def XMM10l( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(11));
 261 reg_def XMM10m( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(12));
 262 reg_def XMM10n( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(13));
 263 reg_def XMM10o( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(14));
 264 reg_def XMM10p( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(15));
 265 
 266 reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 267 reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
 268 reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
 269 reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
 270 reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
 271 reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
 272 reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
 273 reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
 274 reg_def XMM11i( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(8));
 275 reg_def XMM11j( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(9));
 276 reg_def XMM11k( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(10));
 277 reg_def XMM11l( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(11));
 278 reg_def XMM11m( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(12));
 279 reg_def XMM11n( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(13));
 280 reg_def XMM11o( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(14));
 281 reg_def XMM11p( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(15));
 282 
 283 reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 284 reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
 285 reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
 286 reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
 287 reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
 288 reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
 289 reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
 290 reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
 291 reg_def XMM12i( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(8));
 292 reg_def XMM12j( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(9));
 293 reg_def XMM12k( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(10));
 294 reg_def XMM12l( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(11));
 295 reg_def XMM12m( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(12));
 296 reg_def XMM12n( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(13));
 297 reg_def XMM12o( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(14));
 298 reg_def XMM12p( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(15));
 299 
 300 reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 301 reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
 302 reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
 303 reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
 304 reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
 305 reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
 306 reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
 307 reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
 308 reg_def XMM13i( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(8));
 309 reg_def XMM13j( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(9));
 310 reg_def XMM13k( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(10));
 311 reg_def XMM13l( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(11));
 312 reg_def XMM13m( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(12));
 313 reg_def XMM13n( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(13));
 314 reg_def XMM13o( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(14));
 315 reg_def XMM13p( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(15));
 316 
 317 reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 318 reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
 319 reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
 320 reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
 321 reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
 322 reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
 323 reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
 324 reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
 325 reg_def XMM14i( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(8));
 326 reg_def XMM14j( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(9));
 327 reg_def XMM14k( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(10));
 328 reg_def XMM14l( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(11));
 329 reg_def XMM14m( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(12));
 330 reg_def XMM14n( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(13));
 331 reg_def XMM14o( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(14));
 332 reg_def XMM14p( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(15));
 333 
 334 reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 335 reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
 336 reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
 337 reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
 338 reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
 339 reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
 340 reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
 341 reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
 342 reg_def XMM15i( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(8));
 343 reg_def XMM15j( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(9));
 344 reg_def XMM15k( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(10));
 345 reg_def XMM15l( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(11));
 346 reg_def XMM15m( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(12));
 347 reg_def XMM15n( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(13));
 348 reg_def XMM15o( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(14));
 349 reg_def XMM15p( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(15));
 350 
 351 reg_def XMM16 ( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg());
 352 reg_def XMM16b( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(1));
 353 reg_def XMM16c( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(2));
 354 reg_def XMM16d( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(3));
 355 reg_def XMM16e( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(4));
 356 reg_def XMM16f( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(5));
 357 reg_def XMM16g( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(6));
 358 reg_def XMM16h( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(7));
 359 reg_def XMM16i( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(8));
 360 reg_def XMM16j( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(9));
 361 reg_def XMM16k( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(10));
 362 reg_def XMM16l( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(11));
 363 reg_def XMM16m( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(12));
 364 reg_def XMM16n( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(13));
 365 reg_def XMM16o( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(14));
 366 reg_def XMM16p( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(15));
 367 
 368 reg_def XMM17 ( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg());
 369 reg_def XMM17b( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(1));
 370 reg_def XMM17c( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(2));
 371 reg_def XMM17d( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(3));
 372 reg_def XMM17e( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(4));
 373 reg_def XMM17f( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(5));
 374 reg_def XMM17g( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(6));
 375 reg_def XMM17h( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(7));
 376 reg_def XMM17i( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(8));
 377 reg_def XMM17j( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(9));
 378 reg_def XMM17k( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(10));
 379 reg_def XMM17l( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(11));
 380 reg_def XMM17m( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(12));
 381 reg_def XMM17n( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(13));
 382 reg_def XMM17o( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(14));
 383 reg_def XMM17p( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(15));
 384 
 385 reg_def XMM18 ( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg());
 386 reg_def XMM18b( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(1));
 387 reg_def XMM18c( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(2));
 388 reg_def XMM18d( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(3));
 389 reg_def XMM18e( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(4));
 390 reg_def XMM18f( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(5));
 391 reg_def XMM18g( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(6));
 392 reg_def XMM18h( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(7));
 393 reg_def XMM18i( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(8));
 394 reg_def XMM18j( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(9));
 395 reg_def XMM18k( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(10));
 396 reg_def XMM18l( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(11));
 397 reg_def XMM18m( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(12));
 398 reg_def XMM18n( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(13));
 399 reg_def XMM18o( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(14));
 400 reg_def XMM18p( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(15));
 401 
 402 reg_def XMM19 ( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg());
 403 reg_def XMM19b( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(1));
 404 reg_def XMM19c( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(2));
 405 reg_def XMM19d( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(3));
 406 reg_def XMM19e( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(4));
 407 reg_def XMM19f( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(5));
 408 reg_def XMM19g( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(6));
 409 reg_def XMM19h( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(7));
 410 reg_def XMM19i( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(8));
 411 reg_def XMM19j( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(9));
 412 reg_def XMM19k( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(10));
 413 reg_def XMM19l( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(11));
 414 reg_def XMM19m( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(12));
 415 reg_def XMM19n( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(13));
 416 reg_def XMM19o( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(14));
 417 reg_def XMM19p( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(15));
 418 
 419 reg_def XMM20 ( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg());
 420 reg_def XMM20b( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(1));
 421 reg_def XMM20c( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(2));
 422 reg_def XMM20d( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(3));
 423 reg_def XMM20e( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(4));
 424 reg_def XMM20f( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(5));
 425 reg_def XMM20g( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(6));
 426 reg_def XMM20h( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(7));
 427 reg_def XMM20i( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(8));
 428 reg_def XMM20j( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(9));
 429 reg_def XMM20k( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(10));
 430 reg_def XMM20l( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(11));
 431 reg_def XMM20m( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(12));
 432 reg_def XMM20n( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(13));
 433 reg_def XMM20o( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(14));
 434 reg_def XMM20p( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(15));
 435 
 436 reg_def XMM21 ( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg());
 437 reg_def XMM21b( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(1));
 438 reg_def XMM21c( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(2));
 439 reg_def XMM21d( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(3));
 440 reg_def XMM21e( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(4));
 441 reg_def XMM21f( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(5));
 442 reg_def XMM21g( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(6));
 443 reg_def XMM21h( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(7));
 444 reg_def XMM21i( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(8));
 445 reg_def XMM21j( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(9));
 446 reg_def XMM21k( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(10));
 447 reg_def XMM21l( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(11));
 448 reg_def XMM21m( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(12));
 449 reg_def XMM21n( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(13));
 450 reg_def XMM21o( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(14));
 451 reg_def XMM21p( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(15));
 452 
 453 reg_def XMM22 ( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg());
 454 reg_def XMM22b( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(1));
 455 reg_def XMM22c( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(2));
 456 reg_def XMM22d( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(3));
 457 reg_def XMM22e( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(4));
 458 reg_def XMM22f( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(5));
 459 reg_def XMM22g( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(6));
 460 reg_def XMM22h( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(7));
 461 reg_def XMM22i( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(8));
 462 reg_def XMM22j( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(9));
 463 reg_def XMM22k( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(10));
 464 reg_def XMM22l( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(11));
 465 reg_def XMM22m( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(12));
 466 reg_def XMM22n( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(13));
 467 reg_def XMM22o( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(14));
 468 reg_def XMM22p( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(15));
 469 
 470 reg_def XMM23 ( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg());
 471 reg_def XMM23b( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(1));
 472 reg_def XMM23c( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(2));
 473 reg_def XMM23d( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(3));
 474 reg_def XMM23e( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(4));
 475 reg_def XMM23f( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(5));
 476 reg_def XMM23g( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(6));
 477 reg_def XMM23h( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(7));
 478 reg_def XMM23i( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(8));
 479 reg_def XMM23j( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(9));
 480 reg_def XMM23k( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(10));
 481 reg_def XMM23l( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(11));
 482 reg_def XMM23m( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(12));
 483 reg_def XMM23n( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(13));
 484 reg_def XMM23o( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(14));
 485 reg_def XMM23p( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(15));
 486 
 487 reg_def XMM24 ( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg());
 488 reg_def XMM24b( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(1));
 489 reg_def XMM24c( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(2));
 490 reg_def XMM24d( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(3));
 491 reg_def XMM24e( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(4));
 492 reg_def XMM24f( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(5));
 493 reg_def XMM24g( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(6));
 494 reg_def XMM24h( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(7));
 495 reg_def XMM24i( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(8));
 496 reg_def XMM24j( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(9));
 497 reg_def XMM24k( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(10));
 498 reg_def XMM24l( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(11));
 499 reg_def XMM24m( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(12));
 500 reg_def XMM24n( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(13));
 501 reg_def XMM24o( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(14));
 502 reg_def XMM24p( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(15));
 503 
 504 reg_def XMM25 ( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg());
 505 reg_def XMM25b( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(1));
 506 reg_def XMM25c( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(2));
 507 reg_def XMM25d( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(3));
 508 reg_def XMM25e( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(4));
 509 reg_def XMM25f( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(5));
 510 reg_def XMM25g( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(6));
 511 reg_def XMM25h( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(7));
 512 reg_def XMM25i( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(8));
 513 reg_def XMM25j( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(9));
 514 reg_def XMM25k( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(10));
 515 reg_def XMM25l( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(11));
 516 reg_def XMM25m( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(12));
 517 reg_def XMM25n( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(13));
 518 reg_def XMM25o( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(14));
 519 reg_def XMM25p( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(15));
 520 
 521 reg_def XMM26 ( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg());
 522 reg_def XMM26b( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(1));
 523 reg_def XMM26c( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(2));
 524 reg_def XMM26d( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(3));
 525 reg_def XMM26e( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(4));
 526 reg_def XMM26f( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(5));
 527 reg_def XMM26g( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(6));
 528 reg_def XMM26h( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(7));
 529 reg_def XMM26i( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(8));
 530 reg_def XMM26j( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(9));
 531 reg_def XMM26k( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(10));
 532 reg_def XMM26l( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(11));
 533 reg_def XMM26m( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(12));
 534 reg_def XMM26n( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(13));
 535 reg_def XMM26o( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(14));
 536 reg_def XMM26p( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(15));
 537 
 538 reg_def XMM27 ( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg());
     reg_def XMM27b( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(1));
 539 reg_def XMM27c( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(2));
 540 reg_def XMM27d( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(3));
 541 reg_def XMM27e( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(4));
 542 reg_def XMM27f( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(5));
 543 reg_def XMM27g( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(6));
 544 reg_def XMM27h( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(7));
 545 reg_def XMM27i( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(8));
 546 reg_def XMM27j( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(9));
 547 reg_def XMM27k( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(10));
 548 reg_def XMM27l( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(11));
 549 reg_def XMM27m( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(12));
 550 reg_def XMM27n( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(13));
 551 reg_def XMM27o( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(14));
 552 reg_def XMM27p( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(15));
 553 
 554 reg_def XMM28 ( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg());
 555 reg_def XMM28b( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(1));
 556 reg_def XMM28c( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(2));
 557 reg_def XMM28d( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(3));
 558 reg_def XMM28e( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(4));
 559 reg_def XMM28f( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(5));
 560 reg_def XMM28g( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(6));
 561 reg_def XMM28h( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(7));
 562 reg_def XMM28i( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(8));
 563 reg_def XMM28j( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(9));
 564 reg_def XMM28k( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(10));
 565 reg_def XMM28l( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(11));
 566 reg_def XMM28m( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(12));
 567 reg_def XMM28n( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(13));
 568 reg_def XMM28o( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(14));
 569 reg_def XMM28p( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(15));
 570 
 571 reg_def XMM29 ( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg());
 572 reg_def XMM29b( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(1));
 573 reg_def XMM29c( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(2));
 574 reg_def XMM29d( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(3));
 575 reg_def XMM29e( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(4));
 576 reg_def XMM29f( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(5));
 577 reg_def XMM29g( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(6));
 578 reg_def XMM29h( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(7));
 579 reg_def XMM29i( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(8));
 580 reg_def XMM29j( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(9));
 581 reg_def XMM29k( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(10));
 582 reg_def XMM29l( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(11));
 583 reg_def XMM29m( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(12));
 584 reg_def XMM29n( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(13));
 585 reg_def XMM29o( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(14));
 586 reg_def XMM29p( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(15));
 587 
 588 reg_def XMM30 ( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg());
 589 reg_def XMM30b( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(1));
 590 reg_def XMM30c( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(2));
 591 reg_def XMM30d( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(3));
 592 reg_def XMM30e( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(4));
 593 reg_def XMM30f( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(5));
 594 reg_def XMM30g( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(6));
 595 reg_def XMM30h( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(7));
 596 reg_def XMM30i( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(8));
 597 reg_def XMM30j( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(9));
 598 reg_def XMM30k( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(10));
 599 reg_def XMM30l( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(11));
 600 reg_def XMM30m( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(12));
 601 reg_def XMM30n( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(13));
 602 reg_def XMM30o( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(14));
 603 reg_def XMM30p( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(15));
 604 
 605 reg_def XMM31 ( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg());
 606 reg_def XMM31b( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(1));
 607 reg_def XMM31c( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(2));
 608 reg_def XMM31d( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(3));
 609 reg_def XMM31e( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(4));
 610 reg_def XMM31f( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(5));
 611 reg_def XMM31g( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(6));
 612 reg_def XMM31h( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(7));
 613 reg_def XMM31i( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(8));
 614 reg_def XMM31j( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(9));
 615 reg_def XMM31k( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(10));
 616 reg_def XMM31l( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(11));
 617 reg_def XMM31m( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(12));
 618 reg_def XMM31n( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(13));
 619 reg_def XMM31o( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(14));
 620 reg_def XMM31p( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(15));
 621 
 622 #else // _WIN64
 623 
 624 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
 625 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
 626 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
 627 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
 628 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
 629 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
 630 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
 631 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
 632 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
 633 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
 634 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
 635 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
 636 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
 637 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
 638 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
 639 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
 640 
 641 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
 642 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
 643 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
 644 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
 645 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
 646 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
 647 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
 648 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
 649 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
 650 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
 651 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
 652 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
 653 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
 654 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
 655 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
 656 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
 657 
 658 #ifdef _LP64
 659 
 660 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
 661 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
 662 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
 663 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
 664 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
 665 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
 666 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
 667 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
 668 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
 669 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
 670 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
 671 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
 672 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
 673 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
 674 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
 675 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
 676 
 677 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
 678 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
 679 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
 680 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
 681 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
 682 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
 683 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
 684 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
 685 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
 686 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
 687 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
 688 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
 689 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
 690 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
 691 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
 692 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
 693 
 694 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 695 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
 696 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
 697 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
 698 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
 699 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
 700 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
 701 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
 702 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
 703 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
 704 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
 705 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
 706 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
 707 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
 708 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
 709 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
 710 
 711 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 712 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
 713 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
 714 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
 715 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
 716 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
 717 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
 718 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
 719 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
 720 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
 721 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
 722 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
 723 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
 724 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
 725 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
 726 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
 727 
 728 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 729 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
 730 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
 731 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
 732 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
 733 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
 734 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
 735 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
 736 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
 737 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
 738 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
 739 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
 740 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
 741 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
 742 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
 743 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
 744 
 745 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 746 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
 747 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
 748 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
 749 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
 750 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
 751 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
 752 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
 753 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
 754 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
 755 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
 756 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
 757 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
 758 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
 759 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
 760 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
 761 
 762 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 763 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
 764 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
 765 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
 766 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
 767 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
 768 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
 769 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
 770 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
 771 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
 772 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
 773 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
 774 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
 775 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
 776 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
 777 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
 778 
 779 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 780 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
 781 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
 782 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
 783 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
 784 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
 785 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
 786 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
 787 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
 788 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
 789 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
 790 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
 791 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
 792 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
 793 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
 794 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
 795 
 796 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
 797 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
 798 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
 799 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
 800 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
 801 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
 802 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
 803 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
 804 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
 805 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
 806 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
 807 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
 808 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
 809 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
 810 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
 811 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
 812 
 813 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
 814 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
 815 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
 816 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
 817 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
 818 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
 819 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
 820 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
 821 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
 822 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
 823 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
 824 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
 825 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
 826 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
 827 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
 828 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
 829 
 830 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
 831 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
 832 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
 833 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
 834 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
 835 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
 836 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
 837 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
 838 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
 839 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
 840 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
 841 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
 842 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
 843 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
 844 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
 845 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
 846 
 847 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
 848 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
 849 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
 850 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
 851 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
 852 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
 853 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
 854 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
 855 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
 856 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
 857 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
 858 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
 859 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
 860 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
 861 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
 862 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
 863 
 864 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
 865 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
 866 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
 867 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
 868 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
 869 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
 870 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
 871 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
 872 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
 873 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
 874 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
 875 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
 876 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
 877 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
 878 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
 879 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
 880 
 881 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
 882 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
 883 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
 884 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
 885 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
 886 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
 887 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
 888 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
 889 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
 890 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
 891 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
 892 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
 893 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
 894 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
 895 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
 896 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
 897 
 898 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
 899 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
 900 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
 901 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
 902 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
 903 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
 904 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
 905 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
 906 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
 907 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
 908 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
 909 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
 910 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
 911 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
 912 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
 913 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
 914 
 915 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
 916 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
 917 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
 918 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
 919 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
 920 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
 921 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
 922 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
 923 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
 924 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
 925 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
 926 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
 927 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
 928 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
 929 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
 930 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
 931 
 932 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
 933 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
 934 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
 935 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
 936 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
 937 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
 938 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
 939 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
 940 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
 941 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
 942 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
 943 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
 944 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
 945 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
 946 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
 947 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
 948 
 949 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
 950 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
 951 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
 952 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
 953 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
 954 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
 955 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
 956 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
 957 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
 958 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
 959 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
 960 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
 961 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
 962 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
 963 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
 964 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
 965 
 966 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
 967 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
 968 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
 969 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
 970 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
 971 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
 972 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
 973 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
 974 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
 975 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
 976 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
 977 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
 978 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
 979 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
 980 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
 981 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
 982 
 983 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
 984 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
 985 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
 986 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
 987 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
 988 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
 989 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
 990 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
 991 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
 992 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
 993 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
 994 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
 995 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
 996 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
 997 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
 998 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
 999 
1000 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
1001 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
1002 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
1003 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
1004 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
1005 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
1006 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
1007 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
1008 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
1009 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
1010 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
1011 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
1012 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
1013 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
1014 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
1015 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
1016 
1017 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
1018 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
1019 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
1020 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
1021 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
1022 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
1023 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
1024 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
1025 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
1026 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
1027 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
1028 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
1029 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
1030 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
1031 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
1032 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
1033 
1034 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
1035 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
1036 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
1037 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
1038 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
1039 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
1040 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
1041 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
1042 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
1043 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
1044 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
1045 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
1046 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
1047 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
1048 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
1049 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
1050 
1051 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
1052 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
1053 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
1054 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
1055 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
1056 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
1057 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
1058 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
1059 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
1060 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
1061 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
1062 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
1063 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
1064 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
1065 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
1066 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
1067 
1068 #endif // _LP64
1069 
1070 #endif // _WIN64
1071 
1072 #ifdef _LP64
1073 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
1074 #else
1075 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
1076 #endif // _LP64
1077 
1078 alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
1079                    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
1080                    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
1081                    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
1082                    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
1083                    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
1084                    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
1085                    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
1086 #ifdef _LP64
1087                   ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
1088                    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
1089                    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1090                    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1091                    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1092                    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1093                    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1094                    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
1095                   ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1096                    XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1097                    XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1098                    XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1099                    XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1100                    XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1101                    XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1102                    XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1103                    XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1104                    XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1105                    XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1106                    XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1107                    XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1108                    XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1109                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1110                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
1111 #endif
1112                       );
1113 
1114 // flags allocation class should be last.
1115 alloc_class chunk2(RFLAGS);
1116 
1117 // Singleton class for condition codes
1118 reg_class int_flags(RFLAGS);
1119 
1120 // Class for pre evex float registers
1121 reg_class float_reg_legacy(XMM0,
1122                     XMM1,
1123                     XMM2,
1124                     XMM3,
1125                     XMM4,
1126                     XMM5,
1127                     XMM6,
1128                     XMM7
1129 #ifdef _LP64
1130                    ,XMM8,
1131                     XMM9,
1132                     XMM10,
1133                     XMM11,
1134                     XMM12,
1135                     XMM13,
1136                     XMM14,
1137                     XMM15
1138 #endif
1139                     );
1140 
1141 // Class for evex float registers
1142 reg_class float_reg_evex(XMM0,
1143                     XMM1,
1144                     XMM2,
1145                     XMM3,
1146                     XMM4,
1147                     XMM5,
1148                     XMM6,
1149                     XMM7
1150 #ifdef _LP64
1151                    ,XMM8,
1152                     XMM9,
1153                     XMM10,
1154                     XMM11,
1155                     XMM12,
1156                     XMM13,
1157                     XMM14,
1158                     XMM15,
1159                     XMM16,
1160                     XMM17,
1161                     XMM18,
1162                     XMM19,
1163                     XMM20,
1164                     XMM21,
1165                     XMM22,
1166                     XMM23,
1167                     XMM24,
1168                     XMM25,
1169                     XMM26,
1170                     XMM27,
1171                     XMM28,
1172                     XMM29,
1173                     XMM30,
1174                     XMM31
1175 #endif
1176                     );
1177 
1178 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
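// reg_class_dynamic selects between the two classes above when code is
// generated: if the predicate (here VM_Version::supports_evex()) holds, the
// evex class (which includes XMM16-XMM31) is used, otherwise the legacy class.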
1179 
1180 // Class for pre evex double registers
1181 reg_class double_reg_legacy(XMM0,  XMM0b,
1182                      XMM1,  XMM1b,
1183                      XMM2,  XMM2b,
1184                      XMM3,  XMM3b,
1185                      XMM4,  XMM4b,
1186                      XMM5,  XMM5b,
1187                      XMM6,  XMM6b,
1188                      XMM7,  XMM7b
1189 #ifdef _LP64
1190                     ,XMM8,  XMM8b,
1191                      XMM9,  XMM9b,
1192                      XMM10, XMM10b,
1193                      XMM11, XMM11b,
1194                      XMM12, XMM12b,
1195                      XMM13, XMM13b,
1196                      XMM14, XMM14b,
1197                      XMM15, XMM15b
1198 #endif
1199                      );
1200 
1201 // Class for evex double registers
1202 reg_class double_reg_evex(XMM0,  XMM0b,
1203                      XMM1,  XMM1b,
1204                      XMM2,  XMM2b,
1205                      XMM3,  XMM3b,
1206                      XMM4,  XMM4b,
1207                      XMM5,  XMM5b,
1208                      XMM6,  XMM6b,
1209                      XMM7,  XMM7b
1210 #ifdef _LP64
1211                     ,XMM8,  XMM8b,
1212                      XMM9,  XMM9b,
1213                      XMM10, XMM10b,
1214                      XMM11, XMM11b,
1215                      XMM12, XMM12b,
1216                      XMM13, XMM13b,
1217                      XMM14, XMM14b,
1218                      XMM15, XMM15b,
1219                      XMM16, XMM16b,
1220                      XMM17, XMM17b,
1221                      XMM18, XMM18b,
1222                      XMM19, XMM19b,
1223                      XMM20, XMM20b,
1224                      XMM21, XMM21b,
1225                      XMM22, XMM22b,
1226                      XMM23, XMM23b,
1227                      XMM24, XMM24b,
1228                      XMM25, XMM25b,
1229                      XMM26, XMM26b,
1230                      XMM27, XMM27b,
1231                      XMM28, XMM28b,
1232                      XMM29, XMM29b,
1233                      XMM30, XMM30b,
1234                      XMM31, XMM31b
1235 #endif
1236                      );
1237 
1238 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1239 
1240 // Class for pre evex 32bit vector registers
1241 reg_class vectors_reg_legacy(XMM0,
1242                       XMM1,
1243                       XMM2,
1244                       XMM3,
1245                       XMM4,
1246                       XMM5,
1247                       XMM6,
1248                       XMM7
1249 #ifdef _LP64
1250                      ,XMM8,
1251                       XMM9,
1252                       XMM10,
1253                       XMM11,
1254                       XMM12,
1255                       XMM13,
1256                       XMM14,
1257                       XMM15
1258 #endif
1259                       );
1260 
1261 // Class for evex 32bit vector registers
1262 reg_class vectors_reg_evex(XMM0,
1263                       XMM1,
1264                       XMM2,
1265                       XMM3,
1266                       XMM4,
1267                       XMM5,
1268                       XMM6,
1269                       XMM7
1270 #ifdef _LP64
1271                      ,XMM8,
1272                       XMM9,
1273                       XMM10,
1274                       XMM11,
1275                       XMM12,
1276                       XMM13,
1277                       XMM14,
1278                       XMM15,
1279                       XMM16,
1280                       XMM17,
1281                       XMM18,
1282                       XMM19,
1283                       XMM20,
1284                       XMM21,
1285                       XMM22,
1286                       XMM23,
1287                       XMM24,
1288                       XMM25,
1289                       XMM26,
1290                       XMM27,
1291                       XMM28,
1292                       XMM29,
1293                       XMM30,
1294                       XMM31
1295 #endif
1296                       );
1297 
1298 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1299 
// Class for pre evex 64bit vector registers
1301 reg_class vectord_reg_legacy(XMM0,  XMM0b,
1302                       XMM1,  XMM1b,
1303                       XMM2,  XMM2b,
1304                       XMM3,  XMM3b,
1305                       XMM4,  XMM4b,
1306                       XMM5,  XMM5b,
1307                       XMM6,  XMM6b,
1308                       XMM7,  XMM7b
1309 #ifdef _LP64
1310                      ,XMM8,  XMM8b,
1311                       XMM9,  XMM9b,
1312                       XMM10, XMM10b,
1313                       XMM11, XMM11b,
1314                       XMM12, XMM12b,
1315                       XMM13, XMM13b,
1316                       XMM14, XMM14b,
1317                       XMM15, XMM15b
1318 #endif
1319                       );
1320 
// Class for evex 64bit vector registers
1322 reg_class vectord_reg_evex(XMM0,  XMM0b,
1323                       XMM1,  XMM1b,
1324                       XMM2,  XMM2b,
1325                       XMM3,  XMM3b,
1326                       XMM4,  XMM4b,
1327                       XMM5,  XMM5b,
1328                       XMM6,  XMM6b,
1329                       XMM7,  XMM7b
1330 #ifdef _LP64
1331                      ,XMM8,  XMM8b,
1332                       XMM9,  XMM9b,
1333                       XMM10, XMM10b,
1334                       XMM11, XMM11b,
1335                       XMM12, XMM12b,
1336                       XMM13, XMM13b,
1337                       XMM14, XMM14b,
1338                       XMM15, XMM15b,
1339                       XMM16, XMM16b,
1340                       XMM17, XMM17b,
1341                       XMM18, XMM18b,
1342                       XMM19, XMM19b,
1343                       XMM20, XMM20b,
1344                       XMM21, XMM21b,
1345                       XMM22, XMM22b,
1346                       XMM23, XMM23b,
1347                       XMM24, XMM24b,
1348                       XMM25, XMM25b,
1349                       XMM26, XMM26b,
1350                       XMM27, XMM27b,
1351                       XMM28, XMM28b,
1352                       XMM29, XMM29b,
1353                       XMM30, XMM30b,
1354                       XMM31, XMM31b
1355 #endif
1356                       );
1357 
1358 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1359 
// Class for pre evex 128bit vector registers
1361 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
1362                       XMM1,  XMM1b,  XMM1c,  XMM1d,
1363                       XMM2,  XMM2b,  XMM2c,  XMM2d,
1364                       XMM3,  XMM3b,  XMM3c,  XMM3d,
1365                       XMM4,  XMM4b,  XMM4c,  XMM4d,
1366                       XMM5,  XMM5b,  XMM5c,  XMM5d,
1367                       XMM6,  XMM6b,  XMM6c,  XMM6d,
1368                       XMM7,  XMM7b,  XMM7c,  XMM7d
1369 #ifdef _LP64
1370                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,
1371                       XMM9,  XMM9b,  XMM9c,  XMM9d,
1372                       XMM10, XMM10b, XMM10c, XMM10d,
1373                       XMM11, XMM11b, XMM11c, XMM11d,
1374                       XMM12, XMM12b, XMM12c, XMM12d,
1375                       XMM13, XMM13b, XMM13c, XMM13d,
1376                       XMM14, XMM14b, XMM14c, XMM14d,
1377                       XMM15, XMM15b, XMM15c, XMM15d
1378 #endif
1379                       );
1380 
// Class for evex 128bit vector registers
1382 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
1383                       XMM1,  XMM1b,  XMM1c,  XMM1d,
1384                       XMM2,  XMM2b,  XMM2c,  XMM2d,
1385                       XMM3,  XMM3b,  XMM3c,  XMM3d,
1386                       XMM4,  XMM4b,  XMM4c,  XMM4d,
1387                       XMM5,  XMM5b,  XMM5c,  XMM5d,
1388                       XMM6,  XMM6b,  XMM6c,  XMM6d,
1389                       XMM7,  XMM7b,  XMM7c,  XMM7d
1390 #ifdef _LP64
1391                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,
1392                       XMM9,  XMM9b,  XMM9c,  XMM9d,
1393                       XMM10, XMM10b, XMM10c, XMM10d,
1394                       XMM11, XMM11b, XMM11c, XMM11d,
1395                       XMM12, XMM12b, XMM12c, XMM12d,
1396                       XMM13, XMM13b, XMM13c, XMM13d,
1397                       XMM14, XMM14b, XMM14c, XMM14d,
1398                       XMM15, XMM15b, XMM15c, XMM15d,
1399                       XMM16, XMM16b, XMM16c, XMM16d,
1400                       XMM17, XMM17b, XMM17c, XMM17d,
1401                       XMM18, XMM18b, XMM18c, XMM18d,
1402                       XMM19, XMM19b, XMM19c, XMM19d,
1403                       XMM20, XMM20b, XMM20c, XMM20d,
1404                       XMM21, XMM21b, XMM21c, XMM21d,
1405                       XMM22, XMM22b, XMM22c, XMM22d,
1406                       XMM23, XMM23b, XMM23c, XMM23d,
1407                       XMM24, XMM24b, XMM24c, XMM24d,
1408                       XMM25, XMM25b, XMM25c, XMM25d,
1409                       XMM26, XMM26b, XMM26c, XMM26d,
1410                       XMM27, XMM27b, XMM27c, XMM27d,
1411                       XMM28, XMM28b, XMM28c, XMM28d,
1412                       XMM29, XMM29b, XMM29c, XMM29d,
1413                       XMM30, XMM30b, XMM30c, XMM30d,
1414                       XMM31, XMM31b, XMM31c, XMM31d
1415 #endif
1416                       );
1417 
1418 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1419 
// Class for pre evex 256bit vector registers
1421 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
1422                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
1423                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
1424                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
1425                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
1426                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
1427                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
1428                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
1429 #ifdef _LP64
1430                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
1431                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
1432                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1433                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1434                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1435                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1436                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1437                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
1438 #endif
1439                       );
1440 
// Class for evex 256bit vector registers
1442 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
1443                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
1444                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
1445                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
1446                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
1447                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
1448                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
1449                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
1450 #ifdef _LP64
1451                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
1452                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
1453                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1454                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1455                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1456                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1457                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1458                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1459                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1460                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1461                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1462                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1463                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1464                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1465                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1466                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1467                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1468                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1469                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1470                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1471                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1472                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1473                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1474                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
1475 #endif
1476                       );
1477 
1478 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1479 
1480 // Class for all 512bit vector registers
1481 reg_class vectorz_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
1482                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
1483                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
1484                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
1485                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
1486                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
1487                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
1488                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
1489 #ifdef _LP64
1490                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
1491                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
1492                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1493                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1494                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1495                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1496                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1497                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
1498                      ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1499                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1500                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1501                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1502                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1503                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1504                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1505                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1506                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1507                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1508                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1509                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1510                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1511                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1512                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1513                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
1514 #endif
1515                       );
1516 
1517 %}
1518 
1519 
1520 //----------SOURCE BLOCK-------------------------------------------------------
1521 // This is a block of C++ code which provides values, functions, and
1522 // definitions necessary in the rest of the architecture description
1523 
1524 source_hpp %{
1525 // Header information of the source block.
1526 // Method declarations/definitions which are used outside
1527 // the ad-scope can conveniently be defined here.
1528 //
1529 // To keep related declarations/definitions/uses close together,
1530 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
1531 
1532 class NativeJump;
1533 
1534 class CallStubImpl {
1535 
1536   //--------------------------------------------------------------
1537   //---<  Used for optimization in Compile::shorten_branches  >---
1538   //--------------------------------------------------------------
1539 
1540  public:
1541   // Size of call trampoline stub.
1542   static uint size_call_trampoline() {
1543     return 0; // no call trampolines on this platform
1544   }
1545 
1546   // number of relocations needed by a call trampoline stub
1547   static uint reloc_call_trampoline() {
1548     return 0; // no call trampolines on this platform
1549   }
1550 };
1551 
1552 class HandlerImpl {
1553 
1554  public:
1555 
1556   static int emit_exception_handler(CodeBuffer &cbuf);
1557   static int emit_deopt_handler(CodeBuffer& cbuf);
1558 
1559   static uint size_exception_handler() {
1560     // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization.  (4932387)
1563     // Note that this value is also credited (in output.cpp) to
1564     // the size of the code section.
1565     return NativeJump::instruction_size;
1566   }
1567 
1568 #ifdef _LP64
1569   static uint size_deopt_handler() {
    // three 5 byte instructions: a call, a subptr adjusting the pushed pc,
    // and a jump (see emit_deopt_handler below)
1571     return 15;
1572   }
1573 #else
1574   static uint size_deopt_handler() {
1575     // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization.  (4932387)
1578     // Note that this value is also credited (in output.cpp) to
1579     // the size of the code section.
1580     return 5 + NativeJump::instruction_size; // pushl(); jmp;
1581   }
1582 #endif
1583 };
1584 
1585 %} // end source_hpp
1586 
1587 source %{
1588 
1589 // Emit exception handler code.
1590 // Stuff framesize into a register and call a VM stub routine.
1591 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
1592 
1593   // Note that the code buffer's insts_mark is always relative to insts.
1594   // That's why we must use the macroassembler to generate a handler.
1595   MacroAssembler _masm(&cbuf);
1596   address base = __ start_a_stub(size_exception_handler());
1597   if (base == NULL) {
1598     ciEnv::current()->record_failure("CodeCache is full");
1599     return 0;  // CodeBuffer::expand failed
1600   }
1601   int offset = __ offset();
1602   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1603   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1604   __ end_a_stub();
1605   return offset;
1606 }
1607 
1608 // Emit deopt handler code.
1609 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
1610 
1611   // Note that the code buffer's insts_mark is always relative to insts.
1612   // That's why we must use the macroassembler to generate a handler.
1613   MacroAssembler _masm(&cbuf);
1614   address base = __ start_a_stub(size_deopt_handler());
1615   if (base == NULL) {
1616     ciEnv::current()->record_failure("CodeCache is full");
1617     return 0;  // CodeBuffer::expand failed
1618   }
1619   int offset = __ offset();
1620 
1621 #ifdef _LP64
1622   address the_pc = (address) __ pc();
1623   Label next;
1624   // push a "the_pc" on the stack without destroying any registers
1625   // as they all may be live.
1626 
1627   // push address of "next"
1628   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1629   __ bind(next);
1630   // adjust it so it matches "the_pc"
1631   __ subptr(Address(rsp, 0), __ offset() - offset);
1632 #else
1633   InternalAddress here(__ pc());
1634   __ pushptr(here.addr());
1635 #endif
1636 
1637   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1638   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1639   __ end_a_stub();
1640   return offset;
1641 }
1642 
1643 
1644 //=============================================================================
1645 
1646   // Float masks come from different places depending on platform.
1647 #ifdef _LP64
1648   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
1649   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
1650   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
1651   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
1652 #else
1653   static address float_signmask()  { return (address)float_signmask_pool; }
1654   static address float_signflip()  { return (address)float_signflip_pool; }
1655   static address double_signmask() { return (address)double_signmask_pool; }
1656   static address double_signflip() { return (address)double_signflip_pool; }
1657 #endif
1658 
1659 
1660 const bool Matcher::match_rule_supported(int opcode) {
1661   if (!has_match_rule(opcode))
1662     return false;
1663 
1664   switch (opcode) {
1665     case Op_PopCountI:
1666     case Op_PopCountL:
1667       if (!UsePopCountInstruction)
1668         return false;
1669     break;
1670     case Op_MulVI:
1671       if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
1672         return false;
1673     break;
    case Op_MulVL:
    case Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false)
        return false;
    break;
    case Op_AddReductionVL:
      if (UseAVX < 3) // only EVEX: vector connectivity becomes an issue here
        return false;
    break;
    case Op_AddReductionVI:
      if (UseSSE < 3) // requires at least SSE3
        return false;
    break;
    case Op_MulReductionVI:
      if (UseSSE < 4) // requires at least SSE4
        return false;
    break;
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) // requires at least SSE
        return false;
    break;
1694     case Op_SqrtVD:
1695       if (UseAVX < 1) // enabled for AVX only
1696         return false;
1697     break;
1698     case Op_CompareAndSwapL:
1699 #ifdef _LP64
1700     case Op_CompareAndSwapP:
1701 #endif
1702       if (!VM_Version::supports_cx8())
1703         return false;
1704     break;
1705   }
1706 
  return true;  // By default, match rules are supported.
1708 }
1709 
1710 const int Matcher::float_pressure_scale(void) {
1711   int scale_factor = 1;
1712 #ifdef _LP64
1713   if (UseAVX > 2) {
1714     scale_factor  = 2;
1715   }
1716 #endif
1717   return scale_factor;
1718 }
1719 
1720 // Max vector size in bytes. 0 if not supported.
1721 const int Matcher::vector_width_in_bytes(BasicType bt) {
1722   assert(is_java_primitive(bt), "only primitive type vectors");
1723   if (UseSSE < 2) return 0;
1724   // SSE2 supports 128bit vectors for all types.
1725   // AVX2 supports 256bit vectors for all types.
  // EVEX (UseAVX > 2) supports 512bit vectors for all types.
1727   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
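  // For example, UseAVX == 2 gives (1 << 2) * 8 = 32 bytes and UseAVX == 3
  // gives 64 bytes; the result is further capped by MaxVectorSize below.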
1728   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
1729   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
1730     size = (UseAVX > 2) ? 64 : 32;
1731   // Use flag to limit vector size.
1732   size = MIN2(size,(int)MaxVectorSize);
1733   // Minimum 2 values in vector (or 4 for bytes).
1734   switch (bt) {
1735   case T_DOUBLE:
1736   case T_LONG:
    if (size < 16) return 0;
    // fall through to the narrower minimum checks
1738   case T_FLOAT:
1739   case T_INT:
    if (size < 8) return 0;
    // fall through
1741   case T_BOOLEAN:
1742   case T_BYTE:
1743   case T_CHAR:
1744   case T_SHORT:
1745     if (size < 4) return 0;
1746     break;
1747   default:
1748     ShouldNotReachHere();
1749   }
1750   return size;
1751 }
1752 
1753 // Limits on vector size (number of elements) loaded into vector.
1754 const int Matcher::max_vector_size(const BasicType bt) {
1755   return vector_width_in_bytes(bt)/type2aelembytes(bt);
1756 }
1757 const int Matcher::min_vector_size(const BasicType bt) {
1758   int max_size = max_vector_size(bt);
1759   // Min size which can be loaded into vector is 4 bytes.
1760   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
1761   return MIN2(size,max_size);
1762 }
1763 
// Vector ideal reg corresponding to specified size in bytes
1765 const int Matcher::vector_ideal_reg(int size) {
1766   assert(MaxVectorSize >= size, "");
1767   switch(size) {
1768     case  4: return Op_VecS;
1769     case  8: return Op_VecD;
1770     case 16: return Op_VecX;
1771     case 32: return Op_VecY;
1772     case 64: return Op_VecZ;
1773   }
1774   ShouldNotReachHere();
1775   return 0;
1776 }
1777 
1778 // Only lowest bits of xmm reg are used for vector shift count.
1779 const int Matcher::vector_shift_count_ideal_reg(int size) {
1780   return Op_VecS;
1781 }
1782 
// x86 supports misaligned vector stores and loads.
1784 const bool Matcher::misaligned_vectors_ok() {
1785   return !AlignVector; // can be changed by flag
1786 }
1787 
1788 // x86 AES instructions are compatible with SunJCE expanded
1789 // keys, hence we do not need to pass the original key to stubs
1790 const bool Matcher::pass_original_key_for_aes() {
1791   return false;
1792 }
1793 
1794 // Helper methods for MachSpillCopyNode::implementation().
1795 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
1796                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In the 64-bit VM, size calculation is very complex, so the size is
  // obtained by emitting the instructions into a scratch buffer.
1799   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1800   assert(ireg == Op_VecS || // 32bit vector
1801          (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
1802          (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
1803          "no non-adjacent vector moves" );
1804   if (cbuf) {
1805     MacroAssembler _masm(cbuf);
1806     int offset = __ offset();
1807     switch (ireg) {
1808     case Op_VecS: // copy whole register
1809     case Op_VecD:
1810     case Op_VecX:
1811       __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1812       break;
1813     case Op_VecY:
1814       __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1815       break;
1816     case Op_VecZ:
1817       __ evmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
1818       break;
1819     default:
1820       ShouldNotReachHere();
1821     }
1822     int size = __ offset() - offset;
1823 #ifdef ASSERT
1824     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
1826 #endif
1827     return size;
1828 #ifndef PRODUCT
1829   } else if (!do_size) {
1830     switch (ireg) {
1831     case Op_VecS:
1832     case Op_VecD:
1833     case Op_VecX:
1834       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1835       break;
1836     case Op_VecY:
1837     case Op_VecZ:
1838       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1839       break;
1840     default:
1841       ShouldNotReachHere();
1842     }
1843 #endif
1844   }
1845   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
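  // When UseAVX > 2 an EVEX encoding is used instead; its 4-byte prefix makes the copy 6 bytes.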
1846   return (UseAVX > 2) ? 6 : 4;
1847 }
1848 
1849 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
1850                             int stack_offset, int reg, uint ireg, outputStream* st) {
  // In the 64-bit VM, size calculation is very complex, so the size is
  // obtained by emitting the instructions into a scratch buffer.
1853   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1854   if (cbuf) {
1855     MacroAssembler _masm(cbuf);
1856     int offset = __ offset();
1857     if (is_load) {
1858       switch (ireg) {
1859       case Op_VecS:
1860         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1861         break;
1862       case Op_VecD:
1863         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1864         break;
1865       case Op_VecX:
1866         __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1867         break;
1868       case Op_VecY:
1869         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1870         break;
1871       case Op_VecZ:
1872         __ evmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
1873         break;
1874       default:
1875         ShouldNotReachHere();
1876       }
1877     } else { // store
1878       switch (ireg) {
1879       case Op_VecS:
1880         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1881         break;
1882       case Op_VecD:
1883         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1884         break;
1885       case Op_VecX:
1886         __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1887         break;
1888       case Op_VecY:
1889         __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1890         break;
1891       case Op_VecZ:
1892         __ evmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
1893         break;
1894       default:
1895         ShouldNotReachHere();
1896       }
1897     }
1898     int size = __ offset() - offset;
1899 #ifdef ASSERT
1900     int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
1901     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
1903 #endif
1904     return size;
1905 #ifndef PRODUCT
1906   } else if (!do_size) {
1907     if (is_load) {
1908       switch (ireg) {
1909       case Op_VecS:
1910         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1911         break;
1912       case Op_VecD:
1913         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1914         break;
1915        case Op_VecX:
1916         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1917         break;
1918       case Op_VecY:
1919       case Op_VecZ:
1920         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1921         break;
1922       default:
1923         ShouldNotReachHere();
1924       }
1925     } else { // store
1926       switch (ireg) {
1927       case Op_VecS:
1928         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1929         break;
1930       case Op_VecD:
1931         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1932         break;
1933        case Op_VecX:
1934         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1935         break;
1936       case Op_VecY:
1937       case Op_VecZ:
1938         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1939         break;
1940       default:
1941         ShouldNotReachHere();
1942       }
1943     }
1944 #endif
1945   }
1946   int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
1947   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
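  // Rough sketch of where 5+offset_size comes from (assuming a low XMM register,
  // i.e. no REX prefix): movdqu xmm, [rsp+disp8] encodes as F3 0F 6F + ModRM + SIB
  // (5 bytes) plus a one-byte displacement; the 2-byte VEX form spends the same
  // two prefix bytes, so the estimate holds for the AVX encodings as well.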
1948   return 5+offset_size;
1949 }
1950 
1951 static inline jfloat replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32 bits.
1953   assert(width == 1 || width == 2, "only byte or short types here");
1954   int bit_width = width * 8;
1955   jint val = con;
1956   val &= (1 << bit_width) - 1;  // mask off sign bits
1957   while(bit_width < 32) {
1958     val |= (val << bit_width);
1959     bit_width <<= 1;
1960   }
1961   jfloat fval = *((jfloat*) &val);  // coerce to float type
1962   return fval;
1963 }
1964 
1965 static inline jdouble replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64 bits.
1967   assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
1968   int bit_width = width * 8;
1969   jlong val = con;
1970   val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
1971   while(bit_width < 64) {
1972     val |= (val << bit_width);
1973     bit_width <<= 1;
1974   }
1975   jdouble dval = *((jdouble*) &val);  // coerce to double type
1976   return dval;
1977 }
1978 
1979 #ifndef PRODUCT
1980   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
1981     st->print("nop \t# %d bytes pad for loops and calls", _count);
1982   }
1983 #endif
1984 
1985   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
1986     MacroAssembler _masm(&cbuf);
1987     __ nop(_count);
1988   }
1989 
1990   uint MachNopNode::size(PhaseRegAlloc*) const {
1991     return _count;
1992   }
1993 
1994 #ifndef PRODUCT
1995   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
1996     st->print("# breakpoint");
1997   }
1998 #endif
1999 
2000   void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
2001     MacroAssembler _masm(&cbuf);
2002     __ int3();
2003   }
2004 
2005   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
2006     return MachNode::size(ra_);
2007   }
2008 
2009 %}
2010 
2011 encode %{
2012 
2013   enc_class call_epilog %{
2014     if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find magic cookie on stack
2016       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
2017       MacroAssembler _masm(&cbuf);
2018       Label L;
2019       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
2020       __ jccb(Assembler::equal, L);
2021       // Die if stack mismatch
2022       __ int3();
2023       __ bind(L);
2024     }
2025   %}
2026 
2027 %}
2028 
2029 
2030 //----------OPERANDS-----------------------------------------------------------
2031 // Operand definitions must precede instruction definitions for correct parsing
2032 // in the ADLC because operands constitute user defined types which are used in
2033 // instruction definitions.
2034 
// This operand applies only to EVEX (AVX-512) targets, so only one version is needed.
2036 operand vecZ() %{
2037   constraint(ALLOC_IN_RC(vectorz_reg));
2038   match(VecZ);
2039 
2040   format %{ %}
2041   interface(REG_INTER);
2042 %}
2043 
2044 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
2045 
2046 // ============================================================================
2047 
2048 instruct ShouldNotReachHere() %{
2049   match(Halt);
2050   format %{ "int3\t# ShouldNotReachHere" %}
2051   ins_encode %{
2052     __ int3();
2053   %}
2054   ins_pipe(pipe_slow);
2055 %}
2056 
2057 // ============================================================================
2058 
2059 instruct addF_reg(regF dst, regF src) %{
2060   predicate((UseSSE>=1) && (UseAVX == 0));
2061   match(Set dst (AddF dst src));
2062 
2063   format %{ "addss   $dst, $src" %}
2064   ins_cost(150);
2065   ins_encode %{
2066     __ addss($dst$$XMMRegister, $src$$XMMRegister);
2067   %}
2068   ins_pipe(pipe_slow);
2069 %}
2070 
2071 instruct addF_mem(regF dst, memory src) %{
2072   predicate((UseSSE>=1) && (UseAVX == 0));
2073   match(Set dst (AddF dst (LoadF src)));
2074 
2075   format %{ "addss   $dst, $src" %}
2076   ins_cost(150);
2077   ins_encode %{
2078     __ addss($dst$$XMMRegister, $src$$Address);
2079   %}
2080   ins_pipe(pipe_slow);
2081 %}
2082 
2083 instruct addF_imm(regF dst, immF con) %{
2084   predicate((UseSSE>=1) && (UseAVX == 0));
2085   match(Set dst (AddF dst con));
2086   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2087   ins_cost(150);
2088   ins_encode %{
2089     __ addss($dst$$XMMRegister, $constantaddress($con));
2090   %}
2091   ins_pipe(pipe_slow);
2092 %}
2093 
2094 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
2095   predicate(UseAVX > 0);
2096   match(Set dst (AddF src1 src2));
2097 
2098   format %{ "vaddss  $dst, $src1, $src2" %}
2099   ins_cost(150);
2100   ins_encode %{
2101     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2102   %}
2103   ins_pipe(pipe_slow);
2104 %}
2105 
2106 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
2107   predicate(UseAVX > 0);
2108   match(Set dst (AddF src1 (LoadF src2)));
2109 
2110   format %{ "vaddss  $dst, $src1, $src2" %}
2111   ins_cost(150);
2112   ins_encode %{
2113     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2114   %}
2115   ins_pipe(pipe_slow);
2116 %}
2117 
2118 instruct addF_reg_imm(regF dst, regF src, immF con) %{
2119   predicate(UseAVX > 0);
2120   match(Set dst (AddF src con));
2121 
2122   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2123   ins_cost(150);
2124   ins_encode %{
2125     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2126   %}
2127   ins_pipe(pipe_slow);
2128 %}
2129 
2130 instruct addD_reg(regD dst, regD src) %{
2131   predicate((UseSSE>=2) && (UseAVX == 0));
2132   match(Set dst (AddD dst src));
2133 
2134   format %{ "addsd   $dst, $src" %}
2135   ins_cost(150);
2136   ins_encode %{
2137     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
2138   %}
2139   ins_pipe(pipe_slow);
2140 %}
2141 
2142 instruct addD_mem(regD dst, memory src) %{
2143   predicate((UseSSE>=2) && (UseAVX == 0));
2144   match(Set dst (AddD dst (LoadD src)));
2145 
2146   format %{ "addsd   $dst, $src" %}
2147   ins_cost(150);
2148   ins_encode %{
2149     __ addsd($dst$$XMMRegister, $src$$Address);
2150   %}
2151   ins_pipe(pipe_slow);
2152 %}
2153 
2154 instruct addD_imm(regD dst, immD con) %{
2155   predicate((UseSSE>=2) && (UseAVX == 0));
2156   match(Set dst (AddD dst con));
2157   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2158   ins_cost(150);
2159   ins_encode %{
2160     __ addsd($dst$$XMMRegister, $constantaddress($con));
2161   %}
2162   ins_pipe(pipe_slow);
2163 %}
2164 
2165 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
2166   predicate(UseAVX > 0);
2167   match(Set dst (AddD src1 src2));
2168 
2169   format %{ "vaddsd  $dst, $src1, $src2" %}
2170   ins_cost(150);
2171   ins_encode %{
2172     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2173   %}
2174   ins_pipe(pipe_slow);
2175 %}
2176 
2177 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
2178   predicate(UseAVX > 0);
2179   match(Set dst (AddD src1 (LoadD src2)));
2180 
2181   format %{ "vaddsd  $dst, $src1, $src2" %}
2182   ins_cost(150);
2183   ins_encode %{
2184     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2185   %}
2186   ins_pipe(pipe_slow);
2187 %}
2188 
2189 instruct addD_reg_imm(regD dst, regD src, immD con) %{
2190   predicate(UseAVX > 0);
2191   match(Set dst (AddD src con));
2192 
2193   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2194   ins_cost(150);
2195   ins_encode %{
2196     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2197   %}
2198   ins_pipe(pipe_slow);
2199 %}
2200 
2201 instruct subF_reg(regF dst, regF src) %{
2202   predicate((UseSSE>=1) && (UseAVX == 0));
2203   match(Set dst (SubF dst src));
2204 
2205   format %{ "subss   $dst, $src" %}
2206   ins_cost(150);
2207   ins_encode %{
2208     __ subss($dst$$XMMRegister, $src$$XMMRegister);
2209   %}
2210   ins_pipe(pipe_slow);
2211 %}
2212 
2213 instruct subF_mem(regF dst, memory src) %{
2214   predicate((UseSSE>=1) && (UseAVX == 0));
2215   match(Set dst (SubF dst (LoadF src)));
2216 
2217   format %{ "subss   $dst, $src" %}
2218   ins_cost(150);
2219   ins_encode %{
2220     __ subss($dst$$XMMRegister, $src$$Address);
2221   %}
2222   ins_pipe(pipe_slow);
2223 %}
2224 
2225 instruct subF_imm(regF dst, immF con) %{
2226   predicate((UseSSE>=1) && (UseAVX == 0));
2227   match(Set dst (SubF dst con));
2228   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2229   ins_cost(150);
2230   ins_encode %{
2231     __ subss($dst$$XMMRegister, $constantaddress($con));
2232   %}
2233   ins_pipe(pipe_slow);
2234 %}
2235 
2236 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
2237   predicate(UseAVX > 0);
2238   match(Set dst (SubF src1 src2));
2239 
2240   format %{ "vsubss  $dst, $src1, $src2" %}
2241   ins_cost(150);
2242   ins_encode %{
2243     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2244   %}
2245   ins_pipe(pipe_slow);
2246 %}
2247 
2248 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
2249   predicate(UseAVX > 0);
2250   match(Set dst (SubF src1 (LoadF src2)));
2251 
2252   format %{ "vsubss  $dst, $src1, $src2" %}
2253   ins_cost(150);
2254   ins_encode %{
2255     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2256   %}
2257   ins_pipe(pipe_slow);
2258 %}
2259 
2260 instruct subF_reg_imm(regF dst, regF src, immF con) %{
2261   predicate(UseAVX > 0);
2262   match(Set dst (SubF src con));
2263 
2264   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2265   ins_cost(150);
2266   ins_encode %{
2267     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2268   %}
2269   ins_pipe(pipe_slow);
2270 %}
2271 
2272 instruct subD_reg(regD dst, regD src) %{
2273   predicate((UseSSE>=2) && (UseAVX == 0));
2274   match(Set dst (SubD dst src));
2275 
2276   format %{ "subsd   $dst, $src" %}
2277   ins_cost(150);
2278   ins_encode %{
2279     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
2280   %}
2281   ins_pipe(pipe_slow);
2282 %}
2283 
2284 instruct subD_mem(regD dst, memory src) %{
2285   predicate((UseSSE>=2) && (UseAVX == 0));
2286   match(Set dst (SubD dst (LoadD src)));
2287 
2288   format %{ "subsd   $dst, $src" %}
2289   ins_cost(150);
2290   ins_encode %{
2291     __ subsd($dst$$XMMRegister, $src$$Address);
2292   %}
2293   ins_pipe(pipe_slow);
2294 %}
2295 
2296 instruct subD_imm(regD dst, immD con) %{
2297   predicate((UseSSE>=2) && (UseAVX == 0));
2298   match(Set dst (SubD dst con));
2299   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2300   ins_cost(150);
2301   ins_encode %{
2302     __ subsd($dst$$XMMRegister, $constantaddress($con));
2303   %}
2304   ins_pipe(pipe_slow);
2305 %}
2306 
2307 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
2308   predicate(UseAVX > 0);
2309   match(Set dst (SubD src1 src2));
2310 
2311   format %{ "vsubsd  $dst, $src1, $src2" %}
2312   ins_cost(150);
2313   ins_encode %{
2314     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2315   %}
2316   ins_pipe(pipe_slow);
2317 %}
2318 
2319 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
2320   predicate(UseAVX > 0);
2321   match(Set dst (SubD src1 (LoadD src2)));
2322 
2323   format %{ "vsubsd  $dst, $src1, $src2" %}
2324   ins_cost(150);
2325   ins_encode %{
2326     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2327   %}
2328   ins_pipe(pipe_slow);
2329 %}
2330 
2331 instruct subD_reg_imm(regD dst, regD src, immD con) %{
2332   predicate(UseAVX > 0);
2333   match(Set dst (SubD src con));
2334 
2335   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2336   ins_cost(150);
2337   ins_encode %{
2338     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2339   %}
2340   ins_pipe(pipe_slow);
2341 %}
2342 
2343 instruct mulF_reg(regF dst, regF src) %{
2344   predicate((UseSSE>=1) && (UseAVX == 0));
2345   match(Set dst (MulF dst src));
2346 
2347   format %{ "mulss   $dst, $src" %}
2348   ins_cost(150);
2349   ins_encode %{
2350     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
2351   %}
2352   ins_pipe(pipe_slow);
2353 %}
2354 
2355 instruct mulF_mem(regF dst, memory src) %{
2356   predicate((UseSSE>=1) && (UseAVX == 0));
2357   match(Set dst (MulF dst (LoadF src)));
2358 
2359   format %{ "mulss   $dst, $src" %}
2360   ins_cost(150);
2361   ins_encode %{
2362     __ mulss($dst$$XMMRegister, $src$$Address);
2363   %}
2364   ins_pipe(pipe_slow);
2365 %}
2366 
2367 instruct mulF_imm(regF dst, immF con) %{
2368   predicate((UseSSE>=1) && (UseAVX == 0));
2369   match(Set dst (MulF dst con));
2370   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2371   ins_cost(150);
2372   ins_encode %{
2373     __ mulss($dst$$XMMRegister, $constantaddress($con));
2374   %}
2375   ins_pipe(pipe_slow);
2376 %}
2377 
2378 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
2379   predicate(UseAVX > 0);
2380   match(Set dst (MulF src1 src2));
2381 
2382   format %{ "vmulss  $dst, $src1, $src2" %}
2383   ins_cost(150);
2384   ins_encode %{
2385     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2386   %}
2387   ins_pipe(pipe_slow);
2388 %}
2389 
2390 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
2391   predicate(UseAVX > 0);
2392   match(Set dst (MulF src1 (LoadF src2)));
2393 
2394   format %{ "vmulss  $dst, $src1, $src2" %}
2395   ins_cost(150);
2396   ins_encode %{
2397     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2398   %}
2399   ins_pipe(pipe_slow);
2400 %}
2401 
2402 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
2403   predicate(UseAVX > 0);
2404   match(Set dst (MulF src con));
2405 
2406   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2407   ins_cost(150);
2408   ins_encode %{
2409     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2410   %}
2411   ins_pipe(pipe_slow);
2412 %}
2413 
2414 instruct mulD_reg(regD dst, regD src) %{
2415   predicate((UseSSE>=2) && (UseAVX == 0));
2416   match(Set dst (MulD dst src));
2417 
2418   format %{ "mulsd   $dst, $src" %}
2419   ins_cost(150);
2420   ins_encode %{
2421     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
2422   %}
2423   ins_pipe(pipe_slow);
2424 %}
2425 
2426 instruct mulD_mem(regD dst, memory src) %{
2427   predicate((UseSSE>=2) && (UseAVX == 0));
2428   match(Set dst (MulD dst (LoadD src)));
2429 
2430   format %{ "mulsd   $dst, $src" %}
2431   ins_cost(150);
2432   ins_encode %{
2433     __ mulsd($dst$$XMMRegister, $src$$Address);
2434   %}
2435   ins_pipe(pipe_slow);
2436 %}
2437 
2438 instruct mulD_imm(regD dst, immD con) %{
2439   predicate((UseSSE>=2) && (UseAVX == 0));
2440   match(Set dst (MulD dst con));
2441   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2442   ins_cost(150);
2443   ins_encode %{
2444     __ mulsd($dst$$XMMRegister, $constantaddress($con));
2445   %}
2446   ins_pipe(pipe_slow);
2447 %}
2448 
2449 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
2450   predicate(UseAVX > 0);
2451   match(Set dst (MulD src1 src2));
2452 
2453   format %{ "vmulsd  $dst, $src1, $src2" %}
2454   ins_cost(150);
2455   ins_encode %{
2456     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2457   %}
2458   ins_pipe(pipe_slow);
2459 %}
2460 
2461 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
2462   predicate(UseAVX > 0);
2463   match(Set dst (MulD src1 (LoadD src2)));
2464 
2465   format %{ "vmulsd  $dst, $src1, $src2" %}
2466   ins_cost(150);
2467   ins_encode %{
2468     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2469   %}
2470   ins_pipe(pipe_slow);
2471 %}
2472 
2473 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
2474   predicate(UseAVX > 0);
2475   match(Set dst (MulD src con));
2476 
2477   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2478   ins_cost(150);
2479   ins_encode %{
2480     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2481   %}
2482   ins_pipe(pipe_slow);
2483 %}
2484 
2485 instruct divF_reg(regF dst, regF src) %{
2486   predicate((UseSSE>=1) && (UseAVX == 0));
2487   match(Set dst (DivF dst src));
2488 
2489   format %{ "divss   $dst, $src" %}
2490   ins_cost(150);
2491   ins_encode %{
2492     __ divss($dst$$XMMRegister, $src$$XMMRegister);
2493   %}
2494   ins_pipe(pipe_slow);
2495 %}
2496 
2497 instruct divF_mem(regF dst, memory src) %{
2498   predicate((UseSSE>=1) && (UseAVX == 0));
2499   match(Set dst (DivF dst (LoadF src)));
2500 
2501   format %{ "divss   $dst, $src" %}
2502   ins_cost(150);
2503   ins_encode %{
2504     __ divss($dst$$XMMRegister, $src$$Address);
2505   %}
2506   ins_pipe(pipe_slow);
2507 %}
2508 
2509 instruct divF_imm(regF dst, immF con) %{
2510   predicate((UseSSE>=1) && (UseAVX == 0));
2511   match(Set dst (DivF dst con));
2512   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2513   ins_cost(150);
2514   ins_encode %{
2515     __ divss($dst$$XMMRegister, $constantaddress($con));
2516   %}
2517   ins_pipe(pipe_slow);
2518 %}
2519 
2520 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
2521   predicate(UseAVX > 0);
2522   match(Set dst (DivF src1 src2));
2523 
2524   format %{ "vdivss  $dst, $src1, $src2" %}
2525   ins_cost(150);
2526   ins_encode %{
2527     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2528   %}
2529   ins_pipe(pipe_slow);
2530 %}
2531 
2532 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
2533   predicate(UseAVX > 0);
2534   match(Set dst (DivF src1 (LoadF src2)));
2535 
2536   format %{ "vdivss  $dst, $src1, $src2" %}
2537   ins_cost(150);
2538   ins_encode %{
2539     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2540   %}
2541   ins_pipe(pipe_slow);
2542 %}
2543 
2544 instruct divF_reg_imm(regF dst, regF src, immF con) %{
2545   predicate(UseAVX > 0);
2546   match(Set dst (DivF src con));
2547 
2548   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2549   ins_cost(150);
2550   ins_encode %{
2551     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2552   %}
2553   ins_pipe(pipe_slow);
2554 %}
2555 
2556 instruct divD_reg(regD dst, regD src) %{
2557   predicate((UseSSE>=2) && (UseAVX == 0));
2558   match(Set dst (DivD dst src));
2559 
2560   format %{ "divsd   $dst, $src" %}
2561   ins_cost(150);
2562   ins_encode %{
2563     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
2564   %}
2565   ins_pipe(pipe_slow);
2566 %}
2567 
2568 instruct divD_mem(regD dst, memory src) %{
2569   predicate((UseSSE>=2) && (UseAVX == 0));
2570   match(Set dst (DivD dst (LoadD src)));
2571 
2572   format %{ "divsd   $dst, $src" %}
2573   ins_cost(150);
2574   ins_encode %{
2575     __ divsd($dst$$XMMRegister, $src$$Address);
2576   %}
2577   ins_pipe(pipe_slow);
2578 %}
2579 
2580 instruct divD_imm(regD dst, immD con) %{
2581   predicate((UseSSE>=2) && (UseAVX == 0));
2582   match(Set dst (DivD dst con));
2583   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2584   ins_cost(150);
2585   ins_encode %{
2586     __ divsd($dst$$XMMRegister, $constantaddress($con));
2587   %}
2588   ins_pipe(pipe_slow);
2589 %}
2590 
2591 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
2592   predicate(UseAVX > 0);
2593   match(Set dst (DivD src1 src2));
2594 
2595   format %{ "vdivsd  $dst, $src1, $src2" %}
2596   ins_cost(150);
2597   ins_encode %{
2598     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2599   %}
2600   ins_pipe(pipe_slow);
2601 %}
2602 
2603 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
2604   predicate(UseAVX > 0);
2605   match(Set dst (DivD src1 (LoadD src2)));
2606 
2607   format %{ "vdivsd  $dst, $src1, $src2" %}
2608   ins_cost(150);
2609   ins_encode %{
2610     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2611   %}
2612   ins_pipe(pipe_slow);
2613 %}
2614 
2615 instruct divD_reg_imm(regD dst, regD src, immD con) %{
2616   predicate(UseAVX > 0);
2617   match(Set dst (DivD src con));
2618 
2619   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2620   ins_cost(150);
2621   ins_encode %{
2622     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2623   %}
2624   ins_pipe(pipe_slow);
2625 %}
2626 
2627 instruct absF_reg(regF dst) %{
2628   predicate((UseSSE>=1) && (UseAVX == 0));
2629   match(Set dst (AbsF dst));
2630   ins_cost(150);
2631   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
2632   ins_encode %{
2633     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
2634   %}
2635   ins_pipe(pipe_slow);
2636 %}
2637 
2638 instruct absF_reg_reg(regF dst, regF src) %{
2639   predicate(UseAVX > 0);
2640   match(Set dst (AbsF src));
2641   ins_cost(150);
2642   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
2643   ins_encode %{
2644     int vector_len = 0;
2645     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
2646               ExternalAddress(float_signmask()), vector_len);
2647   %}
2648   ins_pipe(pipe_slow);
2649 %}
2650 
2651 instruct absD_reg(regD dst) %{
2652   predicate((UseSSE>=2) && (UseAVX == 0));
2653   match(Set dst (AbsD dst));
2654   ins_cost(150);
2655   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
2656             "# abs double by sign masking" %}
2657   ins_encode %{
2658     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
2659   %}
2660   ins_pipe(pipe_slow);
2661 %}
2662 
2663 instruct absD_reg_reg(regD dst, regD src) %{
2664   predicate(UseAVX > 0);
2665   match(Set dst (AbsD src));
2666   ins_cost(150);
2667   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
2668             "# abs double by sign masking" %}
2669   ins_encode %{
2670     int vector_len = 0;
2671     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
2672               ExternalAddress(double_signmask()), vector_len);
2673   %}
2674   ins_pipe(pipe_slow);
2675 %}
2676 
2677 instruct negF_reg(regF dst) %{
2678   predicate((UseSSE>=1) && (UseAVX == 0));
2679   match(Set dst (NegF dst));
2680   ins_cost(150);
2681   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
2682   ins_encode %{
2683     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
2684   %}
2685   ins_pipe(pipe_slow);
2686 %}
2687 
2688 instruct negF_reg_reg(regF dst, regF src) %{
2689   predicate(UseAVX > 0);
2690   match(Set dst (NegF src));
2691   ins_cost(150);
2692   format %{ "vxorps  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
2693   ins_encode %{
2694     int vector_len = 0;
2695     __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
2696               ExternalAddress(float_signflip()), vector_len);
2697   %}
2698   ins_pipe(pipe_slow);
2699 %}
2700 
2701 instruct negD_reg(regD dst) %{
2702   predicate((UseSSE>=2) && (UseAVX == 0));
2703   match(Set dst (NegD dst));
2704   ins_cost(150);
2705   format %{ "xorpd   $dst, [0x8000000000000000]\t"
2706             "# neg double by sign flipping" %}
2707   ins_encode %{
2708     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
2709   %}
2710   ins_pipe(pipe_slow);
2711 %}
2712 
2713 instruct negD_reg_reg(regD dst, regD src) %{
2714   predicate(UseAVX > 0);
2715   match(Set dst (NegD src));
2716   ins_cost(150);
2717   format %{ "vxorpd  $dst, $src, [0x8000000000000000]\t"
2718             "# neg double by sign flipping" %}
2719   ins_encode %{
2720     int vector_len = 0;
2721     __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
2722               ExternalAddress(double_signflip()), vector_len);
2723   %}
2724   ins_pipe(pipe_slow);
2725 %}
2726 
2727 instruct sqrtF_reg(regF dst, regF src) %{
2728   predicate(UseSSE>=1);
2729   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
2730 
2731   format %{ "sqrtss  $dst, $src" %}
2732   ins_cost(150);
2733   ins_encode %{
2734     __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
2735   %}
2736   ins_pipe(pipe_slow);
2737 %}
2738 
2739 instruct sqrtF_mem(regF dst, memory src) %{
2740   predicate(UseSSE>=1);
2741   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
2742 
2743   format %{ "sqrtss  $dst, $src" %}
2744   ins_cost(150);
2745   ins_encode %{
2746     __ sqrtss($dst$$XMMRegister, $src$$Address);
2747   %}
2748   ins_pipe(pipe_slow);
2749 %}
2750 
2751 instruct sqrtF_imm(regF dst, immF con) %{
2752   predicate(UseSSE>=1);
2753   match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
2754   format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2755   ins_cost(150);
2756   ins_encode %{
2757     __ sqrtss($dst$$XMMRegister, $constantaddress($con));
2758   %}
2759   ins_pipe(pipe_slow);
2760 %}
2761 
2762 instruct sqrtD_reg(regD dst, regD src) %{
2763   predicate(UseSSE>=2);
2764   match(Set dst (SqrtD src));
2765 
2766   format %{ "sqrtsd  $dst, $src" %}
2767   ins_cost(150);
2768   ins_encode %{
2769     __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
2770   %}
2771   ins_pipe(pipe_slow);
2772 %}
2773 
2774 instruct sqrtD_mem(regD dst, memory src) %{
2775   predicate(UseSSE>=2);
2776   match(Set dst (SqrtD (LoadD src)));
2777 
2778   format %{ "sqrtsd  $dst, $src" %}
2779   ins_cost(150);
2780   ins_encode %{
2781     __ sqrtsd($dst$$XMMRegister, $src$$Address);
2782   %}
2783   ins_pipe(pipe_slow);
2784 %}
2785 
2786 instruct sqrtD_imm(regD dst, immD con) %{
2787   predicate(UseSSE>=2);
2788   match(Set dst (SqrtD con));
2789   format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2790   ins_cost(150);
2791   ins_encode %{
2792     __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
2793   %}
2794   ins_pipe(pipe_slow);
2795 %}
2796 
2797 // ====================VECTOR INSTRUCTIONS=====================================
2798 
2799 // Load vectors (4 bytes long)
2800 instruct loadV4(vecS dst, memory mem) %{
2801   predicate(n->as_LoadVector()->memory_size() == 4);
2802   match(Set dst (LoadVector mem));
2803   ins_cost(125);
2804   format %{ "movd    $dst,$mem\t! load vector (4 bytes)" %}
2805   ins_encode %{
2806     __ movdl($dst$$XMMRegister, $mem$$Address);
2807   %}
2808   ins_pipe( pipe_slow );
2809 %}
2810 
2811 // Load vectors (8 bytes long)
2812 instruct loadV8(vecD dst, memory mem) %{
2813   predicate(n->as_LoadVector()->memory_size() == 8);
2814   match(Set dst (LoadVector mem));
2815   ins_cost(125);
2816   format %{ "movq    $dst,$mem\t! load vector (8 bytes)" %}
2817   ins_encode %{
2818     __ movq($dst$$XMMRegister, $mem$$Address);
2819   %}
2820   ins_pipe( pipe_slow );
2821 %}
2822 
2823 // Load vectors (16 bytes long)
2824 instruct loadV16(vecX dst, memory mem) %{
2825   predicate(n->as_LoadVector()->memory_size() == 16);
2826   match(Set dst (LoadVector mem));
2827   ins_cost(125);
2828   format %{ "movdqu  $dst,$mem\t! load vector (16 bytes)" %}
2829   ins_encode %{
2830     __ movdqu($dst$$XMMRegister, $mem$$Address);
2831   %}
2832   ins_pipe( pipe_slow );
2833 %}
2834 
2835 // Load vectors (32 bytes long)
2836 instruct loadV32(vecY dst, memory mem) %{
2837   predicate(n->as_LoadVector()->memory_size() == 32);
2838   match(Set dst (LoadVector mem));
2839   ins_cost(125);
2840   format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
2841   ins_encode %{
2842     __ vmovdqu($dst$$XMMRegister, $mem$$Address);
2843   %}
2844   ins_pipe( pipe_slow );
2845 %}
2846 
2847 // Load vectors (64 bytes long)
2848 instruct loadV64(vecZ dst, memory mem) %{
2849   predicate(n->as_LoadVector()->memory_size() == 64);
2850   match(Set dst (LoadVector mem));
2851   ins_cost(125);
2852   format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %}
2853   ins_encode %{
2854     int vector_len = 2;
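    // vector_len of 2 selects the 512-bit (EVEX) form; 0 and 1 would select the 128- and 256-bit forms.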
2855     __ evmovdqu($dst$$XMMRegister, $mem$$Address, vector_len);
2856   %}
2857   ins_pipe( pipe_slow );
2858 %}
2859 
2860 // Store vectors
2861 instruct storeV4(memory mem, vecS src) %{
2862   predicate(n->as_StoreVector()->memory_size() == 4);
2863   match(Set mem (StoreVector mem src));
2864   ins_cost(145);
2865   format %{ "movd    $mem,$src\t! store vector (4 bytes)" %}
2866   ins_encode %{
2867     __ movdl($mem$$Address, $src$$XMMRegister);
2868   %}
2869   ins_pipe( pipe_slow );
2870 %}
2871 
2872 instruct storeV8(memory mem, vecD src) %{
2873   predicate(n->as_StoreVector()->memory_size() == 8);
2874   match(Set mem (StoreVector mem src));
2875   ins_cost(145);
2876   format %{ "movq    $mem,$src\t! store vector (8 bytes)" %}
2877   ins_encode %{
2878     __ movq($mem$$Address, $src$$XMMRegister);
2879   %}
2880   ins_pipe( pipe_slow );
2881 %}
2882 
2883 instruct storeV16(memory mem, vecX src) %{
2884   predicate(n->as_StoreVector()->memory_size() == 16);
2885   match(Set mem (StoreVector mem src));
2886   ins_cost(145);
2887   format %{ "movdqu  $mem,$src\t! store vector (16 bytes)" %}
2888   ins_encode %{
2889     __ movdqu($mem$$Address, $src$$XMMRegister);
2890   %}
2891   ins_pipe( pipe_slow );
2892 %}
2893 
2894 instruct storeV32(memory mem, vecY src) %{
2895   predicate(n->as_StoreVector()->memory_size() == 32);
2896   match(Set mem (StoreVector mem src));
2897   ins_cost(145);
2898   format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
2899   ins_encode %{
2900     __ vmovdqu($mem$$Address, $src$$XMMRegister);
2901   %}
2902   ins_pipe( pipe_slow );
2903 %}
2904 
2905 instruct storeV64(memory mem, vecZ src) %{
2906   predicate(n->as_StoreVector()->memory_size() == 64);
2907   match(Set mem (StoreVector mem src));
2908   ins_cost(145);
2909   format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %}
2910   ins_encode %{
2911     int vector_len = 2;
2912     __ evmovdqu($mem$$Address, $src$$XMMRegister, vector_len);
2913   %}
2914   ins_pipe( pipe_slow );
2915 %}
2916 
2917 // ====================LEGACY REPLICATE=======================================
2918 
2919 instruct Repl4B_mem(vecS dst, memory mem) %{
2920   predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
2921   match(Set dst (ReplicateB (LoadB mem)));
2922   format %{ "punpcklbw $dst,$mem\n\t"
2923             "pshuflw $dst,$dst,0x00\t! replicate4B" %}
2924   ins_encode %{
2925     __ punpcklbw($dst$$XMMRegister, $mem$$Address);
2926     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2927   %}
2928   ins_pipe( pipe_slow );
2929 %}
2930 
2931 instruct Repl8B_mem(vecD dst, memory mem) %{
2932   predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
2933   match(Set dst (ReplicateB (LoadB mem)));
2934   format %{ "punpcklbw $dst,$mem\n\t"
2935             "pshuflw $dst,$dst,0x00\t! replicate8B" %}
2936   ins_encode %{
2937     __ punpcklbw($dst$$XMMRegister, $mem$$Address);
2938     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2939   %}
2940   ins_pipe( pipe_slow );
2941 %}
2942 
2943 instruct Repl16B(vecX dst, rRegI src) %{
2944   predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
2945   match(Set dst (ReplicateB src));
2946   format %{ "movd    $dst,$src\n\t"
2947             "punpcklbw $dst,$dst\n\t"
2948             "pshuflw $dst,$dst,0x00\n\t"
2949             "punpcklqdq $dst,$dst\t! replicate16B" %}
2950   ins_encode %{
2951     __ movdl($dst$$XMMRegister, $src$$Register);
2952     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
2953     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2954     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2955   %}
2956   ins_pipe( pipe_slow );
2957 %}
2958 
2959 instruct Repl16B_mem(vecX dst, memory mem) %{
2960   predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
2961   match(Set dst (ReplicateB (LoadB mem)));
2962   format %{ "punpcklbw $dst,$mem\n\t"
2963             "pshuflw $dst,$dst,0x00\n\t"
2964             "punpcklqdq $dst,$dst\t! replicate16B" %}
2965   ins_encode %{
2966     __ punpcklbw($dst$$XMMRegister, $mem$$Address);
2967     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2968     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2969   %}
2970   ins_pipe( pipe_slow );
2971 %}
2972 
2973 instruct Repl32B(vecY dst, rRegI src) %{
2974   predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
2975   match(Set dst (ReplicateB src));
2976   format %{ "movd    $dst,$src\n\t"
2977             "punpcklbw $dst,$dst\n\t"
2978             "pshuflw $dst,$dst,0x00\n\t"
2979             "punpcklqdq $dst,$dst\n\t"
2980             "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
2981   ins_encode %{
2982     __ movdl($dst$$XMMRegister, $src$$Register);
2983     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
2984     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2985     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2986     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2987   %}
2988   ins_pipe( pipe_slow );
2989 %}
2990 
2991 instruct Repl32B_mem(vecY dst, memory mem) %{
2992   predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
2993   match(Set dst (ReplicateB (LoadB mem)));
2994   format %{ "punpcklbw $dst,$mem\n\t"
2995             "pshuflw $dst,$dst,0x00\n\t"
2996             "punpcklqdq $dst,$dst\n\t"
2997             "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
2998   ins_encode %{
2999     __ punpcklbw($dst$$XMMRegister, $mem$$Address);
3000     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3001     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3002     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3003   %}
3004   ins_pipe( pipe_slow );
3005 %}
3006 
3007 instruct Repl16B_imm(vecX dst, immI con) %{
3008   predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
3009   match(Set dst (ReplicateB con));
3010   format %{ "movq    $dst,[$constantaddress]\n\t"
3011             "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
3012   ins_encode %{
3013     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3014     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3015   %}
3016   ins_pipe( pipe_slow );
3017 %}
3018 
3019 instruct Repl32B_imm(vecY dst, immI con) %{
3020   predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
3021   match(Set dst (ReplicateB con));
3022   format %{ "movq    $dst,[$constantaddress]\n\t"
3023             "punpcklqdq $dst,$dst\n\t"
3024             "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %}
3025   ins_encode %{
3026     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3027     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3028     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3029   %}
3030   ins_pipe( pipe_slow );
3031 %}
3032 
3033 instruct Repl4S(vecD dst, rRegI src) %{
3034   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw());
3035   match(Set dst (ReplicateS src));
3036   format %{ "movd    $dst,$src\n\t"
3037             "pshuflw $dst,$dst,0x00\t! replicate4S" %}
3038   ins_encode %{
3039     __ movdl($dst$$XMMRegister, $src$$Register);
3040     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3041   %}
3042   ins_pipe( pipe_slow );
3043 %}
3044 
3045 instruct Repl4S_mem(vecD dst, memory mem) %{
3046   predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
3047   match(Set dst (ReplicateS (LoadS mem)));
3048   format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %}
3049   ins_encode %{
3050     __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
3051   %}
3052   ins_pipe( pipe_slow );
3053 %}
3054 
3055 instruct Repl8S(vecX dst, rRegI src) %{
3056   predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
3057   match(Set dst (ReplicateS src));
3058   format %{ "movd    $dst,$src\n\t"
3059             "pshuflw $dst,$dst,0x00\n\t"
3060             "punpcklqdq $dst,$dst\t! replicate8S" %}
3061   ins_encode %{
3062     __ movdl($dst$$XMMRegister, $src$$Register);
3063     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3064     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3065   %}
3066   ins_pipe( pipe_slow );
3067 %}
3068 
3069 instruct Repl8S_mem(vecX dst, memory mem) %{
3070   predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
3071   match(Set dst (ReplicateS (LoadS mem)));
3072   format %{ "pshuflw $dst,$mem,0x00\n\t"
3073             "punpcklqdq $dst,$dst\t! replicate8S" %}
3074   ins_encode %{
3075     __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
3076     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3077   %}
3078   ins_pipe( pipe_slow );
3079 %}
3080 
3081 instruct Repl8S_imm(vecX dst, immI con) %{
3082   predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
3083   match(Set dst (ReplicateS con));
3084   format %{ "movq    $dst,[$constantaddress]\n\t"
3085             "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
3086   ins_encode %{
3087     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3088     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3089   %}
3090   ins_pipe( pipe_slow );
3091 %}
3092 
3093 instruct Repl16S(vecY dst, rRegI src) %{
3094   predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
3095   match(Set dst (ReplicateS src));
3096   format %{ "movd    $dst,$src\n\t"
3097             "pshuflw $dst,$dst,0x00\n\t"
3098             "punpcklqdq $dst,$dst\n\t"
3099             "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
3100   ins_encode %{
3101     __ movdl($dst$$XMMRegister, $src$$Register);
3102     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3103     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3104     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3105   %}
3106   ins_pipe( pipe_slow );
3107 %}
3108 
3109 instruct Repl16S_mem(vecY dst, memory mem) %{
3110   predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
3111   match(Set dst (ReplicateS (LoadS mem)));
3112   format %{ "pshuflw $dst,$mem,0x00\n\t"
3113             "punpcklqdq $dst,$dst\n\t"
3114             "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
3115   ins_encode %{
3116     __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
3117     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3118     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3119   %}
3120   ins_pipe( pipe_slow );
3121 %}
3122 
3123 instruct Repl16S_imm(vecY dst, immI con) %{
3124   predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
3125   match(Set dst (ReplicateS con));
3126   format %{ "movq    $dst,[$constantaddress]\n\t"
3127             "punpcklqdq $dst,$dst\n\t"
3128             "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
3129   ins_encode %{
3130     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3131     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3132     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3133   %}
3134   ins_pipe( pipe_slow );
3135 %}
3136 
3137 instruct Repl4I(vecX dst, rRegI src) %{
3138   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
3139   match(Set dst (ReplicateI src));
3140   format %{ "movd    $dst,$src\n\t"
3141             "pshufd  $dst,$dst,0x00\t! replicate4I" %}
3142   ins_encode %{
3143     __ movdl($dst$$XMMRegister, $src$$Register);
3144     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3145   %}
3146   ins_pipe( pipe_slow );
3147 %}
3148 
3149 instruct Repl4I_mem(vecX dst, memory mem) %{
3150   predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
3151   match(Set dst (ReplicateI (LoadI mem)));
3152   format %{ "pshufd  $dst,$mem,0x00\t! replicate4I" %}
3153   ins_encode %{
3154     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
3155   %}
3156   ins_pipe( pipe_slow );
3157 %}
3158 
3159 instruct Repl8I(vecY dst, rRegI src) %{
3160   predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
3161   match(Set dst (ReplicateI src));
3162   format %{ "movd    $dst,$src\n\t"
3163             "pshufd  $dst,$dst,0x00\n\t"
3164             "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
3165   ins_encode %{
3166     __ movdl($dst$$XMMRegister, $src$$Register);
3167     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3168     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3169   %}
3170   ins_pipe( pipe_slow );
3171 %}
3172 
3173 instruct Repl8I_mem(vecY dst, memory mem) %{
3174   predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
3175   match(Set dst (ReplicateI (LoadI mem)));
3176   format %{ "pshufd  $dst,$mem,0x00\n\t"
3177             "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
3178   ins_encode %{
3179     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
3180     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3181   %}
3182   ins_pipe( pipe_slow );
3183 %}
3184 
3185 instruct Repl4I_imm(vecX dst, immI con) %{
3186   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
3187   match(Set dst (ReplicateI con));
3188   format %{ "movq    $dst,[$constantaddress]\t! replicate4I($con)\n\t"
3189             "punpcklqdq $dst,$dst" %}
3190   ins_encode %{
3191     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
3192     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3193   %}
3194   ins_pipe( pipe_slow );
3195 %}
3196 
3197 instruct Repl8I_imm(vecY dst, immI con) %{
3198   predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
3199   match(Set dst (ReplicateI con));
3200   format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
3201             "punpcklqdq $dst,$dst\n\t"
3202             "vinserti128h $dst,$dst,$dst" %}
3203   ins_encode %{
3204     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
3205     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3206     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3207   %}
3208   ins_pipe( pipe_slow );
3209 %}
3210 
// A long can be loaded into an XMM register directly from memory.
3212 instruct Repl2L_mem(vecX dst, memory mem) %{
3213   predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw());
3214   match(Set dst (ReplicateL (LoadL mem)));
3215   format %{ "movq    $dst,$mem\n\t"
3216             "punpcklqdq $dst,$dst\t! replicate2L" %}
3217   ins_encode %{
3218     __ movq($dst$$XMMRegister, $mem$$Address);
3219     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3220   %}
3221   ins_pipe( pipe_slow );
3222 %}
3223 
3224 // Replicate long (8 byte) scalar to be vector
3225 #ifdef _LP64
3226 instruct Repl4L(vecY dst, rRegL src) %{
3227   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
3228   match(Set dst (ReplicateL src));
3229   format %{ "movdq   $dst,$src\n\t"
3230             "punpcklqdq $dst,$dst\n\t"
3231             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
3232   ins_encode %{
3233     __ movdq($dst$$XMMRegister, $src$$Register);
3234     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3235     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3236   %}
3237   ins_pipe( pipe_slow );
3238 %}
3239 #else // _LP64
3240 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
3241   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
3242   match(Set dst (ReplicateL src));
3243   effect(TEMP dst, USE src, TEMP tmp);
3244   format %{ "movdl   $dst,$src.lo\n\t"
3245             "movdl   $tmp,$src.hi\n\t"
3246             "punpckldq $dst,$tmp\n\t"
3247             "punpcklqdq $dst,$dst\n\t"
3248             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
3249   ins_encode %{
3250     __ movdl($dst$$XMMRegister, $src$$Register);
3251     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3252     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3253     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3254     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3255   %}
3256   ins_pipe( pipe_slow );
3257 %}
3258 #endif // _LP64
3259 
3260 instruct Repl4L_imm(vecY dst, immL con) %{
3261   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
3262   match(Set dst (ReplicateL con));
3263   format %{ "movq    $dst,[$constantaddress]\n\t"
3264             "punpcklqdq $dst,$dst\n\t"
3265             "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
3266   ins_encode %{
3267     __ movq($dst$$XMMRegister, $constantaddress($con));
3268     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3269     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3270   %}
3271   ins_pipe( pipe_slow );
3272 %}
3273 
3274 instruct Repl4L_mem(vecY dst, memory mem) %{
3275   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
3276   match(Set dst (ReplicateL (LoadL mem)));
3277   format %{ "movq    $dst,$mem\n\t"
3278             "punpcklqdq $dst,$dst\n\t"
3279             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
3280   ins_encode %{
3281     __ movq($dst$$XMMRegister, $mem$$Address);
3282     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3283     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3284   %}
3285   ins_pipe( pipe_slow );
3286 %}
3287 
3288 instruct Repl2F_mem(vecD dst, memory mem) %{
3289   predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
3290   match(Set dst (ReplicateF (LoadF mem)));
3291   format %{ "pshufd  $dst,$mem,0x00\t! replicate2F" %}
3292   ins_encode %{
3293     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
3294   %}
3295   ins_pipe( pipe_slow );
3296 %}
3297 
3298 instruct Repl4F_mem(vecX dst, memory mem) %{
3299   predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
3300   match(Set dst (ReplicateF (LoadF mem)));
3301   format %{ "pshufd  $dst,$mem,0x00\t! replicate4F" %}
3302   ins_encode %{
3303     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
3304   %}
3305   ins_pipe( pipe_slow );
3306 %}
3307 
3308 instruct Repl8F(vecY dst, regF src) %{
3309   predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
3310   match(Set dst (ReplicateF src));
3311   format %{ "pshufd  $dst,$src,0x00\n\t"
3312             "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
3313   ins_encode %{
3314     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3315     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3316   %}
3317   ins_pipe( pipe_slow );
3318 %}
3319 
3320 instruct Repl8F_mem(vecY dst, memory mem) %{
3321   predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
3322   match(Set dst (ReplicateF (LoadF mem)));
3323   format %{ "pshufd  $dst,$mem,0x00\n\t"
3324             "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
3325   ins_encode %{
3326     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
3327     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3328   %}
3329   ins_pipe( pipe_slow );
3330 %}
3331 
3332 instruct Repl2D_mem(vecX dst, memory mem) %{
3333   predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
3334   match(Set dst (ReplicateD (LoadD mem)));
3335   format %{ "pshufd  $dst,$mem,0x44\t! replicate2D" %}
3336   ins_encode %{
3337     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
3338   %}
3339   ins_pipe( pipe_slow );
3340 %}
3341 
3342 instruct Repl4D(vecY dst, regD src) %{
3343   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
3344   match(Set dst (ReplicateD src));
3345   format %{ "pshufd  $dst,$src,0x44\n\t"
3346             "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
3347   ins_encode %{
3348     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
3349     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3350   %}
3351   ins_pipe( pipe_slow );
3352 %}
3353 
3354 instruct Repl4D_mem(vecY dst, memory mem) %{
3355   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
3356   match(Set dst (ReplicateD (LoadD mem)));
3357   format %{ "pshufd  $dst,$mem,0x44\n\t"
3358             "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
3359   ins_encode %{
3360     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
3361     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3362   %}
3363   ins_pipe( pipe_slow );
3364 %}
3365 
3366 // ====================GENERIC REPLICATE==========================================
3367 
3368 // Replicate byte scalar to be vector
3369 instruct Repl4B(vecS dst, rRegI src) %{
3370   predicate(n->as_Vector()->length() == 4);
3371   match(Set dst (ReplicateB src));
3372   format %{ "movd    $dst,$src\n\t"
3373             "punpcklbw $dst,$dst\n\t"
3374             "pshuflw $dst,$dst,0x00\t! replicate4B" %}
3375   ins_encode %{
3376     __ movdl($dst$$XMMRegister, $src$$Register);
3377     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
3378     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3379   %}
3380   ins_pipe( pipe_slow );
3381 %}
3382 
3383 instruct Repl8B(vecD dst, rRegI src) %{
3384   predicate(n->as_Vector()->length() == 8);
3385   match(Set dst (ReplicateB src));
3386   format %{ "movd    $dst,$src\n\t"
3387             "punpcklbw $dst,$dst\n\t"
3388             "pshuflw $dst,$dst,0x00\t! replicate8B" %}
3389   ins_encode %{
3390     __ movdl($dst$$XMMRegister, $src$$Register);
3391     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
3392     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3393   %}
3394   ins_pipe( pipe_slow );
3395 %}
3396 
3397 // Replicate byte scalar immediate to be vector by loading from const table.
3398 instruct Repl4B_imm(vecS dst, immI con) %{
3399   predicate(n->as_Vector()->length() == 4);
3400   match(Set dst (ReplicateB con));
3401   format %{ "movdl   $dst,[$constantaddress]\t! replicate4B($con)" %}
3402   ins_encode %{
3403     __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
3404   %}
3405   ins_pipe( pipe_slow );
3406 %}
3407 
3408 instruct Repl8B_imm(vecD dst, immI con) %{
3409   predicate(n->as_Vector()->length() == 8);
3410   match(Set dst (ReplicateB con));
3411   format %{ "movq    $dst,[$constantaddress]\t! replicate8B($con)" %}
3412   ins_encode %{
3413     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3414   %}
3415   ins_pipe( pipe_slow );
3416 %}
3417 
3418 // Replicate byte scalar zero to be vector
3419 instruct Repl4B_zero(vecS dst, immI0 zero) %{
3420   predicate(n->as_Vector()->length() == 4);
3421   match(Set dst (ReplicateB zero));
3422   format %{ "pxor    $dst,$dst\t! replicate4B zero" %}
3423   ins_encode %{
3424     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3425   %}
3426   ins_pipe( fpu_reg_reg );
3427 %}
3428 
3429 instruct Repl8B_zero(vecD dst, immI0 zero) %{
3430   predicate(n->as_Vector()->length() == 8);
3431   match(Set dst (ReplicateB zero));
3432   format %{ "pxor    $dst,$dst\t! replicate8B zero" %}
3433   ins_encode %{
3434     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3435   %}
3436   ins_pipe( fpu_reg_reg );
3437 %}
3438 
3439 instruct Repl16B_zero(vecX dst, immI0 zero) %{
3440   predicate(n->as_Vector()->length() == 16);
3441   match(Set dst (ReplicateB zero));
3442   format %{ "pxor    $dst,$dst\t! replicate16B zero" %}
3443   ins_encode %{
3444     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3445   %}
3446   ins_pipe( fpu_reg_reg );
3447 %}
3448 
3449 instruct Repl32B_zero(vecY dst, immI0 zero) %{
3450   predicate(n->as_Vector()->length() == 32);
3451   match(Set dst (ReplicateB zero));
3452   format %{ "vpxor   $dst,$dst,$dst\t! replicate32B zero" %}
3453   ins_encode %{
    // 256-bit vpxor requires AVX2; on plain AVX this would have to be vxorps/vxorpd.
3455     int vector_len = 1;
3456     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3457   %}
3458   ins_pipe( fpu_reg_reg );
3459 %}
3460 
3461 // Replicate char/short (2 byte) scalar to be vector
3462 instruct Repl2S(vecS dst, rRegI src) %{
3463   predicate(n->as_Vector()->length() == 2);
3464   match(Set dst (ReplicateS src));
3465   format %{ "movd    $dst,$src\n\t"
3466             "pshuflw $dst,$dst,0x00\t! replicate2S" %}
3467   ins_encode %{
3468     __ movdl($dst$$XMMRegister, $src$$Register);
3469     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3470   %}
3471   ins_pipe( fpu_reg_reg );
3472 %}
3473 
3474 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
3475 instruct Repl2S_imm(vecS dst, immI con) %{
3476   predicate(n->as_Vector()->length() == 2);
3477   match(Set dst (ReplicateS con));
3478   format %{ "movdl   $dst,[$constantaddress]\t! replicate2S($con)" %}
3479   ins_encode %{
3480     __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
3481   %}
3482   ins_pipe( fpu_reg_reg );
3483 %}
3484 
3485 instruct Repl4S_imm(vecD dst, immI con) %{
3486   predicate(n->as_Vector()->length() == 4);
3487   match(Set dst (ReplicateS con));
3488   format %{ "movq    $dst,[$constantaddress]\t! replicate4S($con)" %}
3489   ins_encode %{
3490     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3491   %}
3492   ins_pipe( fpu_reg_reg );
3493 %}
3494 
3495 // Replicate char/short (2 byte) scalar zero to be vector
3496 instruct Repl2S_zero(vecS dst, immI0 zero) %{
3497   predicate(n->as_Vector()->length() == 2);
3498   match(Set dst (ReplicateS zero));
3499   format %{ "pxor    $dst,$dst\t! replicate2S zero" %}
3500   ins_encode %{
3501     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3502   %}
3503   ins_pipe( fpu_reg_reg );
3504 %}
3505 
3506 instruct Repl4S_zero(vecD dst, immI0 zero) %{
3507   predicate(n->as_Vector()->length() == 4);
3508   match(Set dst (ReplicateS zero));
3509   format %{ "pxor    $dst,$dst\t! replicate4S zero" %}
3510   ins_encode %{
3511     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3512   %}
3513   ins_pipe( fpu_reg_reg );
3514 %}
3515 
3516 instruct Repl8S_zero(vecX dst, immI0 zero) %{
3517   predicate(n->as_Vector()->length() == 8);
3518   match(Set dst (ReplicateS zero));
3519   format %{ "pxor    $dst,$dst\t! replicate8S zero" %}
3520   ins_encode %{
3521     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3522   %}
3523   ins_pipe( fpu_reg_reg );
3524 %}
3525 
3526 instruct Repl16S_zero(vecY dst, immI0 zero) %{
3527   predicate(n->as_Vector()->length() == 16);
3528   match(Set dst (ReplicateS zero));
3529   format %{ "vpxor   $dst,$dst,$dst\t! replicate16S zero" %}
3530   ins_encode %{
    // 256-bit vpxor requires AVX2; on plain AVX this would have to be vxorps/vxorpd.
3532     int vector_len = 1;
3533     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3534   %}
3535   ins_pipe( fpu_reg_reg );
3536 %}
3537 
3538 // Replicate integer (4 byte) scalar to be vector
3539 instruct Repl2I(vecD dst, rRegI src) %{
3540   predicate(n->as_Vector()->length() == 2);
3541   match(Set dst (ReplicateI src));
3542   format %{ "movd    $dst,$src\n\t"
3543             "pshufd  $dst,$dst,0x00\t! replicate2I" %}
3544   ins_encode %{
3545     __ movdl($dst$$XMMRegister, $src$$Register);
3546     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3547   %}
3548   ins_pipe( fpu_reg_reg );
3549 %}
3550 
3551 // The integer can be loaded into the XMM register directly from memory.
3552 instruct Repl2I_mem(vecD dst, memory mem) %{
3553   predicate(n->as_Vector()->length() == 2);
3554   match(Set dst (ReplicateI (LoadI mem)));
3555   format %{ "movd    $dst,$mem\n\t"
3556             "pshufd  $dst,$dst,0x00\t! replicate2I" %}
3557   ins_encode %{
3558     __ movdl($dst$$XMMRegister, $mem$$Address);
3559     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3560   %}
3561   ins_pipe( fpu_reg_reg );
3562 %}
3563 
3564 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
3565 instruct Repl2I_imm(vecD dst, immI con) %{
3566   predicate(n->as_Vector()->length() == 2);
3567   match(Set dst (ReplicateI con));
3568   format %{ "movq    $dst,[$constantaddress]\t! replicate2I($con)" %}
3569   ins_encode %{
3570     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
3571   %}
3572   ins_pipe( fpu_reg_reg );
3573 %}
3574 
3575 // Replicate integer (4 byte) scalar zero to be vector
3576 instruct Repl2I_zero(vecD dst, immI0 zero) %{
3577   predicate(n->as_Vector()->length() == 2);
3578   match(Set dst (ReplicateI zero));
3579   format %{ "pxor    $dst,$dst\t! replicate2I zero" %}
3580   ins_encode %{
3581     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3582   %}
3583   ins_pipe( fpu_reg_reg );
3584 %}
3585 
3586 instruct Repl4I_zero(vecX dst, immI0 zero) %{
3587   predicate(n->as_Vector()->length() == 4);
3588   match(Set dst (ReplicateI zero));
3589   format %{ "pxor    $dst,$dst\t! replicate4I zero" %}
3590   ins_encode %{
3591     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3592   %}
3593   ins_pipe( fpu_reg_reg );
3594 %}
3595 
3596 instruct Repl8I_zero(vecY dst, immI0 zero) %{
3597   predicate(n->as_Vector()->length() == 8);
3598   match(Set dst (ReplicateI zero));
3599   format %{ "vpxor   $dst,$dst,$dst\t! replicate8I zero" %}
3600   ins_encode %{
3601     // 256-bit vpxor requires AVX2 (plain AVX only provides vxorps/vxorpd at 256 bits).
3602     int vector_len = 1;
3603     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3604   %}
3605   ins_pipe( fpu_reg_reg );
3606 %}
3607 
3608 // Replicate long (8 byte) scalar to be vector
3609 #ifdef _LP64
3610 instruct Repl2L(vecX dst, rRegL src) %{
3611   predicate(n->as_Vector()->length() == 2);
3612   match(Set dst (ReplicateL src));
3613   format %{ "movdq   $dst,$src\n\t"
3614             "punpcklqdq $dst,$dst\t! replicate2L" %}
3615   ins_encode %{
3616     __ movdq($dst$$XMMRegister, $src$$Register);
3617     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3618   %}
3619   ins_pipe( pipe_slow );
3620 %}
3621 #else // _LP64
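// Without a 64-bit general register (!_LP64) the long arrives as a register
// pair, so the lo and hi halves are moved into XMM lanes separately (movdl)
// and merged with punpckldq before the quadword is duplicated.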
3622 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
3623   predicate(n->as_Vector()->length() == 2);
3624   match(Set dst (ReplicateL src));
3625   effect(TEMP dst, USE src, TEMP tmp);
3626   format %{ "movdl   $dst,$src.lo\n\t"
3627             "movdl   $tmp,$src.hi\n\t"
3628             "punpckldq $dst,$tmp\n\t"
3629             "punpcklqdq $dst,$dst\t! replicate2L"%}
3630   ins_encode %{
3631     __ movdl($dst$$XMMRegister, $src$$Register);
3632     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3633     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3634     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3635   %}
3636   ins_pipe( pipe_slow );
3637 %}
3638 #endif // _LP64
3639 
3640 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
3641 instruct Repl2L_imm(vecX dst, immL con) %{
3642   predicate(n->as_Vector()->length() == 2);
3643   match(Set dst (ReplicateL con));
3644   format %{ "movq    $dst,[$constantaddress]\n\t"
3645             "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
3646   ins_encode %{
3647     __ movq($dst$$XMMRegister, $constantaddress($con));
3648     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3649   %}
3650   ins_pipe( pipe_slow );
3651 %}
3652 
3653 // Replicate long (8 byte) scalar zero to be vector
3654 instruct Repl2L_zero(vecX dst, immL0 zero) %{
3655   predicate(n->as_Vector()->length() == 2);
3656   match(Set dst (ReplicateL zero));
3657   format %{ "pxor    $dst,$dst\t! replicate2L zero" %}
3658   ins_encode %{
3659     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3660   %}
3661   ins_pipe( fpu_reg_reg );
3662 %}
3663 
3664 instruct Repl4L_zero(vecY dst, immL0 zero) %{
3665   predicate(n->as_Vector()->length() == 4);
3666   match(Set dst (ReplicateL zero));
3667   format %{ "vpxor   $dst,$dst,$dst\t! replicate4L zero" %}
3668   ins_encode %{
3669     // 256-bit vpxor requires AVX2 (plain AVX only provides vxorps/vxorpd at 256 bits).
3670     int vector_len = 1;
3671     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3672   %}
3673   ins_pipe( fpu_reg_reg );
3674 %}
3675 
3676 // Replicate float (4 byte) scalar to be vector
3677 instruct Repl2F(vecD dst, regF src) %{
3678   predicate(n->as_Vector()->length() == 2);
3679   match(Set dst (ReplicateF src));
3680   format %{ "pshufd  $dst,$src,0x00\t! replicate2F" %}
3681   ins_encode %{
3682     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3683   %}
3684   ins_pipe( fpu_reg_reg );
3685 %}
3686 
3687 instruct Repl4F(vecX dst, regF src) %{
3688   predicate(n->as_Vector()->length() == 4);
3689   match(Set dst (ReplicateF src));
3690   format %{ "pshufd  $dst,$src,0x00\t! replicate4F" %}
3691   ins_encode %{
3692     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3693   %}
3694   ins_pipe( pipe_slow );
3695 %}
3696 
3697 // Replicate float (4 byte) scalar zero to be vector
3698 instruct Repl2F_zero(vecD dst, immF0 zero) %{
3699   predicate(n->as_Vector()->length() == 2);
3700   match(Set dst (ReplicateF zero));
3701   format %{ "xorps   $dst,$dst\t! replicate2F zero" %}
3702   ins_encode %{
3703     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
3704   %}
3705   ins_pipe( fpu_reg_reg );
3706 %}
3707 
3708 instruct Repl4F_zero(vecX dst, immF0 zero) %{
3709   predicate(n->as_Vector()->length() == 4);
3710   match(Set dst (ReplicateF zero));
3711   format %{ "xorps   $dst,$dst\t! replicate4F zero" %}
3712   ins_encode %{
3713     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
3714   %}
3715   ins_pipe( fpu_reg_reg );
3716 %}
3717 
3718 instruct Repl8F_zero(vecY dst, immF0 zero) %{
3719   predicate(n->as_Vector()->length() == 8);
3720   match(Set dst (ReplicateF zero));
3721   format %{ "vxorps  $dst,$dst,$dst\t! replicate8F zero" %}
3722   ins_encode %{
3723     int vector_len = 1;
3724     __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3725   %}
3726   ins_pipe( fpu_reg_reg );
3727 %}
3728 
3729 // Replicate double (8 byte) scalar to be vector
3730 instruct Repl2D(vecX dst, regD src) %{
3731   predicate(n->as_Vector()->length() == 2);
3732   match(Set dst (ReplicateD src));
3733   format %{ "pshufd  $dst,$src,0x44\t! replicate2D" %}
3734   ins_encode %{
3735     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
3736   %}
3737   ins_pipe( pipe_slow );
3738 %}
3739 
3740 // Replicate double (8 byte) scalar zero to be vector
3741 instruct Repl2D_zero(vecX dst, immD0 zero) %{
3742   predicate(n->as_Vector()->length() == 2);
3743   match(Set dst (ReplicateD zero));
3744   format %{ "xorpd   $dst,$dst\t! replicate2D zero" %}
3745   ins_encode %{
3746     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
3747   %}
3748   ins_pipe( fpu_reg_reg );
3749 %}
3750 
3751 instruct Repl4D_zero(vecY dst, immD0 zero) %{
3752   predicate(n->as_Vector()->length() == 4);
3753   match(Set dst (ReplicateD zero));
3754   format %{ "vxorpd  $dst,$dst,$dst,vect256\t! replicate4D zero" %}
3755   ins_encode %{
3756     int vector_len = 1;
3757     __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3758   %}
3759   ins_pipe( fpu_reg_reg );
3760 %}
3761 
3762 // ====================EVEX REPLICATE=============================================
3763 
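// The EVEX forms pass an explicit vector_len to the macro assembler:
// 0 selects a 128-bit (XMM), 1 a 256-bit (YMM), and 2 a 512-bit (ZMM)
// encoding. The 128/256-bit variants are predicated on AVX-512VL (plus
// AVX-512BW for the byte/short broadcasts), while the full 512-bit variants
// only require UseAVX > 2.
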
3764 instruct Repl4B_mem_evex(vecS dst, memory mem) %{
3765   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
3766   match(Set dst (ReplicateB (LoadB mem)));
3767   format %{ "vpbroadcastb  $dst,$mem\t! replicate4B" %}
3768   ins_encode %{
3769     int vector_len = 0;
3770     __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
3771   %}
3772   ins_pipe( pipe_slow );
3773 %}
3774 
3775 instruct Repl8B_mem_evex(vecD dst, memory mem) %{
3776   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
3777   match(Set dst (ReplicateB (LoadB mem)));
3778   format %{ "vpbroadcastb  $dst,$mem\t! replicate8B" %}
3779   ins_encode %{
3780     int vector_len = 0;
3781     __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
3782   %}
3783   ins_pipe( pipe_slow );
3784 %}
3785 
3786 instruct Repl16B_evex(vecX dst, rRegI src) %{
3787   predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
3788   match(Set dst (ReplicateB src));
3789   format %{ "vpbroadcastb $dst,$src\t! replicate16B" %}
3790   ins_encode %{
3791     int vector_len = 0;
3792     __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
3793   %}
3794   ins_pipe( pipe_slow );
3795 %}
3796 
3797 instruct Repl16B_mem_evex(vecX dst, memory mem) %{
3798   predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
3799   match(Set dst (ReplicateB (LoadB mem)));
3800   format %{ "vpbroadcastb  $dst,$mem\t! replicate16B" %}
3801   ins_encode %{
3802     int vector_len = 0;
3803     __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
3804   %}
3805   ins_pipe( pipe_slow );
3806 %}
3807 
3808 instruct Repl32B_evex(vecY dst, rRegI src) %{
3809   predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
3810   match(Set dst (ReplicateB src));
3811   format %{ "vpbroadcastb $dst,$src\t! replicate32B" %}
3812   ins_encode %{
3813     int vector_len = 1;
3814     __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
3815   %}
3816   ins_pipe( pipe_slow );
3817 %}
3818 
3819 instruct Repl32B_mem_evex(vecY dst, memory mem) %{
3820   predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
3821   match(Set dst (ReplicateB (LoadB mem)));
3822   format %{ "vpbroadcastb  $dst,$mem\t! replicate32B" %}
3823   ins_encode %{
3824     int vector_len = 1;
3825     __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
3826   %}
3827   ins_pipe( pipe_slow );
3828 %}
3829 
3830 instruct Repl64B_evex(vecZ dst, rRegI src) %{
3831   predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
3832   match(Set dst (ReplicateB src));
3833   format %{ "vpbroadcastb $dst,$src\t! replicate64B" %}
3834   ins_encode %{
3835     int vector_len = 2;
3836     __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
3837   %}
3838   ins_pipe( pipe_slow );
3839 %}
3840 
3841 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{
3842   predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512vlbw());
3843   match(Set dst (ReplicateB (LoadB mem)));
3844   format %{ "vpbroadcastb  $dst,$mem\t! replicate64B" %}
3845   ins_encode %{
3846     int vector_len = 2;
3847     __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
3848   %}
3849   ins_pipe( pipe_slow );
3850 %}
3851 
3852 instruct Repl16B_imm_evex(vecX dst, immI con) %{
3853   predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
3854   match(Set dst (ReplicateB con));
3855   format %{ "movq    $dst,[$constantaddress]\n\t"
3856             "vpbroadcastb $dst,$dst\t! replicate16B" %}
3857   ins_encode %{
3858     int vector_len = 0;
3859     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3860     __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3861   %}
3862   ins_pipe( pipe_slow );
3863 %}
3864 
3865 instruct Repl32B_imm_evex(vecY dst, immI con) %{
3866   predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
3867   match(Set dst (ReplicateB con));
3868   format %{ "movq    $dst,[$constantaddress]\n\t"
3869             "vpbroadcastb $dst,$dst\t! replicate32B" %}
3870   ins_encode %{
3871     int vector_len = 1;
3872     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3873     __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3874   %}
3875   ins_pipe( pipe_slow );
3876 %}
3877 
3878 instruct Repl64B_imm_evex(vecZ dst, immI con) %{
3879   predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
3880   match(Set dst (ReplicateB con));
3881   format %{ "movq    $dst,[$constantaddress]\n\t"
3882             "vpbroadcastb $dst,$dst\t! replicate64B" %}
3883   ins_encode %{
3884     int vector_len = 2;
3885     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3886     __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3887   %}
3888   ins_pipe( pipe_slow );
3889 %}
3890 
3891 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{
3892   predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
3893   match(Set dst (ReplicateB zero));
3894   format %{ "vpxor   $dst k0,$dst,$dst\t! replicate64B zero" %}
3895   ins_encode %{
3896     // 512-bit vpxor requires the EVEX encoding (AVX-512F provides vpxord/vpxorq).
3897     int vector_len = 2;
3898     __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3899   %}
3900   ins_pipe( fpu_reg_reg );
3901 %}
3902 
3903 instruct Repl4S_evex(vecD dst, rRegI src) %{
3904   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
3905   match(Set dst (ReplicateS src));
3906   format %{ "vpbroadcastw $dst,$src\t! replicate4S" %}
3907   ins_encode %{
3908     int vector_len = 0;
3909     __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
3910   %}
3911   ins_pipe( pipe_slow );
3912 %}
3913 
3914 instruct Repl4S_mem_evex(vecD dst, memory mem) %{
3915   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
3916   match(Set dst (ReplicateS (LoadS mem)));
3917   format %{ "vpbroadcastw  $dst,$mem\t! replicate4S" %}
3918   ins_encode %{
3919     int vector_len = 0;
3920     __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
3921   %}
3922   ins_pipe( pipe_slow );
3923 %}
3924 
3925 instruct Repl8S_evex(vecX dst, rRegI src) %{
3926   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
3927   match(Set dst (ReplicateS src));
3928   format %{ "vpbroadcastw $dst,$src\t! replicate8S" %}
3929   ins_encode %{
3930     int vector_len = 0;
3931     __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
3932   %}
3933   ins_pipe( pipe_slow );
3934 %}
3935 
3936 instruct Repl8S_mem_evex(vecX dst, memory mem) %{
3937   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
3938   match(Set dst (ReplicateS (LoadS mem)));
3939   format %{ "vpbroadcastw  $dst,$mem\t! replicate8S" %}
3940   ins_encode %{
3941     int vector_len = 0;
3942     __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
3943   %}
3944   ins_pipe( pipe_slow );
3945 %}
3946 
3947 instruct Repl16S_evex(vecY dst, rRegI src) %{
3948   predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
3949   match(Set dst (ReplicateS src));
3950   format %{ "vpbroadcastw $dst,$src\t! replicate16S" %}
3951   ins_encode %{
3952     int vector_len = 1;
3953     __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
3954   %}
3955   ins_pipe( pipe_slow );
3956 %}
3957 
3958 instruct Repl16S_mem_evex(vecY dst, memory mem) %{
3959   predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
3960   match(Set dst (ReplicateS (LoadS mem)));
3961   format %{ "vpbroadcastw  $dst,$mem\t! replicate16S" %}
3962   ins_encode %{
3963     int vector_len = 1;
3964     __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
3965   %}
3966   ins_pipe( pipe_slow );
3967 %}
3968 
3969 instruct Repl32S_evex(vecZ dst, rRegI src) %{
3970   predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
3971   match(Set dst (ReplicateS src));
3972   format %{ "vpbroadcastw $dst,$src\t! replicate32S" %}
3973   ins_encode %{
3974     int vector_len = 2;
3975     __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
3976   %}
3977   ins_pipe( pipe_slow );
3978 %}
3979 
3980 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{
3981   predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
3982   match(Set dst (ReplicateS (LoadS mem)));
3983   format %{ "vpbroadcastw  $dst,$mem\t! replicate32S" %}
3984   ins_encode %{
3985     int vector_len = 2;
3986     __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
3987   %}
3988   ins_pipe( pipe_slow );
3989 %}
3990 
3991 instruct Repl8S_imm_evex(vecX dst, immI con) %{
3992   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
3993   match(Set dst (ReplicateS con));
3994   format %{ "movq    $dst,[$constantaddress]\n\t"
3995             "vpbroadcastw $dst,$dst\t! replicate8S" %}
3996   ins_encode %{
3997     int vector_len = 0;
3998     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3999     __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4000   %}
4001   ins_pipe( pipe_slow );
4002 %}
4003 
4004 instruct Repl16S_imm_evex(vecY dst, immI con) %{
4005   predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
4006   match(Set dst (ReplicateS con));
4007   format %{ "movq    $dst,[$constantaddress]\n\t"
4008             "vpbroadcastw $dst,$dst\t! replicate16S" %}
4009   ins_encode %{
4010     int vector_len = 1;
4011     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
4012     __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4013   %}
4014   ins_pipe( pipe_slow );
4015 %}
4016 
4017 instruct Repl32S_imm_evex(vecZ dst, immI con) %{
4018   predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
4019   match(Set dst (ReplicateS con));
4020   format %{ "movq    $dst,[$constantaddress]\n\t"
4021             "vpbroadcastw $dst,$dst\t! replicate32S" %}
4022   ins_encode %{
4023     int vector_len = 2;
4024     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
4025     __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4026   %}
4027   ins_pipe( pipe_slow );
4028 %}
4029 
4030 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{
4031   predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
4032   match(Set dst (ReplicateS zero));
4033   format %{ "vpxor   $dst k0,$dst,$dst\t! replicate32S zero" %}
4034   ins_encode %{
4035     // 512-bit vpxor requires the EVEX encoding (AVX-512F provides vpxord/vpxorq).
4036     int vector_len = 2;
4037     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4038   %}
4039   ins_pipe( fpu_reg_reg );
4040 %}
4041 
4042 instruct Repl4I_evex(vecX dst, rRegI src) %{
4043   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4044   match(Set dst (ReplicateI src));
4045   format %{ "vpbroadcastd  $dst,$src\t! replicate4I" %}
4046   ins_encode %{
4047     int vector_len = 0;
4048     __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
4049   %}
4050   ins_pipe( pipe_slow );
4051 %}
4052 
4053 instruct Repl4I_mem_evex(vecX dst, memory mem) %{
4054   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4055   match(Set dst (ReplicateI (LoadI mem)));
4056   format %{ "vpbroadcastd  $dst,$mem\t! replicate4I" %}
4057   ins_encode %{
4058     int vector_len = 0;
4059     __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
4060   %}
4061   ins_pipe( pipe_slow );
4062 %}
4063 
4064 instruct Repl8I_evex(vecY dst, rRegI src) %{
4065   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
4066   match(Set dst (ReplicateI src));
4067   format %{ "vpbroadcastd  $dst,$src\t! replicate8I" %}
4068   ins_encode %{
4069     int vector_len = 1;
4070     __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
4071   %}
4072   ins_pipe( pipe_slow );
4073 %}
4074 
4075 instruct Repl8I_mem_evex(vecY dst, memory mem) %{
4076   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
4077   match(Set dst (ReplicateI (LoadI mem)));
4078   format %{ "vpbroadcastd  $dst,$mem\t! replicate8I" %}
4079   ins_encode %{
4080     int vector_len = 1;
4081     __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
4082   %}
4083   ins_pipe( pipe_slow );
4084 %}
4085 
4086 instruct Repl16I_evex(vecZ dst, rRegI src) %{
4087   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
4088   match(Set dst (ReplicateI src));
4089   format %{ "vpbroadcastd  $dst,$src\t! replicate16I" %}
4090   ins_encode %{
4091     int vector_len = 2;
4092     __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
4093   %}
4094   ins_pipe( pipe_slow );
4095 %}
4096 
4097 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{
4098   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
4099   match(Set dst (ReplicateI (LoadI mem)));
4100   format %{ "vpbroadcastd  $dst,$mem\t! replicate16I" %}
4101   ins_encode %{
4102     int vector_len = 2;
4103     __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
4104   %}
4105   ins_pipe( pipe_slow );
4106 %}
4107 
4108 instruct Repl4I_imm_evex(vecX dst, immI con) %{
4109   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4110   match(Set dst (ReplicateI con));
4111   format %{ "movq    $dst,[$constantaddress]\t! replicate4I($con)\n\t"
4112             "vpbroadcastd  $dst,$dst\t! replicate4I" %}
4113   ins_encode %{
4114     int vector_len = 0;
4115     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
4116     __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4117   %}
4118   ins_pipe( pipe_slow );
4119 %}
4120 
4121 instruct Repl8I_imm_evex(vecY dst, immI con) %{
4122   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
4123   match(Set dst (ReplicateI con));
4124   format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
4125             "vpbroadcastd  $dst,$dst\t! replicate8I" %}
4126   ins_encode %{
4127     int vector_len = 1;
4128     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
4129     __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4130   %}
4131   ins_pipe( pipe_slow );
4132 %}
4133 
4134 instruct Repl16I_imm_evex(vecZ dst, immI con) %{
4135   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
4136   match(Set dst (ReplicateI con));
4137   format %{ "movq    $dst,[$constantaddress]\t! replicate16I($con)\n\t"
4138             "vpbroadcastd  $dst,$dst\t! replicate16I" %}
4139   ins_encode %{
4140     int vector_len = 2;
4141     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
4142     __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4143   %}
4144   ins_pipe( pipe_slow );
4145 %}
4146 
4147 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{
4148   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
4149   match(Set dst (ReplicateI zero));
4150   format %{ "vpxor   $dst k0,$dst,$dst\t! replicate16I zero" %}
4151   ins_encode %{
4152     // 512-bit vpxor requires the EVEX encoding (AVX-512F provides vpxord/vpxorq).
4153     int vector_len = 2;
4154     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4155   %}
4156   ins_pipe( fpu_reg_reg );
4157 %}
4158 
4159 // Replicate long (8 byte) scalar to be vector
4160 #ifdef _LP64
4161 instruct Repl4L_evex(vecY dst, rRegL src) %{
4162   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4163   match(Set dst (ReplicateL src));
4164   format %{ "vpbroadcastq  $dst,$src\t! replicate4L" %}
4165   ins_encode %{
4166     int vector_len = 1;
4167     __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
4168   %}
4169   ins_pipe( pipe_slow );
4170 %}
4171 
4172 instruct Repl8L_evex(vecZ dst, rRegL src) %{
4173   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4174   match(Set dst (ReplicateL src));
4175   format %{ "vpbroadcastq  $dst,$src\t! replicate8L" %}
4176   ins_encode %{
4177     int vector_len = 2;
4178     __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
4179   %}
4180   ins_pipe( pipe_slow );
4181 %}
4182 #else // _LP64
4183 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{
4184   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4185   match(Set dst (ReplicateL src));
4186   effect(TEMP dst, USE src, TEMP tmp);
4187   format %{ "movdl   $dst,$src.lo\n\t"
4188             "movdl   $tmp,$src.hi\n\t"
4189             "punpckldq $dst,$tmp\n\t"
4190             "vpbroadcastq  $dst,$dst\t! replicate4L" %}
4191   ins_encode %{
4192     int vector_len = 1;
4193     __ movdl($dst$$XMMRegister, $src$$Register);
4194     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
4195     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
4196     __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4197   %}
4198   ins_pipe( pipe_slow );
4199 %}
4200 
4201 instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{
4202   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4203   match(Set dst (ReplicateL src));
4204   effect(TEMP dst, USE src, TEMP tmp);
4205   format %{ "movdl   $dst,$src.lo\n\t"
4206             "movdl   $tmp,$src.hi\n\t"
4207             "punpckldq $dst,$tmp\n\t"
4208             "vpbroadcastq  $dst,$dst\t! replicate8L" %}
4209   ins_encode %{
4210     int vector_len = 2;
4211     __ movdl($dst$$XMMRegister, $src$$Register);
4212     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
4213     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
4214     __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4215   %}
4216   ins_pipe( pipe_slow );
4217 %}
4218 #endif // _LP64
4219 
4220 instruct Repl4L_imm_evex(vecY dst, immL con) %{
4221   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4222   match(Set dst (ReplicateL con));
4223   format %{ "movq    $dst,[$constantaddress]\n\t"
4224             "vpbroadcastq  $dst,$dst\t! replicate4L" %}
4225   ins_encode %{
4226     int vector_len = 1;
4227     __ movq($dst$$XMMRegister, $constantaddress($con));
4228     __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4229   %}
4230   ins_pipe( pipe_slow );
4231 %}
4232 
4233 instruct Repl8L_imm_evex(vecZ dst, immL con) %{
4234   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4235   match(Set dst (ReplicateL con));
4236   format %{ "movq    $dst,[$constantaddress]\n\t"
4237             "vpbroadcastq  $dst,$dst\t! replicate8L" %}
4238   ins_encode %{
4239     int vector_len = 2;
4240     __ movq($dst$$XMMRegister, $constantaddress($con));
4241     __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4242   %}
4243   ins_pipe( pipe_slow );
4244 %}
4245 
4246 instruct Repl2L_mem_evex(vecX dst, memory mem) %{
4247   predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl());
4248   match(Set dst (ReplicateL (LoadL mem)));
4249   format %{ "vpbroadcastq  $dst,$mem\t! replicate2L" %}
4250   ins_encode %{
4251     int vector_len = 0;
4252     __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
4253   %}
4254   ins_pipe( pipe_slow );
4255 %}
4256 
4257 instruct Repl4L_mem_evex(vecY dst, memory mem) %{
4258   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4259   match(Set dst (ReplicateL (LoadL mem)));
4260   format %{ "vpbroadcastq  $dst,$mem\t! replicate4L" %}
4261   ins_encode %{
4262     int vector_len = 1;
4263     __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
4264   %}
4265   ins_pipe( pipe_slow );
4266 %}
4267 
4268 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{
4269   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4270   match(Set dst (ReplicateL (LoadL mem)));
4271   format %{ "vpbroadcastq  $dst,$mem\t! replicate8L" %}
4272   ins_encode %{
4273     int vector_len = 2;
4274     __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
4275   %}
4276   ins_pipe( pipe_slow );
4277 %}
4278 
4279 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{
4280   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4281   match(Set dst (ReplicateL zero));
4282   format %{ "vpxor   $dst k0,$dst,$dst\t! replicate8L zero" %}
4283   ins_encode %{
4284     // 512-bit vpxor requires the EVEX encoding (AVX-512F provides vpxord/vpxorq).
4285     int vector_len = 2;
4286     __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4287   %}
4288   ins_pipe( fpu_reg_reg );
4289 %}
4290 
4291 instruct Repl8F_evex(vecY dst, regF src) %{
4292   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
4293   match(Set dst (ReplicateF src));
4294   format %{ "vbroadcastss $dst,$src\t! replicate8F" %}
4295   ins_encode %{
4296     int vector_len = 1;
4297     __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
4298   %}
4299   ins_pipe( pipe_slow );
4300 %}
4301 
4302 instruct Repl8F_mem_evex(vecY dst, memory mem) %{
4303   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
4304   match(Set dst (ReplicateF (LoadF mem)));
4305   format %{ "vbroadcastss  $dst,$mem\t! replicate8F" %}
4306   ins_encode %{
4307     int vector_len = 1;
4308     __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
4309   %}
4310   ins_pipe( pipe_slow );
4311 %}
4312 
4313 instruct Repl16F_evex(vecZ dst, regF src) %{
4314   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
4315   match(Set dst (ReplicateF src));
4316   format %{ "vbroadcastss $dst,$src\t! replicate16F" %}
4317   ins_encode %{
4318     int vector_len = 2;
4319     __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
4320   %}
4321   ins_pipe( pipe_slow );
4322 %}
4323 
4324 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{
4325   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
4326   match(Set dst (ReplicateF (LoadF mem)));
4327   format %{ "vbroadcastss  $dst,$mem\t! replicate16F" %}
4328   ins_encode %{
4329     int vector_len = 2;
4330     __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
4331   %}
4332   ins_pipe( pipe_slow );
4333 %}
4334 
4335 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{
4336   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
4337   match(Set dst (ReplicateF zero));
4338   format %{ "vxorps  $dst k0,$dst,$dst\t! replicate16F zero" %}
4339   ins_encode %{
4340     int vector_len = 2;
4341     __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4342   %}
4343   ins_pipe( fpu_reg_reg );
4344 %}
4345 
4346 instruct Repl4D_evex(vecY dst, regD src) %{
4347   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4348   match(Set dst (ReplicateD src));
4349   format %{ "vbroadcastsd $dst,$src\t! replicate4D" %}
4350   ins_encode %{
4351     int vector_len = 1;
4352     __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
4353   %}
4354   ins_pipe( pipe_slow );
4355 %}
4356 
4357 instruct Repl4D_mem_evex(vecY dst, memory mem) %{
4358   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4359   match(Set dst (ReplicateD (LoadD mem)));
4360   format %{ "vbroadcastsd  $dst,$mem\t! replicate4D" %}
4361   ins_encode %{
4362     int vector_len = 1;
4363     __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
4364   %}
4365   ins_pipe( pipe_slow );
4366 %}
4367 
4368 instruct Repl8D_evex(vecZ dst, regD src) %{
4369   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4370   match(Set dst (ReplicateD src));
4371   format %{ "vbroadcastsd $dst,$src\t! replicate8D" %}
4372   ins_encode %{
4373     int vector_len = 2;
4374     __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
4375   %}
4376   ins_pipe( pipe_slow );
4377 %}
4378 
4379 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{
4380   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4381   match(Set dst (ReplicateD (LoadD mem)));
4382   format %{ "vbroadcastsd  $dst,$mem\t! replicate8D" %}
4383   ins_encode %{
4384     int vector_len = 2;
4385     __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
4386   %}
4387   ins_pipe( pipe_slow );
4388 %}
4389 
4390 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{
4391   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4392   match(Set dst (ReplicateD zero));
4393   format %{ "vxorpd  $dst k0,$dst,$dst,vect512\t! replicate8D zero" %}
4394   ins_encode %{
4395     int vector_len = 2;
4396     __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4397   %}
4398   ins_pipe( fpu_reg_reg );
4399 %}
4400 
4401 // ====================REDUCTION ARITHMETIC=======================================
4402 
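// Each reduction below folds the scalar input src1 into a horizontal
// reduction of the vector src2 and leaves the scalar result in dst, using the
// temp XMM registers for the intermediate shuffles/extracts. The integer
// forms sum pairs of lanes (phaddd/vphaddd or pshufd + vpaddd); the FP forms
// accumulate the lanes one at a time with addss/addsd, matching the
// left-to-right evaluation order of a scalar loop.
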
4403 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
4404   predicate(UseSSE > 2 && UseAVX == 0);
4405   match(Set dst (AddReductionVI src1 src2));
4406   effect(TEMP tmp2, TEMP tmp);
4407   format %{ "movdqu  $tmp2,$src2\n\t"
4408             "phaddd  $tmp2,$tmp2\n\t"
4409             "movd    $tmp,$src1\n\t"
4410             "paddd   $tmp,$tmp2\n\t"
4411             "movd    $dst,$tmp\t! add reduction2I" %}
4412   ins_encode %{
4413     __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
4414     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
4415     __ movdl($tmp$$XMMRegister, $src1$$Register);
4416     __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
4417     __ movdl($dst$$Register, $tmp$$XMMRegister);
4418   %}
4419   ins_pipe( pipe_slow );
4420 %}
4421 
4422 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
4423   predicate(UseAVX > 0 && UseAVX < 3);
4424   match(Set dst (AddReductionVI src1 src2));
4425   effect(TEMP tmp, TEMP tmp2);
4426   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
4427             "movd     $tmp2,$src1\n\t"
4428             "vpaddd   $tmp2,$tmp2,$tmp\n\t"
4429             "movd     $dst,$tmp2\t! add reduction2I" %}
4430   ins_encode %{
4431     int vector_len = 0;
4432     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
4433     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4434     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
4435     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4436   %}
4437   ins_pipe( pipe_slow );
4438 %}
4439 
4440 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
4441   predicate(UseAVX > 2);
4442   match(Set dst (AddReductionVI src1 src2));
4443   effect(TEMP tmp, TEMP tmp2);
4444   format %{ "pshufd  $tmp2,$src2,0x1\n\t"
4445             "vpaddd  $tmp,$src2,$tmp2\n\t"
4446             "movd    $tmp2,$src1\n\t"
4447             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
4448             "movd    $dst,$tmp2\t! add reduction2I" %}
4449   ins_encode %{
4450     int vector_len = 0;
4451     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
4452     __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4453     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4454     __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4455     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4456   %}
4457   ins_pipe( pipe_slow );
4458 %}
4459 
4460 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
4461   predicate(UseSSE > 2 && UseAVX == 0);
4462   match(Set dst (AddReductionVI src1 src2));
4463   effect(TEMP tmp2, TEMP tmp);
4464   format %{ "movdqu  $tmp2,$src2\n\t"
4465             "phaddd  $tmp2,$tmp2\n\t"
4466             "phaddd  $tmp2,$tmp2\n\t"
4467             "movd    $tmp,$src1\n\t"
4468             "paddd   $tmp,$tmp2\n\t"
4469             "movd    $dst,$tmp\t! add reduction4I" %}
4470   ins_encode %{
4471     __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
4472     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
4473     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
4474     __ movdl($tmp$$XMMRegister, $src1$$Register);
4475     __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
4476     __ movdl($dst$$Register, $tmp$$XMMRegister);
4477   %}
4478   ins_pipe( pipe_slow );
4479 %}
4480 
4481 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
4482   predicate(UseAVX > 0 && UseAVX < 3);
4483   match(Set dst (AddReductionVI src1 src2));
4484   effect(TEMP tmp, TEMP tmp2);
4485   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
4486             "vphaddd  $tmp,$tmp,$tmp2\n\t"
4487             "movd     $tmp2,$src1\n\t"
4488             "vpaddd   $tmp2,$tmp2,$tmp\n\t"
4489             "movd     $dst,$tmp2\t! add reduction4I" %}
4490   ins_encode %{
4491     int vector_len = 0;
4492     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
4493     __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4494     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4495     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
4496     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4497   %}
4498   ins_pipe( pipe_slow );
4499 %}
4500 
4501 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
4502   predicate(UseAVX > 2);
4503   match(Set dst (AddReductionVI src1 src2));
4504   effect(TEMP tmp, TEMP tmp2);
4505   format %{ "pshufd  $tmp2,$src2,0xE\n\t"
4506             "vpaddd  $tmp,$src2,$tmp2\n\t"
4507             "pshufd  $tmp2,$tmp,0x1\n\t"
4508             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4509             "movd    $tmp2,$src1\n\t"
4510             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
4511             "movd    $dst,$tmp2\t! add reduction4I" %}
4512   ins_encode %{
4513     int vector_len = 0;
4514     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
4515     __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4516     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4517     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4518     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4519     __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4520     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4521   %}
4522   ins_pipe( pipe_slow );
4523 %}
4524 
4525 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
4526   predicate(UseAVX > 0 && UseAVX < 3);
4527   match(Set dst (AddReductionVI src1 src2));
4528   effect(TEMP tmp, TEMP tmp2);
4529   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
4530             "vphaddd  $tmp,$tmp,$tmp2\n\t"
4531             "vextracti128  $tmp2,$tmp\n\t"
4532             "vpaddd   $tmp,$tmp,$tmp2\n\t"
4533             "movd     $tmp2,$src1\n\t"
4534             "vpaddd   $tmp2,$tmp2,$tmp\n\t"
4535             "movd     $dst,$tmp2\t! add reduction8I" %}
4536   ins_encode %{
4537     int vector_len = 1;
4538     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
4539     __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4540     __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
4541     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4542     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4543     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4544     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4545   %}
4546   ins_pipe( pipe_slow );
4547 %}
4548 
4549 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
4550   predicate(UseAVX > 2);
4551   match(Set dst (AddReductionVI src1 src2));
4552   effect(TEMP tmp, TEMP tmp2);
4553   format %{ "vextracti128  $tmp,$src2\n\t"
4554             "vpaddd  $tmp,$tmp,$src2\n\t"
4555             "pshufd  $tmp2,$tmp,0xE\n\t"
4556             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4557             "pshufd  $tmp2,$tmp,0x1\n\t"
4558             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4559             "movd    $tmp2,$src1\n\t"
4560             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
4561             "movd    $dst,$tmp2\t! add reduction8I" %}
4562   ins_encode %{
4563     int vector_len = 0;
4564     __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
4565     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
4566     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
4567     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4568     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4569     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4570     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4571     __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4572     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4573   %}
4574   ins_pipe( pipe_slow );
4575 %}
4576 
4577 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
4578   predicate(UseAVX > 2);
4579   match(Set dst (AddReductionVI src1 src2));
4580   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4581   format %{ "vextracti64x4  $tmp3,$src2\n\t"
4582             "vpaddd  $tmp3,$tmp3,$src2\n\t"
4583             "vextracti128   $tmp,$tmp3\n\t"
4584             "vpaddd  $tmp,$tmp,$tmp3\n\t"
4585             "pshufd  $tmp2,$tmp,0xE\n\t"
4586             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4587             "pshufd  $tmp2,$tmp,0x1\n\t"
4588             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4589             "movd    $tmp2,$src1\n\t"
4590             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
4591             "movd    $dst,$tmp2\t! add reduction16I" %}
4592   ins_encode %{
4593     __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
4594     __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
4595     __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
4596     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
4597     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
4598     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4599     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4600     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4601     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4602     __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4603     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4604   %}
4605   ins_pipe( pipe_slow );
4606 %}
4607 
4608 #ifdef _LP64
4609 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
4610   predicate(UseAVX > 2);
4611   match(Set dst (AddReductionVL src1 src2));
4612   effect(TEMP tmp, TEMP tmp2);
4613   format %{ "pshufd  $tmp2,$src2,0xE\n\t"
4614             "vpaddq  $tmp,$src2,$tmp2\n\t"
4615             "movdq   $tmp2,$src1\n\t"
4616             "vpaddq  $tmp2,$tmp,$tmp2\n\t"
4617             "movdq   $dst,$tmp2\t! add reduction2L" %}
4618   ins_encode %{
4619     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
4620     __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
4621     __ movdq($tmp2$$XMMRegister, $src1$$Register);
4622     __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4623     __ movdq($dst$$Register, $tmp2$$XMMRegister);
4624   %}
4625   ins_pipe( pipe_slow );
4626 %}
4627 
4628 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
4629   predicate(UseAVX > 2);
4630   match(Set dst (AddReductionVL src1 src2));
4631   effect(TEMP tmp, TEMP tmp2);
4632   format %{ "vextracti64x2  $tmp,$src2, 0x1\n\t"
4633             "vpaddq  $tmp2,$tmp,$src2\n\t"
4634             "pshufd  $tmp,$tmp2,0xE\n\t"
4635             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4636             "movdq   $tmp,$src1\n\t"
4637             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4638             "movdq   $dst,$tmp2\t! add reduction4L" %}
4639   ins_encode %{
4640     __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
4641     __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
4642     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
4643     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4644     __ movdq($tmp$$XMMRegister, $src1$$Register);
4645     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4646     __ movdq($dst$$Register, $tmp2$$XMMRegister);
4647   %}
4648   ins_pipe( pipe_slow );
4649 %}
4650 
4651 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
4652   predicate(UseAVX > 2);
4653   match(Set dst (AddReductionVL src1 src2));
4654   effect(TEMP tmp, TEMP tmp2);
4655   format %{ "vextracti64x4  $tmp2,$src2\n\t"
4656             "vpaddq  $tmp2,$tmp2,$src2\n\t"
4657             "vextracti128   $tmp,$tmp2\n\t"
4658             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4659             "pshufd  $tmp,$tmp2,0xE\n\t"
4660             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4661             "movdq   $tmp,$src1\n\t"
4662             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4663             "movdq   $dst,$tmp2\t! add reduction8L" %}
4664   ins_encode %{
4665     __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
4666     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
4667     __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
4668     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4669     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
4670     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4671     __ movdq($tmp$$XMMRegister, $src1$$Register);
4672     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4673     __ movdq($dst$$Register, $tmp2$$XMMRegister);
4674   %}
4675   ins_pipe( pipe_slow );
4676 %}
4677 #endif
4678 
4679 instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
4680   predicate(UseSSE >= 1 && UseAVX == 0);
4681   match(Set dst (AddReductionVF src1 src2));
4682   effect(TEMP tmp, TEMP tmp2);
4683   format %{ "movdqu  $tmp,$src1\n\t"
4684             "addss   $tmp,$src2\n\t"
4685             "pshufd  $tmp2,$src2,0x01\n\t"
4686             "addss   $tmp,$tmp2\n\t"
4687             "movdqu  $dst,$tmp\t! add reduction2F" %}
4688   ins_encode %{
4689     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
4690     __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
4691     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
4692     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
4693     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
4694   %}
4695   ins_pipe( pipe_slow );
4696 %}
4697 
4698 instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
4699   predicate(UseAVX > 0);
4700   match(Set dst (AddReductionVF src1 src2));
4701   effect(TEMP tmp2, TEMP tmp);
4702   format %{ "vaddss  $tmp2,$src1,$src2\n\t"
4703             "pshufd  $tmp,$src2,0x01\n\t"
4704             "vaddss  $dst,$tmp2,$tmp\t! add reduction2F" %}
4705   ins_encode %{
4706     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4707     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4708     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4709   %}
4710   ins_pipe( pipe_slow );
4711 %}
4712 
4713 instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
4714   predicate(UseSSE >= 1 && UseAVX == 0);
4715   match(Set dst (AddReductionVF src1 src2));
4716   effect(TEMP tmp, TEMP tmp2);
4717   format %{ "movdqu  $tmp,$src1\n\t"
4718             "addss   $tmp,$src2\n\t"
4719             "pshufd  $tmp2,$src2,0x01\n\t"
4720             "addss   $tmp,$tmp2\n\t"
4721             "pshufd  $tmp2,$src2,0x02\n\t"
4722             "addss   $tmp,$tmp2\n\t"
4723             "pshufd  $tmp2,$src2,0x03\n\t"
4724             "addss   $tmp,$tmp2\n\t"
4725             "movdqu  $dst,$tmp\t! add reduction4F" %}
4726   ins_encode %{
4727     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
4728     __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
4729     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
4730     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
4731     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
4732     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
4733     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
4734     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
4735     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
4736   %}
4737   ins_pipe( pipe_slow );
4738 %}
4739 
4740 instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
4741   predicate(UseAVX > 0);
4742   match(Set dst (AddReductionVF src1 src2));
4743   effect(TEMP tmp, TEMP tmp2);
4744   format %{ "vaddss  $tmp2,$src1,$src2\n\t"
4745             "pshufd  $tmp,$src2,0x01\n\t"
4746             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4747             "pshufd  $tmp,$src2,0x02\n\t"
4748             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4749             "pshufd  $tmp,$src2,0x03\n\t"
4750             "vaddss  $dst,$tmp2,$tmp\t! add reduction4F" %}
4751   ins_encode %{
4752     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4753     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4754     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4755     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
4756     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4757     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
4758     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4759   %}
4760   ins_pipe( pipe_slow );
4761 %}
4762 
4763 instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
4764   predicate(UseAVX > 0);
4765   match(Set dst (AddReductionVF src1 src2));
4766   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4767   format %{ "vaddss  $tmp2,$src1,$src2\n\t"
4768             "pshufd  $tmp,$src2,0x01\n\t"
4769             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4770             "pshufd  $tmp,$src2,0x02\n\t"
4771             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4772             "pshufd  $tmp,$src2,0x03\n\t"
4773             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4774             "vextractf128  $tmp3,$src2\n\t"
4775             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
4776             "pshufd  $tmp,$tmp3,0x01\n\t"
4777             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4778             "pshufd  $tmp,$tmp3,0x02\n\t"
4779             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4780             "pshufd  $tmp,$tmp3,0x03\n\t"
4781             "vaddss  $dst,$tmp2,$tmp\t! add reduction8F" %}
4782   ins_encode %{
4783     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4784     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4785     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4786     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
4787     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4788     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
4789     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4790     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
4791     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4792     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4793     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4794     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4795     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4796     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4797     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4798   %}
4799   ins_pipe( pipe_slow );
4800 %}
4801 
4802 instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
4803   predicate(UseAVX > 2);
4804   match(Set dst (AddReductionVF src1 src2));
4805   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4806   format %{ "vaddss  $tmp2,$src1,$src2\n\t"
4807             "pshufd  $tmp,$src2,0x01\n\t"
4808             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4809             "pshufd  $tmp,$src2,0x02\n\t"
4810             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4811             "pshufd  $tmp,$src2,0x03\n\t"
4812             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4813             "vextractf32x4  $tmp3,$src2, 0x1\n\t"
4814             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
4815             "pshufd  $tmp,$tmp3,0x01\n\t"
4816             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4817             "pshufd  $tmp,$tmp3,0x02\n\t"
4818             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4819             "pshufd  $tmp,$tmp3,0x03\n\t"
4820             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4821             "vextractf32x4  $tmp3,$src2, 0x2\n\t"
4822             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
4823             "pshufd  $tmp,$tmp3,0x01\n\t"
4824             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4825             "pshufd  $tmp,$tmp3,0x02\n\t"
4826             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4827             "pshufd  $tmp,$tmp3,0x03\n\t"
4828             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4829             "vextractf32x4  $tmp3,$src2, 0x3\n\t"
4830             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
4831             "pshufd  $tmp,$tmp3,0x01\n\t"
4832             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4833             "pshufd  $tmp,$tmp3,0x02\n\t"
4834             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4835             "pshufd  $tmp,$tmp3,0x03\n\t"
4836             "vaddss  $dst,$tmp2,$tmp\t! add reduction16F" %}
4837   ins_encode %{
4838     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4839     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4840     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4841     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
4842     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4843     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
4844     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4845     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
4846     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4847     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4848     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4849     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4850     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4851     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4852     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4853     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
4854     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4855     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4856     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4857     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4858     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4859     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4860     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4861     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
4862     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4863     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4864     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4865     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4866     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4867     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4868     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4869   %}
4870   ins_pipe( pipe_slow );
4871 %}
4872 
4873 instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
4874   predicate(UseSSE >= 1 && UseAVX == 0);
4875   match(Set dst (AddReductionVD src1 src2));
4876   effect(TEMP tmp, TEMP dst);
4877   format %{ "movdqu  $tmp,$src1\n\t"
4878             "addsd   $tmp,$src2\n\t"
4879             "pshufd  $dst,$src2,0xE\n\t"
4880             "addsd   $dst,$tmp\t! add reduction2D" %}
4881   ins_encode %{
4882     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
4883     __ addsd($tmp$$XMMRegister, $src2$$XMMRegister);
4884     __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
4885     __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
4886   %}
4887   ins_pipe( pipe_slow );
4888 %}
4889 
4890 instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
4891   predicate(UseAVX > 0);
4892   match(Set dst (AddReductionVD src1 src2));
4893   effect(TEMP tmp, TEMP tmp2);
4894   format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
4895             "pshufd  $tmp,$src2,0xE\n\t"
4896             "vaddsd  $dst,$tmp2,$tmp\t! add reduction2D" %}
4897   ins_encode %{
4898     __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4899     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
4900     __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4901   %}
4902   ins_pipe( pipe_slow );
4903 %}
4904 
4905 instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
4906   predicate(UseAVX > 0);
4907   match(Set dst (AddReductionVD src1 src2));
4908   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4909   format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
4910             "pshufd  $tmp,$src2,0xE\n\t"
4911             "vaddsd  $tmp2,$tmp2,$tmp\n\t"
4912             "vextractf128  $tmp3,$src2\n\t"
4913             "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
4914             "pshufd  $tmp,$tmp3,0xE\n\t"
4915             "vaddsd  $dst,$tmp2,$tmp\t! add reduction4D" %}
4916   ins_encode %{
4917     __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4918     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
4919     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4920     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
4921     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4922     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4923     __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4924   %}
4925   ins_pipe( pipe_slow );
4926 %}
4927 
4928 instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
4929   predicate(UseAVX > 2);
4930   match(Set dst (AddReductionVD src1 src2));
4931   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4932   format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
4933             "pshufd  $tmp,$src2,0xE\n\t"
4934             "vaddsd  $tmp2,$tmp2,$tmp\n\t"
4935             "vextractf64x2  $tmp3,$src2, 0x1\n\t"
4936             "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
4937             "pshufd  $tmp,$tmp3,0xE\n\t"
4938             "vaddsd  $tmp2,$tmp2,$tmp\n\t"
4939             "vextractf64x2  $tmp3,$src2, 0x2\n\t"
4940             "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
4941             "pshufd  $tmp,$tmp3,0xE\n\t"
4942             "vaddsd  $tmp2,$tmp2,$tmp\n\t"
4943             "vextractf64x2  $tmp3,$src2, 0x3\n\t"
4944             "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
4945             "pshufd  $tmp,$tmp3,0xE\n\t"
4946             "vaddsd  $dst,$tmp2,$tmp\t! add reduction8D" %}
4947   ins_encode %{
4948     __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4949     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
4950     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4951     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
4952     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4953     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4954     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4955     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
4956     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4957     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4958     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4959     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
4960     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4961     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4962     __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4963   %}
4964   ins_pipe( pipe_slow );
4965 %}
4966 
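// Integer multiply reductions.  These follow the same lane-folding scheme as
// the add reductions above: the upper lanes of $src2 are moved down with
// pshufd/vextract, multiplied pairwise, and the result is finally combined
// with the scalar running value carried in $src1.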
4967 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
4968   predicate(UseSSE > 3 && UseAVX == 0);
4969   match(Set dst (MulReductionVI src1 src2));
4970   effect(TEMP tmp, TEMP tmp2);
4971   format %{ "pshufd  $tmp2,$src2,0x1\n\t"
4972             "pmulld  $tmp2,$src2\n\t"
4973             "movd    $tmp,$src1\n\t"
4974             "pmulld  $tmp2,$tmp\n\t"
4975             "movd    $dst,$tmp2\t! mul reduction2I" %}
4976   ins_encode %{
4977     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
4978     __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
4979     __ movdl($tmp$$XMMRegister, $src1$$Register);
4980     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
4981     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4982   %}
4983   ins_pipe( pipe_slow );
4984 %}
4985 
4986 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
4987   predicate(UseAVX > 0);
4988   match(Set dst (MulReductionVI src1 src2));
4989   effect(TEMP tmp, TEMP tmp2);
4990   format %{ "pshufd   $tmp2,$src2,0x1\n\t"
4991             "vpmulld  $tmp,$src2,$tmp2\n\t"
4992             "movd     $tmp2,$src1\n\t"
4993             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
4994             "movd     $dst,$tmp2\t! mul reduction2I" %}
4995   ins_encode %{
4996     int vector_len = 0;
4997     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
4998     __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4999     __ movdl($tmp2$$XMMRegister, $src1$$Register);
5000     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5001     __ movdl($dst$$Register, $tmp2$$XMMRegister);
5002   %}
5003   ins_pipe( pipe_slow );
5004 %}
5005 
5006 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
5007   predicate(UseSSE > 3 && UseAVX == 0);
5008   match(Set dst (MulReductionVI src1 src2));
5009   effect(TEMP tmp, TEMP tmp2);
5010   format %{ "pshufd  $tmp2,$src2,0xE\n\t"
5011             "pmulld  $tmp2,$src2\n\t"
5012             "pshufd  $tmp,$tmp2,0x1\n\t"
5013             "pmulld  $tmp2,$tmp\n\t"
5014             "movd    $tmp,$src1\n\t"
5015             "pmulld  $tmp2,$tmp\n\t"
5016             "movd    $dst,$tmp2\t! mul reduction4I" %}
5017   ins_encode %{
5018     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
5019     __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
5020     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
5021     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
5022     __ movdl($tmp$$XMMRegister, $src1$$Register);
5023     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
5024     __ movdl($dst$$Register, $tmp2$$XMMRegister);
5025   %}
5026   ins_pipe( pipe_slow );
5027 %}
5028 
5029 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
5030   predicate(UseAVX > 0);
5031   match(Set dst (MulReductionVI src1 src2));
5032   effect(TEMP tmp, TEMP tmp2);
5033   format %{ "pshufd   $tmp2,$src2,0xE\n\t"
5034             "vpmulld  $tmp,$src2,$tmp2\n\t"
5035             "pshufd   $tmp2,$tmp,0x1\n\t"
5036             "vpmulld  $tmp,$tmp,$tmp2\n\t"
5037             "movd     $tmp2,$src1\n\t"
5038             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
5039             "movd     $dst,$tmp2\t! mul reduction4I" %}
5040   ins_encode %{
5041     int vector_len = 0;
5042     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
5043     __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5044     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
5045     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5046     __ movdl($tmp2$$XMMRegister, $src1$$Register);
5047     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5048     __ movdl($dst$$Register, $tmp2$$XMMRegister);
5049   %}
5050   ins_pipe( pipe_slow );
5051 %}
5052 
5053 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
5054   predicate(UseAVX > 0);
5055   match(Set dst (MulReductionVI src1 src2));
5056   effect(TEMP tmp, TEMP tmp2);
5057   format %{ "vextracti128  $tmp,$src2\n\t"
5058             "vpmulld  $tmp,$tmp,$src2\n\t"
5059             "pshufd   $tmp2,$tmp,0xE\n\t"
5060             "vpmulld  $tmp,$tmp,$tmp2\n\t"
5061             "pshufd   $tmp2,$tmp,0x1\n\t"
5062             "vpmulld  $tmp,$tmp,$tmp2\n\t"
5063             "movd     $tmp2,$src1\n\t"
5064             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
5065             "movd     $dst,$tmp2\t! mul reduction8I" %}
5066   ins_encode %{
5067     int vector_len = 0;
5068     __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
5069     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
5070     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
5071     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5072     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
5073     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5074     __ movdl($tmp2$$XMMRegister, $src1$$Register);
5075     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5076     __ movdl($dst$$Register, $tmp2$$XMMRegister);
5077   %}
5078   ins_pipe( pipe_slow );
5079 %}
5080 
5081 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
5082   predicate(UseAVX > 2);
5083   match(Set dst (MulReductionVI src1 src2));
5084   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
5085   format %{ "vextracti64x4  $tmp3,$src2\n\t"
5086             "vpmulld  $tmp3,$tmp3,$src2\n\t"
5087             "vextracti128   $tmp,$tmp3\n\t"
5088             "vpmulld  $tmp,$tmp,$tmp3\n\t"
5089             "pshufd   $tmp2,$tmp,0xE\n\t"
5090             "vpmulld  $tmp,$tmp,$tmp2\n\t"
5091             "pshufd   $tmp2,$tmp,0x1\n\t"
5092             "vpmulld  $tmp,$tmp,$tmp2\n\t"
5093             "movd     $tmp2,$src1\n\t"
5094             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
5095             "movd     $dst,$tmp2\t! mul reduction16I" %}
5096   ins_encode %{
5097     __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
5098     __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
5099     __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
5100     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
5101     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
5102     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
5103     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
5104     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
5105     __ movdl($tmp2$$XMMRegister, $src1$$Register);
5106     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
5107     __ movdl($dst$$Register, $tmp2$$XMMRegister);
5108   %}
5109   ins_pipe( pipe_slow );
5110 %}
5111 
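// Long multiply reductions.  These rely on vpmullq (packed 64x64->64 multiply),
// an AVX-512DQ instruction, hence the supports_avx512dq() predicates; the rules
// are LP64-only because the scalar value travels through a 64-bit general
// register via movdq.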
5112 #ifdef _LP64
5113 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
5114   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
5115   match(Set dst (MulReductionVL src1 src2));
5116   effect(TEMP tmp, TEMP tmp2);
5117   format %{ "pshufd   $tmp2,$src2,0xE\n\t"
5118             "vpmullq  $tmp,$src2,$tmp2\n\t"
5119             "movdq    $tmp2,$src1\n\t"
5120             "vpmullq  $tmp2,$tmp,$tmp2\n\t"
5121             "movdq    $dst,$tmp2\t! mul reduction2L" %}
5122   ins_encode %{
5123     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
5124     __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
5125     __ movdq($tmp2$$XMMRegister, $src1$$Register);
5126     __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
5127     __ movdq($dst$$Register, $tmp2$$XMMRegister);
5128   %}
5129   ins_pipe( pipe_slow );
5130 %}
5131 
5132 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
5133   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
5134   match(Set dst (MulReductionVL src1 src2));
5135   effect(TEMP tmp, TEMP tmp2);
5136   format %{ "vextracti64x2  $tmp,$src2, 0x1\n\t"
5137             "vpmullq  $tmp2,$tmp,$src2\n\t"
5138             "pshufd   $tmp,$tmp2,0xE\n\t"
5139             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
5140             "movdq    $tmp,$src1\n\t"
5141             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
5142             "movdq    $dst,$tmp2\t! mul reduction4L" %}
5143   ins_encode %{
5144     __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
5145     __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
5146     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
5147     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
5148     __ movdq($tmp$$XMMRegister, $src1$$Register);
5149     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
5150     __ movdq($dst$$Register, $tmp2$$XMMRegister);
5151   %}
5152   ins_pipe( pipe_slow );
5153 %}
5154 
5155 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
5156   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
5157   match(Set dst (MulReductionVL src1 src2));
5158   effect(TEMP tmp, TEMP tmp2);
5159   format %{ "vextracti64x4  $tmp2,$src2\n\t"
5160             "vpmullq  $tmp2,$tmp2,$src2\n\t"
5161             "vextracti128   $tmp,$tmp2\n\t"
5162             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
5163             "pshufd   $tmp,$tmp2,0xE\n\t"
5164             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
5165             "movdq    $tmp,$src1\n\t"
5166             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
5167             "movdq    $dst,$tmp2\t! mul reduction8L" %}
5168   ins_encode %{
5169     __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
5170     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
5171     __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
5172     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
5173     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
5174     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
5175     __ movdq($tmp$$XMMRegister, $src1$$Register);
5176     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
5177     __ movdq($dst$$Register, $tmp2$$XMMRegister);
5178   %}
5179   ins_pipe( pipe_slow );
5180 %}
5181 #endif
5182 
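// Floating-point multiply reductions.  As in the FP add reductions, the lanes
// are combined one at a time with scalar mulss/mulsd in lane order, so the
// rounding behaviour is intended to match that of a sequential scalar loop.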
5183 instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
5184   predicate(UseSSE >= 1 && UseAVX == 0);
5185   match(Set dst (MulReductionVF src1 src2));
5186   effect(TEMP tmp, TEMP tmp2);
5187   format %{ "movdqu  $tmp,$src1\n\t"
5188             "mulss   $tmp,$src2\n\t"
5189             "pshufd  $tmp2,$src2,0x01\n\t"
5190             "mulss   $tmp,$tmp2\n\t"
5191             "movdqu  $dst,$tmp\t! mul reduction2F" %}
5192   ins_encode %{
5193     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
5194     __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
5195     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
5196     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
5197     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
5198   %}
5199   ins_pipe( pipe_slow );
5200 %}
5201 
5202 instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
5203   predicate(UseAVX > 0);
5204   match(Set dst (MulReductionVF src1 src2));
5205   effect(TEMP tmp, TEMP tmp2);
5206   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
5207             "pshufd  $tmp,$src2,0x01\n\t"
5208             "vmulss  $dst,$tmp2,$tmp\t! mul reduction2F" %}
5209   ins_encode %{
5210     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
5211     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5212     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5213   %}
5214   ins_pipe( pipe_slow );
5215 %}
5216 
5217 instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
5218   predicate(UseSSE >= 1 && UseAVX == 0);
5219   match(Set dst (MulReductionVF src1 src2));
5220   effect(TEMP tmp, TEMP tmp2);
5221   format %{ "movdqu  $tmp,$src1\n\t"
5222             "mulss   $tmp,$src2\n\t"
5223             "pshufd  $tmp2,$src2,0x01\n\t"
5224             "mulss   $tmp,$tmp2\n\t"
5225             "pshufd  $tmp2,$src2,0x02\n\t"
5226             "mulss   $tmp,$tmp2\n\t"
5227             "pshufd  $tmp2,$src2,0x03\n\t"
5228             "mulss   $tmp,$tmp2\n\t"
5229             "movdqu  $dst,$tmp\t! mul reduction4F" %}
5230   ins_encode %{
5231     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
5232     __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
5233     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
5234     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
5235     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
5236     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
5237     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
5238     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
5239     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
5240   %}
5241   ins_pipe( pipe_slow );
5242 %}
5243 
5244 instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
5245   predicate(UseAVX > 0);
5246   match(Set dst (MulReductionVF src1 src2));
5247   effect(TEMP tmp, TEMP tmp2);
5248   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
5249             "pshufd  $tmp,$src2,0x01\n\t"
5250             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5251             "pshufd  $tmp,$src2,0x02\n\t"
5252             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5253             "pshufd  $tmp,$src2,0x03\n\t"
5254             "vmulss  $dst,$tmp2,$tmp\t! mul reduction4F" %}
5255   ins_encode %{
5256     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
5257     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5258     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5259     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5260     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5261     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5262     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5263   %}
5264   ins_pipe( pipe_slow );
5265 %}
5266 
5267 instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
5268   predicate(UseAVX > 0);
5269   match(Set dst (MulReductionVF src1 src2));
5270   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
5271   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
5272             "pshufd  $tmp,$src2,0x01\n\t"
5273             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5274             "pshufd  $tmp,$src2,0x02\n\t"
5275             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5276             "pshufd  $tmp,$src2,0x03\n\t"
5277             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5278             "vextractf128  $tmp3,$src2\n\t"
5279             "vmulss  $tmp2,$tmp2,$tmp3\n\t"
5280             "pshufd  $tmp,$tmp3,0x01\n\t"
5281             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5282             "pshufd  $tmp,$tmp3,0x02\n\t"
5283             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5284             "pshufd  $tmp,$tmp3,0x03\n\t"
5285             "vmulss  $dst,$tmp2,$tmp\t! mul reduction8F" %}
5286   ins_encode %{
5287     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
5288     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5289     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5290     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5291     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5292     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5293     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5294     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
5295     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
5296     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
5297     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5298     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
5299     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5300     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
5301     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5302   %}
5303   ins_pipe( pipe_slow );
5304 %}
5305 
5306 instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
5307   predicate(UseAVX > 2);
5308   match(Set dst (MulReductionVF src1 src2));
5309   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
5310   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
5311             "pshufd  $tmp,$src2,0x01\n\t"
5312             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5313             "pshufd  $tmp,$src2,0x02\n\t"
5314             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5315             "pshufd  $tmp,$src2,0x03\n\t"
5316             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5317             "vextractf32x4  $tmp3,$src2, 0x1\n\t"
5318             "vmulss  $tmp2,$tmp2,$tmp3\n\t"
5319             "pshufd  $tmp,$tmp3,0x01\n\t"
5320             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5321             "pshufd  $tmp,$tmp3,0x02\n\t"
5322             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5323             "pshufd  $tmp,$tmp3,0x03\n\t"
5324             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5325             "vextractf32x4  $tmp3,$src2, 0x2\n\t"
5326             "vmulss  $tmp2,$tmp2,$tmp3\n\t"
5327             "pshufd  $tmp,$tmp3,0x01\n\t"
5328             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5329             "pshufd  $tmp,$tmp3,0x02\n\t"
5330             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5331             "pshufd  $tmp,$tmp3,0x03\n\t"
5332             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5333             "vextractf32x4  $tmp3,$src2, 0x3\n\t"
5334             "vmulss  $tmp2,$tmp2,$tmp3\n\t"
5335             "pshufd  $tmp,$tmp3,0x01\n\t"
5336             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5337             "pshufd  $tmp,$tmp3,0x02\n\t"
5338             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5339             "pshufd  $tmp,$tmp3,0x03\n\t"
5340             "vmulss  $dst,$tmp2,$tmp\t! mul reduction16F" %}
5341   ins_encode %{
5342     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
5343     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5344     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5345     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5346     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5347     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5348     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5349     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
5350     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
5351     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
5352     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5353     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
5354     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5355     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
5356     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5357     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
5358     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
5359     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
5360     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5361     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
5362     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5363     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
5364     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5365     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
5366     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
5367     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
5368     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5369     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
5370     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5371     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
5372     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5373   %}
5374   ins_pipe( pipe_slow );
5375 %}
5376 
5377 instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
5378   predicate(UseSSE >= 1 && UseAVX == 0);
5379   match(Set dst (MulReductionVD src1 src2));
5380   effect(TEMP tmp, TEMP dst);
5381   format %{ "movdqu  $tmp,$src1\n\t"
5382             "mulsd   $tmp,$src2\n\t"
5383             "pshufd  $dst,$src2,0xE\n\t"
5384             "mulsd   $dst,$tmp\t! mul reduction2D" %}
5385   ins_encode %{
5386     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
5387     __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister);
5388     __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
5389     __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
5390   %}
5391   ins_pipe( pipe_slow );
5392 %}
5393 
5394 instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
5395   predicate(UseAVX > 0);
5396   match(Set dst (MulReductionVD src1 src2));
5397   effect(TEMP tmp, TEMP tmp2);
5398   format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
5399             "pshufd  $tmp,$src2,0xE\n\t"
5400             "vmulsd  $dst,$tmp2,$tmp\t! mul reduction2D" %}
5401   ins_encode %{
5402     __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
5403     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5404     __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5405   %}
5406   ins_pipe( pipe_slow );
5407 %}
5408 
5409 instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
5410   predicate(UseAVX > 0);
5411   match(Set dst (MulReductionVD src1 src2));
5412   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
5413   format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
5414             "pshufd  $tmp,$src2,0xE\n\t"
5415             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
5416             "vextractf128  $tmp3,$src2\n\t"
5417             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
5418             "pshufd  $tmp,$tmp3,0xE\n\t"
5419             "vmulsd  $dst,$tmp2,$tmp\t! mul reduction4D" %}
5420   ins_encode %{
5421     __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
5422     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5423     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5424     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
5425     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
5426     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
5427     __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5428   %}
5429   ins_pipe( pipe_slow );
5430 %}
5431 
5432 instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
5433   predicate(UseAVX > 2);
5434   match(Set dst (MulReductionVD src1 src2));
5435   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
5436   format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
5437             "pshufd  $tmp,$src2,0xE\n\t"
5438             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
5439             "vextractf64x2  $tmp3,$src2, 0x1\n\t"
5440             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
5441             "pshufd  $tmp,$tmp3,0xE\n\t"
5442             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
5443             "vextractf64x2  $tmp3,$src2, 0x2\n\t"
5444             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
5445             "pshufd  $tmp,$tmp3,0xE\n\t"
5446             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
5447             "vextractf64x2  $tmp3,$src2, 0x3\n\t"
5448             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
5449             "pshufd  $tmp,$tmp3,0xE\n\t"
5450             "vmulsd  $dst,$tmp2,$tmp\t! mul reduction8D" %}
5451   ins_encode %{
5452     __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
5453     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5454     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5455     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
5456     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
5457     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
5458     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5459     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
5460     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
5461     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
5462     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5463     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
5464     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
5465     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
5466     __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5467   %}
5468   ins_pipe( pipe_slow );
5469 %}
5470 
5471 // ====================VECTOR ARITHMETIC=======================================
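//
// Most packed operations below come in three flavors per vector width: a
// two-operand SSE form that updates $dst in place, an AVX three-operand
// register form, and an AVX register/memory form that folds a LoadVector into
// the instruction.  In the AVX forms, vector_len selects the encoded width:
// 0 = 128-bit (XMM), 1 = 256-bit (YMM), 2 = 512-bit (ZMM).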
5472 
5473 // --------------------------------- ADD --------------------------------------
5474 
5475 // Bytes vector add
5476 instruct vadd4B(vecS dst, vecS src) %{
5477   predicate(n->as_Vector()->length() == 4);
5478   match(Set dst (AddVB dst src));
5479   format %{ "paddb   $dst,$src\t! add packed4B" %}
5480   ins_encode %{
5481     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
5482   %}
5483   ins_pipe( pipe_slow );
5484 %}
5485 
5486 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
5487   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5488   match(Set dst (AddVB src1 src2));
5489   format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
5490   ins_encode %{
5491     int vector_len = 0;
5492     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5493   %}
5494   ins_pipe( pipe_slow );
5495 %}
5496 
5497 instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{
5498   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5499   match(Set dst (AddVB src (LoadVector mem)));
5500   format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
5501   ins_encode %{
5502     int vector_len = 0;
5503     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5504   %}
5505   ins_pipe( pipe_slow );
5506 %}
5507 
5508 instruct vadd8B(vecD dst, vecD src) %{
5509   predicate(n->as_Vector()->length() == 8);
5510   match(Set dst (AddVB dst src));
5511   format %{ "paddb   $dst,$src\t! add packed8B" %}
5512   ins_encode %{
5513     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
5514   %}
5515   ins_pipe( pipe_slow );
5516 %}
5517 
5518 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
5519   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5520   match(Set dst (AddVB src1 src2));
5521   format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
5522   ins_encode %{
5523     int vector_len = 0;
5524     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5525   %}
5526   ins_pipe( pipe_slow );
5527 %}
5528 
5529 instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{
5530   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5531   match(Set dst (AddVB src (LoadVector mem)));
5532   format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
5533   ins_encode %{
5534     int vector_len = 0;
5535     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5536   %}
5537   ins_pipe( pipe_slow );
5538 %}
5539 
5540 instruct vadd16B(vecX dst, vecX src) %{
5541   predicate(n->as_Vector()->length() == 16);
5542   match(Set dst (AddVB dst src));
5543   format %{ "paddb   $dst,$src\t! add packed16B" %}
5544   ins_encode %{
5545     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
5546   %}
5547   ins_pipe( pipe_slow );
5548 %}
5549 
5550 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
5551   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
5552   match(Set dst (AddVB src1 src2));
5553   format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
5554   ins_encode %{
5555     int vector_len = 0;
5556     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5557   %}
5558   ins_pipe( pipe_slow );
5559 %}
5560 
5561 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
5562   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
5563   match(Set dst (AddVB src (LoadVector mem)));
5564   format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
5565   ins_encode %{
5566     int vector_len = 0;
5567     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5568   %}
5569   ins_pipe( pipe_slow );
5570 %}
5571 
5572 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
5573   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
5574   match(Set dst (AddVB src1 src2));
5575   format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
5576   ins_encode %{
5577     int vector_len = 1;
5578     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5579   %}
5580   ins_pipe( pipe_slow );
5581 %}
5582 
5583 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
5584   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
5585   match(Set dst (AddVB src (LoadVector mem)));
5586   format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
5587   ins_encode %{
5588     int vector_len = 1;
5589     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5590   %}
5591   ins_pipe( pipe_slow );
5592 %}
5593 
5594 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
5595   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
5596   match(Set dst (AddVB src1 src2));
5597   format %{ "vpaddb  $dst,$src1,$src2\t! add packed64B" %}
5598   ins_encode %{
5599     int vector_len = 2;
5600     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5601   %}
5602   ins_pipe( pipe_slow );
5603 %}
5604 
5605 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
5606   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
5607   match(Set dst (AddVB src (LoadVector mem)));
5608   format %{ "vpaddb  $dst,$src,$mem\t! add packed64B" %}
5609   ins_encode %{
5610     int vector_len = 2;
5611     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5612   %}
5613   ins_pipe( pipe_slow );
5614 %}
5615 
5616 // Shorts/Chars vector add
5617 instruct vadd2S(vecS dst, vecS src) %{
5618   predicate(n->as_Vector()->length() == 2);
5619   match(Set dst (AddVS dst src));
5620   format %{ "paddw   $dst,$src\t! add packed2S" %}
5621   ins_encode %{
5622     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
5623   %}
5624   ins_pipe( pipe_slow );
5625 %}
5626 
5627 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
5628   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5629   match(Set dst (AddVS src1 src2));
5630   format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
5631   ins_encode %{
5632     int vector_len = 0;
5633     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5634   %}
5635   ins_pipe( pipe_slow );
5636 %}
5637 
5638 instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{
5639   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5640   match(Set dst (AddVS src (LoadVector mem)));
5641   format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
5642   ins_encode %{
5643     int vector_len = 0;
5644     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5645   %}
5646   ins_pipe( pipe_slow );
5647 %}
5648 
5649 instruct vadd4S(vecD dst, vecD src) %{
5650   predicate(n->as_Vector()->length() == 4);
5651   match(Set dst (AddVS dst src));
5652   format %{ "paddw   $dst,$src\t! add packed4S" %}
5653   ins_encode %{
5654     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
5655   %}
5656   ins_pipe( pipe_slow );
5657 %}
5658 
5659 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
5660   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5661   match(Set dst (AddVS src1 src2));
5662   format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
5663   ins_encode %{
5664     int vector_len = 0;
5665     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5666   %}
5667   ins_pipe( pipe_slow );
5668 %}
5669 
5670 instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{
5671   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5672   match(Set dst (AddVS src (LoadVector mem)));
5673   format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
5674   ins_encode %{
5675     int vector_len = 0;
5676     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5677   %}
5678   ins_pipe( pipe_slow );
5679 %}
5680 
5681 instruct vadd8S(vecX dst, vecX src) %{
5682   predicate(n->as_Vector()->length() == 8);
5683   match(Set dst (AddVS dst src));
5684   format %{ "paddw   $dst,$src\t! add packed8S" %}
5685   ins_encode %{
5686     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
5687   %}
5688   ins_pipe( pipe_slow );
5689 %}
5690 
5691 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
5692   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5693   match(Set dst (AddVS src1 src2));
5694   format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
5695   ins_encode %{
5696     int vector_len = 0;
5697     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5698   %}
5699   ins_pipe( pipe_slow );
5700 %}
5701 
5702 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
5703   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5704   match(Set dst (AddVS src (LoadVector mem)));
5705   format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
5706   ins_encode %{
5707     int vector_len = 0;
5708     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5709   %}
5710   ins_pipe( pipe_slow );
5711 %}
5712 
5713 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
5714   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
5715   match(Set dst (AddVS src1 src2));
5716   format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
5717   ins_encode %{
5718     int vector_len = 1;
5719     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5720   %}
5721   ins_pipe( pipe_slow );
5722 %}
5723 
5724 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
5725   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
5726   match(Set dst (AddVS src (LoadVector mem)));
5727   format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
5728   ins_encode %{
5729     int vector_len = 1;
5730     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5731   %}
5732   ins_pipe( pipe_slow );
5733 %}
5734 
5735 instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
5736   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
5737   match(Set dst (AddVS src1 src2));
5738   format %{ "vpaddw  $dst,$src1,$src2\t! add packed32S" %}
5739   ins_encode %{
5740     int vector_len = 2;
5741     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5742   %}
5743   ins_pipe( pipe_slow );
5744 %}
5745 
5746 instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
5747   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
5748   match(Set dst (AddVS src (LoadVector mem)));
5749   format %{ "vpaddw  $dst,$src,$mem\t! add packed32S" %}
5750   ins_encode %{
5751     int vector_len = 2;
5752     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5753   %}
5754   ins_pipe( pipe_slow );
5755 %}
5756 
5757 // Integers vector add
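// For example (illustrative sketch only, not part of this file): a loop such as
//     for (int i = 0; i < n; i++) { c[i] = a[i] + b[i]; }
// may be auto-vectorized by SuperWord into AddVI nodes over packed lanes,
// which are then matched by the vadd2I/vadd4I/vadd8I/vadd16I rules below.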
5758 instruct vadd2I(vecD dst, vecD src) %{
5759   predicate(n->as_Vector()->length() == 2);
5760   match(Set dst (AddVI dst src));
5761   format %{ "paddd   $dst,$src\t! add packed2I" %}
5762   ins_encode %{
5763     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
5764   %}
5765   ins_pipe( pipe_slow );
5766 %}
5767 
5768 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
5769   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5770   match(Set dst (AddVI src1 src2));
5771   format %{ "vpaddd  $dst,$src1,$src2\t! add packed2I" %}
5772   ins_encode %{
5773     int vector_len = 0;
5774     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5775   %}
5776   ins_pipe( pipe_slow );
5777 %}
5778 
5779 instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
5780   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5781   match(Set dst (AddVI src (LoadVector mem)));
5782   format %{ "vpaddd  $dst,$src,$mem\t! add packed2I" %}
5783   ins_encode %{
5784     int vector_len = 0;
5785     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5786   %}
5787   ins_pipe( pipe_slow );
5788 %}
5789 
5790 instruct vadd4I(vecX dst, vecX src) %{
5791   predicate(n->as_Vector()->length() == 4);
5792   match(Set dst (AddVI dst src));
5793   format %{ "paddd   $dst,$src\t! add packed4I" %}
5794   ins_encode %{
5795     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
5796   %}
5797   ins_pipe( pipe_slow );
5798 %}
5799 
5800 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
5801   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5802   match(Set dst (AddVI src1 src2));
5803   format %{ "vpaddd  $dst,$src1,$src2\t! add packed4I" %}
5804   ins_encode %{
5805     int vector_len = 0;
5806     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5807   %}
5808   ins_pipe( pipe_slow );
5809 %}
5810 
5811 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
5812   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5813   match(Set dst (AddVI src (LoadVector mem)));
5814   format %{ "vpaddd  $dst,$src,$mem\t! add packed4I" %}
5815   ins_encode %{
5816     int vector_len = 0;
5817     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5818   %}
5819   ins_pipe( pipe_slow );
5820 %}
5821 
5822 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
5823   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
5824   match(Set dst (AddVI src1 src2));
5825   format %{ "vpaddd  $dst,$src1,$src2\t! add packed8I" %}
5826   ins_encode %{
5827     int vector_len = 1;
5828     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5829   %}
5830   ins_pipe( pipe_slow );
5831 %}
5832 
5833 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
5834   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
5835   match(Set dst (AddVI src (LoadVector mem)));
5836   format %{ "vpaddd  $dst,$src,$mem\t! add packed8I" %}
5837   ins_encode %{
5838     int vector_len = 1;
5839     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5840   %}
5841   ins_pipe( pipe_slow );
5842 %}
5843 
5844 instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
5845   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5846   match(Set dst (AddVI src1 src2));
5847   format %{ "vpaddd  $dst,$src1,$src2\t! add packed16I" %}
5848   ins_encode %{
5849     int vector_len = 2;
5850     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5851   %}
5852   ins_pipe( pipe_slow );
5853 %}
5854 
5855 instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
5856   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5857   match(Set dst (AddVI src (LoadVector mem)));
5858   format %{ "vpaddd  $dst,$src,$mem\t! add packed16I" %}
5859   ins_encode %{
5860     int vector_len = 2;
5861     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5862   %}
5863   ins_pipe( pipe_slow );
5864 %}
5865 
5866 // Longs vector add
5867 instruct vadd2L(vecX dst, vecX src) %{
5868   predicate(n->as_Vector()->length() == 2);
5869   match(Set dst (AddVL dst src));
5870   format %{ "paddq   $dst,$src\t! add packed2L" %}
5871   ins_encode %{
5872     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
5873   %}
5874   ins_pipe( pipe_slow );
5875 %}
5876 
5877 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
5878   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5879   match(Set dst (AddVL src1 src2));
5880   format %{ "vpaddq  $dst,$src1,$src2\t! add packed2L" %}
5881   ins_encode %{
5882     int vector_len = 0;
5883     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5884   %}
5885   ins_pipe( pipe_slow );
5886 %}
5887 
5888 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
5889   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5890   match(Set dst (AddVL src (LoadVector mem)));
5891   format %{ "vpaddq  $dst,$src,$mem\t! add packed2L" %}
5892   ins_encode %{
5893     int vector_len = 0;
5894     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5895   %}
5896   ins_pipe( pipe_slow );
5897 %}
5898 
5899 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
5900   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
5901   match(Set dst (AddVL src1 src2));
5902   format %{ "vpaddq  $dst,$src1,$src2\t! add packed4L" %}
5903   ins_encode %{
5904     int vector_len = 1;
5905     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5906   %}
5907   ins_pipe( pipe_slow );
5908 %}
5909 
5910 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
5911   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
5912   match(Set dst (AddVL src (LoadVector mem)));
5913   format %{ "vpaddq  $dst,$src,$mem\t! add packed4L" %}
5914   ins_encode %{
5915     int vector_len = 1;
5916     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5917   %}
5918   ins_pipe( pipe_slow );
5919 %}
5920 
5921 instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
5922   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
5923   match(Set dst (AddVL src1 src2));
5924   format %{ "vpaddq  $dst,$src1,$src2\t! add packed8L" %}
5925   ins_encode %{
5926     int vector_len = 2;
5927     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5928   %}
5929   ins_pipe( pipe_slow );
5930 %}
5931 
5932 instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
5933   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
5934   match(Set dst (AddVL src (LoadVector mem)));
5935   format %{ "vpaddq  $dst,$src,$mem\t! add packed8L" %}
5936   ins_encode %{
5937     int vector_len = 2;
5938     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5939   %}
5940   ins_pipe( pipe_slow );
5941 %}
5942 
5943 // Floats vector add
5944 instruct vadd2F(vecD dst, vecD src) %{
5945   predicate(n->as_Vector()->length() == 2);
5946   match(Set dst (AddVF dst src));
5947   format %{ "addps   $dst,$src\t! add packed2F" %}
5948   ins_encode %{
5949     __ addps($dst$$XMMRegister, $src$$XMMRegister);
5950   %}
5951   ins_pipe( pipe_slow );
5952 %}
5953 
5954 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
5955   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5956   match(Set dst (AddVF src1 src2));
5957   format %{ "vaddps  $dst,$src1,$src2\t! add packed2F" %}
5958   ins_encode %{
5959     int vector_len = 0;
5960     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5961   %}
5962   ins_pipe( pipe_slow );
5963 %}
5964 
5965 instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
5966   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5967   match(Set dst (AddVF src (LoadVector mem)));
5968   format %{ "vaddps  $dst,$src,$mem\t! add packed2F" %}
5969   ins_encode %{
5970     int vector_len = 0;
5971     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5972   %}
5973   ins_pipe( pipe_slow );
5974 %}
5975 
5976 instruct vadd4F(vecX dst, vecX src) %{
5977   predicate(n->as_Vector()->length() == 4);
5978   match(Set dst (AddVF dst src));
5979   format %{ "addps   $dst,$src\t! add packed4F" %}
5980   ins_encode %{
5981     __ addps($dst$$XMMRegister, $src$$XMMRegister);
5982   %}
5983   ins_pipe( pipe_slow );
5984 %}
5985 
5986 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
5987   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5988   match(Set dst (AddVF src1 src2));
5989   format %{ "vaddps  $dst,$src1,$src2\t! add packed4F" %}
5990   ins_encode %{
5991     int vector_len = 0;
5992     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5993   %}
5994   ins_pipe( pipe_slow );
5995 %}
5996 
5997 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
5998   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5999   match(Set dst (AddVF src (LoadVector mem)));
6000   format %{ "vaddps  $dst,$src,$mem\t! add packed4F" %}
6001   ins_encode %{
6002     int vector_len = 0;
6003     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6004   %}
6005   ins_pipe( pipe_slow );
6006 %}
6007 
6008 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
6009   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6010   match(Set dst (AddVF src1 src2));
6011   format %{ "vaddps  $dst,$src1,$src2\t! add packed8F" %}
6012   ins_encode %{
6013     int vector_len = 1;
6014     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6015   %}
6016   ins_pipe( pipe_slow );
6017 %}
6018 
6019 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
6020   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6021   match(Set dst (AddVF src (LoadVector mem)));
6022   format %{ "vaddps  $dst,$src,$mem\t! add packed8F" %}
6023   ins_encode %{
6024     int vector_len = 1;
6025     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6026   %}
6027   ins_pipe( pipe_slow );
6028 %}
6029 
6030 instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
6031   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6032   match(Set dst (AddVF src1 src2));
6033   format %{ "vaddps  $dst,$src1,$src2\t! add packed16F" %}
6034   ins_encode %{
6035     int vector_len = 2;
6036     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6037   %}
6038   ins_pipe( pipe_slow );
6039 %}
6040 
6041 instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
6042   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6043   match(Set dst (AddVF src (LoadVector mem)));
6044   format %{ "vaddps  $dst,$src,$mem\t! add packed16F" %}
6045   ins_encode %{
6046     int vector_len = 2;
6047     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6048   %}
6049   ins_pipe( pipe_slow );
6050 %}
6051 
6052 // Doubles vector add
6053 instruct vadd2D(vecX dst, vecX src) %{
6054   predicate(n->as_Vector()->length() == 2);
6055   match(Set dst (AddVD dst src));
6056   format %{ "addpd   $dst,$src\t! add packed2D" %}
6057   ins_encode %{
6058     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
6059   %}
6060   ins_pipe( pipe_slow );
6061 %}
6062 
6063 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
6064   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6065   match(Set dst (AddVD src1 src2));
6066   format %{ "vaddpd  $dst,$src1,$src2\t! add packed2D" %}
6067   ins_encode %{
6068     int vector_len = 0;
6069     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6070   %}
6071   ins_pipe( pipe_slow );
6072 %}
6073 
6074 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
6075   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6076   match(Set dst (AddVD src (LoadVector mem)));
6077   format %{ "vaddpd  $dst,$src,$mem\t! add packed2D" %}
6078   ins_encode %{
6079     int vector_len = 0;
6080     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6081   %}
6082   ins_pipe( pipe_slow );
6083 %}
6084 
6085 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
6086   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6087   match(Set dst (AddVD src1 src2));
6088   format %{ "vaddpd  $dst,$src1,$src2\t! add packed4D" %}
6089   ins_encode %{
6090     int vector_len = 1;
6091     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6092   %}
6093   ins_pipe( pipe_slow );
6094 %}
6095 
6096 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
6097   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6098   match(Set dst (AddVD src (LoadVector mem)));
6099   format %{ "vaddpd  $dst,$src,$mem\t! add packed4D" %}
6100   ins_encode %{
6101     int vector_len = 1;
6102     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6103   %}
6104   ins_pipe( pipe_slow );
6105 %}
6106 
6107 instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
6108   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6109   match(Set dst (AddVD src1 src2));
6110   format %{ "vaddpd  $dst,$src1,$src2\t! add packed8D" %}
6111   ins_encode %{
6112     int vector_len = 2;
6113     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6114   %}
6115   ins_pipe( pipe_slow );
6116 %}
6117 
6118 instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
6119   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6120   match(Set dst (AddVD src (LoadVector mem)));
6121   format %{ "vaddpd  $dst,$src,$mem\t! add packed8D" %}
6122   ins_encode %{
6123     int vector_len = 2;
6124     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6125   %}
6126   ins_pipe( pipe_slow );
6127 %}
6128 
6129 // --------------------------------- SUB --------------------------------------
6130 
6131 // Bytes vector sub
6132 instruct vsub4B(vecS dst, vecS src) %{
6133   predicate(n->as_Vector()->length() == 4);
6134   match(Set dst (SubVB dst src));
6135   format %{ "psubb   $dst,$src\t! sub packed4B" %}
6136   ins_encode %{
6137     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
6138   %}
6139   ins_pipe( pipe_slow );
6140 %}
6141 
6142 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
6143   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6144   match(Set dst (SubVB src1 src2));
6145   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
6146   ins_encode %{
6147     int vector_len = 0;
6148     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6149   %}
6150   ins_pipe( pipe_slow );
6151 %}
6152 
6153 instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{
6154   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6155   match(Set dst (SubVB src (LoadVector mem)));
6156   format %{ "vpsubb  $dst,$src,$mem\t! sub packed4B" %}
6157   ins_encode %{
6158     int vector_len = 0;
6159     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6160   %}
6161   ins_pipe( pipe_slow );
6162 %}
6163 
6164 instruct vsub8B(vecD dst, vecD src) %{
6165   predicate(n->as_Vector()->length() == 8);
6166   match(Set dst (SubVB dst src));
6167   format %{ "psubb   $dst,$src\t! sub packed8B" %}
6168   ins_encode %{
6169     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
6170   %}
6171   ins_pipe( pipe_slow );
6172 %}
6173 
6174 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
6175   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6176   match(Set dst (SubVB src1 src2));
6177   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
6178   ins_encode %{
6179     int vector_len = 0;
6180     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6181   %}
6182   ins_pipe( pipe_slow );
6183 %}
6184 
6185 instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{
6186   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6187   match(Set dst (SubVB src (LoadVector mem)));
6188   format %{ "vpsubb  $dst,$src,$mem\t! sub packed8B" %}
6189   ins_encode %{
6190     int vector_len = 0;
6191     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6192   %}
6193   ins_pipe( pipe_slow );
6194 %}
6195 
6196 instruct vsub16B(vecX dst, vecX src) %{
6197   predicate(n->as_Vector()->length() == 16);
6198   match(Set dst (SubVB dst src));
6199   format %{ "psubb   $dst,$src\t! sub packed16B" %}
6200   ins_encode %{
6201     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
6202   %}
6203   ins_pipe( pipe_slow );
6204 %}
6205 
6206 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
6207   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
6208   match(Set dst (SubVB src1 src2));
6209   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
6210   ins_encode %{
6211     int vector_len = 0;
6212     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6213   %}
6214   ins_pipe( pipe_slow );
6215 %}
6216 
6217 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
6218   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
6219   match(Set dst (SubVB src (LoadVector mem)));
6220   format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
6221   ins_encode %{
6222     int vector_len = 0;
6223     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6224   %}
6225   ins_pipe( pipe_slow );
6226 %}
6227 
6228 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
6229   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
6230   match(Set dst (SubVB src1 src2));
6231   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
6232   ins_encode %{
6233     int vector_len = 1;
6234     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6235   %}
6236   ins_pipe( pipe_slow );
6237 %}
6238 
6239 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
6240   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
6241   match(Set dst (SubVB src (LoadVector mem)));
6242   format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
6243   ins_encode %{
6244     int vector_len = 1;
6245     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6246   %}
6247   ins_pipe( pipe_slow );
6248 %}
6249 
6250 instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
6251   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
6252   match(Set dst (SubVB src1 src2));
6253   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed64B" %}
6254   ins_encode %{
6255     int vector_len = 2;
6256     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6257   %}
6258   ins_pipe( pipe_slow );
6259 %}
6260 
6261 instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
6262   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
6263   match(Set dst (SubVB src (LoadVector mem)));
6264   format %{ "vpsubb  $dst,$src,$mem\t! sub packed64B" %}
6265   ins_encode %{
6266     int vector_len = 2;
6267     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6268   %}
6269   ins_pipe( pipe_slow );
6270 %}
6271 
6272 // Shorts/Chars vector sub
6273 instruct vsub2S(vecS dst, vecS src) %{
6274   predicate(n->as_Vector()->length() == 2);
6275   match(Set dst (SubVS dst src));
6276   format %{ "psubw   $dst,$src\t! sub packed2S" %}
6277   ins_encode %{
6278     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
6279   %}
6280   ins_pipe( pipe_slow );
6281 %}
6282 
6283 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
6284   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6285   match(Set dst (SubVS src1 src2));
6286   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
6287   ins_encode %{
6288     int vector_len = 0;
6289     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6290   %}
6291   ins_pipe( pipe_slow );
6292 %}
6293 
6294 instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{
6295   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6296   match(Set dst (SubVS src (LoadVector mem)));
6297   format %{ "vpsubw  $dst,$src,$mem\t! sub packed2S" %}
6298   ins_encode %{
6299     int vector_len = 0;
6300     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6301   %}
6302   ins_pipe( pipe_slow );
6303 %}
6304 
6305 instruct vsub4S(vecD dst, vecD src) %{
6306   predicate(n->as_Vector()->length() == 4);
6307   match(Set dst (SubVS dst src));
6308   format %{ "psubw   $dst,$src\t! sub packed4S" %}
6309   ins_encode %{
6310     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
6311   %}
6312   ins_pipe( pipe_slow );
6313 %}
6314 
6315 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
6316   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6317   match(Set dst (SubVS src1 src2));
6318   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
6319   ins_encode %{
6320     int vector_len = 0;
6321     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6322   %}
6323   ins_pipe( pipe_slow );
6324 %}
6325 
6326 instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{
6327   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6328   match(Set dst (SubVS src (LoadVector mem)));
6329   format %{ "vpsubw  $dst,$src,$mem\t! sub packed4S" %}
6330   ins_encode %{
6331     int vector_len = 0;
6332     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6333   %}
6334   ins_pipe( pipe_slow );
6335 %}
6336 
6337 instruct vsub8S(vecX dst, vecX src) %{
6338   predicate(n->as_Vector()->length() == 8);
6339   match(Set dst (SubVS dst src));
6340   format %{ "psubw   $dst,$src\t! sub packed8S" %}
6341   ins_encode %{
6342     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
6343   %}
6344   ins_pipe( pipe_slow );
6345 %}
6346 
6347 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
6348   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6349   match(Set dst (SubVS src1 src2));
6350   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
6351   ins_encode %{
6352     int vector_len = 0;
6353     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6354   %}
6355   ins_pipe( pipe_slow );
6356 %}
6357 
6358 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
6359   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6360   match(Set dst (SubVS src (LoadVector mem)));
6361   format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
6362   ins_encode %{
6363     int vector_len = 0;
6364     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6365   %}
6366   ins_pipe( pipe_slow );
6367 %}
6368 
6369 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
6370   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
6371   match(Set dst (SubVS src1 src2));
6372   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
6373   ins_encode %{
6374     int vector_len = 1;
6375     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6376   %}
6377   ins_pipe( pipe_slow );
6378 %}
6379 
6380 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
6381   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
6382   match(Set dst (SubVS src (LoadVector mem)));
6383   format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
6384   ins_encode %{
6385     int vector_len = 1;
6386     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6387   %}
6388   ins_pipe( pipe_slow );
6389 %}
6390 
6391 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
6392   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
6393   match(Set dst (SubVS src1 src2));
6394   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed32S" %}
6395   ins_encode %{
6396     int vector_len = 2;
6397     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6398   %}
6399   ins_pipe( pipe_slow );
6400 %}
6401 
6402 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
6403   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
6404   match(Set dst (SubVS src (LoadVector mem)));
6405   format %{ "vpsubw  $dst,$src,$mem\t! sub packed32S" %}
6406   ins_encode %{
6407     int vector_len = 2;
6408     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6409   %}
6410   ins_pipe( pipe_slow );
6411 %}
6412 
6413 // Integers vector sub
6414 instruct vsub2I(vecD dst, vecD src) %{
6415   predicate(n->as_Vector()->length() == 2);
6416   match(Set dst (SubVI dst src));
6417   format %{ "psubd   $dst,$src\t! sub packed2I" %}
6418   ins_encode %{
6419     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
6420   %}
6421   ins_pipe( pipe_slow );
6422 %}
6423 
6424 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
6425   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6426   match(Set dst (SubVI src1 src2));
6427   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed2I" %}
6428   ins_encode %{
6429     int vector_len = 0;
6430     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6431   %}
6432   ins_pipe( pipe_slow );
6433 %}
6434 
6435 instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
6436   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6437   match(Set dst (SubVI src (LoadVector mem)));
6438   format %{ "vpsubd  $dst,$src,$mem\t! sub packed2I" %}
6439   ins_encode %{
6440     int vector_len = 0;
6441     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6442   %}
6443   ins_pipe( pipe_slow );
6444 %}
6445 
6446 instruct vsub4I(vecX dst, vecX src) %{
6447   predicate(n->as_Vector()->length() == 4);
6448   match(Set dst (SubVI dst src));
6449   format %{ "psubd   $dst,$src\t! sub packed4I" %}
6450   ins_encode %{
6451     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
6452   %}
6453   ins_pipe( pipe_slow );
6454 %}
6455 
6456 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
6457   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6458   match(Set dst (SubVI src1 src2));
6459   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed4I" %}
6460   ins_encode %{
6461     int vector_len = 0;
6462     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6463   %}
6464   ins_pipe( pipe_slow );
6465 %}
6466 
6467 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
6468   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6469   match(Set dst (SubVI src (LoadVector mem)));
6470   format %{ "vpsubd  $dst,$src,$mem\t! sub packed4I" %}
6471   ins_encode %{
6472     int vector_len = 0;
6473     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6474   %}
6475   ins_pipe( pipe_slow );
6476 %}
6477 
6478 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
6479   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
6480   match(Set dst (SubVI src1 src2));
6481   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed8I" %}
6482   ins_encode %{
6483     int vector_len = 1;
6484     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6485   %}
6486   ins_pipe( pipe_slow );
6487 %}
6488 
6489 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
6490   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
6491   match(Set dst (SubVI src (LoadVector mem)));
6492   format %{ "vpsubd  $dst,$src,$mem\t! sub packed8I" %}
6493   ins_encode %{
6494     int vector_len = 1;
6495     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6496   %}
6497   ins_pipe( pipe_slow );
6498 %}
6499 
6500 instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
6501   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6502   match(Set dst (SubVI src1 src2));
6503   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed16I" %}
6504   ins_encode %{
6505     int vector_len = 2;
6506     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6507   %}
6508   ins_pipe( pipe_slow );
6509 %}
6510 
6511 instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
6512   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6513   match(Set dst (SubVI src (LoadVector mem)));
6514   format %{ "vpsubd  $dst,$src,$mem\t! sub packed16I" %}
6515   ins_encode %{
6516     int vector_len = 2;
6517     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6518   %}
6519   ins_pipe( pipe_slow );
6520 %}
6521 
6522 // Longs vector sub
6523 instruct vsub2L(vecX dst, vecX src) %{
6524   predicate(n->as_Vector()->length() == 2);
6525   match(Set dst (SubVL dst src));
6526   format %{ "psubq   $dst,$src\t! sub packed2L" %}
6527   ins_encode %{
6528     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
6529   %}
6530   ins_pipe( pipe_slow );
6531 %}
6532 
6533 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
6534   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6535   match(Set dst (SubVL src1 src2));
6536   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed2L" %}
6537   ins_encode %{
6538     int vector_len = 0;
6539     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6540   %}
6541   ins_pipe( pipe_slow );
6542 %}
6543 
6544 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
6545   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6546   match(Set dst (SubVL src (LoadVector mem)));
6547   format %{ "vpsubq  $dst,$src,$mem\t! sub packed2L" %}
6548   ins_encode %{
6549     int vector_len = 0;
6550     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6551   %}
6552   ins_pipe( pipe_slow );
6553 %}
6554 
6555 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
6556   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
6557   match(Set dst (SubVL src1 src2));
6558   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed4L" %}
6559   ins_encode %{
6560     int vector_len = 1;
6561     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6562   %}
6563   ins_pipe( pipe_slow );
6564 %}
6565 
6566 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
6567   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
6568   match(Set dst (SubVL src (LoadVector mem)));
6569   format %{ "vpsubq  $dst,$src,$mem\t! sub packed4L" %}
6570   ins_encode %{
6571     int vector_len = 1;
6572     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6573   %}
6574   ins_pipe( pipe_slow );
6575 %}
6576 
6577 instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
6578   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6579   match(Set dst (SubVL src1 src2));
6580   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed8L" %}
6581   ins_encode %{
6582     int vector_len = 2;
6583     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6584   %}
6585   ins_pipe( pipe_slow );
6586 %}
6587 
6588 instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
6589   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6590   match(Set dst (SubVL src (LoadVector mem)));
6591   format %{ "vpsubq  $dst,$src,$mem\t! sub packed8L" %}
6592   ins_encode %{
6593     int vector_len = 2;
6594     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6595   %}
6596   ins_pipe( pipe_slow );
6597 %}
6598 
6599 // Floats vector sub
6600 instruct vsub2F(vecD dst, vecD src) %{
6601   predicate(n->as_Vector()->length() == 2);
6602   match(Set dst (SubVF dst src));
6603   format %{ "subps   $dst,$src\t! sub packed2F" %}
6604   ins_encode %{
6605     __ subps($dst$$XMMRegister, $src$$XMMRegister);
6606   %}
6607   ins_pipe( pipe_slow );
6608 %}
6609 
6610 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
6611   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6612   match(Set dst (SubVF src1 src2));
6613   format %{ "vsubps  $dst,$src1,$src2\t! sub packed2F" %}
6614   ins_encode %{
6615     int vector_len = 0;
6616     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6617   %}
6618   ins_pipe( pipe_slow );
6619 %}
6620 
6621 instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
6622   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6623   match(Set dst (SubVF src (LoadVector mem)));
6624   format %{ "vsubps  $dst,$src,$mem\t! sub packed2F" %}
6625   ins_encode %{
6626     int vector_len = 0;
6627     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6628   %}
6629   ins_pipe( pipe_slow );
6630 %}
6631 
6632 instruct vsub4F(vecX dst, vecX src) %{
6633   predicate(n->as_Vector()->length() == 4);
6634   match(Set dst (SubVF dst src));
6635   format %{ "subps   $dst,$src\t! sub packed4F" %}
6636   ins_encode %{
6637     __ subps($dst$$XMMRegister, $src$$XMMRegister);
6638   %}
6639   ins_pipe( pipe_slow );
6640 %}
6641 
6642 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
6643   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6644   match(Set dst (SubVF src1 src2));
6645   format %{ "vsubps  $dst,$src1,$src2\t! sub packed4F" %}
6646   ins_encode %{
6647     int vector_len = 0;
6648     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6649   %}
6650   ins_pipe( pipe_slow );
6651 %}
6652 
6653 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
6654   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6655   match(Set dst (SubVF src (LoadVector mem)));
6656   format %{ "vsubps  $dst,$src,$mem\t! sub packed4F" %}
6657   ins_encode %{
6658     int vector_len = 0;
6659     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6660   %}
6661   ins_pipe( pipe_slow );
6662 %}
6663 
6664 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
6665   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6666   match(Set dst (SubVF src1 src2));
6667   format %{ "vsubps  $dst,$src1,$src2\t! sub packed8F" %}
6668   ins_encode %{
6669     int vector_len = 1;
6670     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6671   %}
6672   ins_pipe( pipe_slow );
6673 %}
6674 
6675 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
6676   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6677   match(Set dst (SubVF src (LoadVector mem)));
6678   format %{ "vsubps  $dst,$src,$mem\t! sub packed8F" %}
6679   ins_encode %{
6680     int vector_len = 1;
6681     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6682   %}
6683   ins_pipe( pipe_slow );
6684 %}
6685 
6686 instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
6687   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6688   match(Set dst (SubVF src1 src2));
6689   format %{ "vsubps  $dst,$src1,$src2\t! sub packed16F" %}
6690   ins_encode %{
6691     int vector_len = 2;
6692     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6693   %}
6694   ins_pipe( pipe_slow );
6695 %}
6696 
6697 instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
6698   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6699   match(Set dst (SubVF src (LoadVector mem)));
6700   format %{ "vsubps  $dst,$src,$mem\t! sub packed16F" %}
6701   ins_encode %{
6702     int vector_len = 2;
6703     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6704   %}
6705   ins_pipe( pipe_slow );
6706 %}
6707 
6708 // Doubles vector sub
6709 instruct vsub2D(vecX dst, vecX src) %{
6710   predicate(n->as_Vector()->length() == 2);
6711   match(Set dst (SubVD dst src));
6712   format %{ "subpd   $dst,$src\t! sub packed2D" %}
6713   ins_encode %{
6714     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
6715   %}
6716   ins_pipe( pipe_slow );
6717 %}
6718 
6719 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
6720   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6721   match(Set dst (SubVD src1 src2));
6722   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed2D" %}
6723   ins_encode %{
6724     int vector_len = 0;
6725     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6726   %}
6727   ins_pipe( pipe_slow );
6728 %}
6729 
6730 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
6731   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6732   match(Set dst (SubVD src (LoadVector mem)));
6733   format %{ "vsubpd  $dst,$src,$mem\t! sub packed2D" %}
6734   ins_encode %{
6735     int vector_len = 0;
6736     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6737   %}
6738   ins_pipe( pipe_slow );
6739 %}
6740 
6741 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
6742   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6743   match(Set dst (SubVD src1 src2));
6744   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed4D" %}
6745   ins_encode %{
6746     int vector_len = 1;
6747     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6748   %}
6749   ins_pipe( pipe_slow );
6750 %}
6751 
6752 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
6753   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6754   match(Set dst (SubVD src (LoadVector mem)));
6755   format %{ "vsubpd  $dst,$src,$mem\t! sub packed4D" %}
6756   ins_encode %{
6757     int vector_len = 1;
6758     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6759   %}
6760   ins_pipe( pipe_slow );
6761 %}
6762 
6763 instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
6764   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6765   match(Set dst (SubVD src1 src2));
6766   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed8D" %}
6767   ins_encode %{
6768     int vector_len = 2;
6769     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6770   %}
6771   ins_pipe( pipe_slow );
6772 %}
6773 
6774 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
6775   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6776   match(Set dst (SubVD src (LoadVector mem)));
6777   format %{ "vsubpd  $dst,$src,$mem\t! sub packed8D" %}
6778   ins_encode %{
6779     int vector_len = 2;
6780     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6781   %}
6782   ins_pipe( pipe_slow );
6783 %}
6784 
6785 // --------------------------------- MUL --------------------------------------
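// Note: there is no byte vector mul group below; x86 provides no packed
// 8-bit multiply instruction, so byte element multiplies are not
// vectorized by these rules.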
6786 
6787 // Shorts/Chars vector mul
6788 instruct vmul2S(vecS dst, vecS src) %{
6789   predicate(n->as_Vector()->length() == 2);
6790   match(Set dst (MulVS dst src));
6791   format %{ "pmullw  $dst,$src\t! mul packed2S" %}
6792   ins_encode %{
6793     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
6794   %}
6795   ins_pipe( pipe_slow );
6796 %}
6797 
6798 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
6799   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6800   match(Set dst (MulVS src1 src2));
6801   format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
6802   ins_encode %{
6803     int vector_len = 0;
6804     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6805   %}
6806   ins_pipe( pipe_slow );
6807 %}
6808 
6809 instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{
6810   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6811   match(Set dst (MulVS src (LoadVector mem)));
6812   format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
6813   ins_encode %{
6814     int vector_len = 0;
6815     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6816   %}
6817   ins_pipe( pipe_slow );
6818 %}
6819 
6820 instruct vmul4S(vecD dst, vecD src) %{
6821   predicate(n->as_Vector()->length() == 4);
6822   match(Set dst (MulVS dst src));
6823   format %{ "pmullw  $dst,$src\t! mul packed4S" %}
6824   ins_encode %{
6825     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
6826   %}
6827   ins_pipe( pipe_slow );
6828 %}
6829 
6830 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
6831   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6832   match(Set dst (MulVS src1 src2));
6833   format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
6834   ins_encode %{
6835     int vector_len = 0;
6836     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6837   %}
6838   ins_pipe( pipe_slow );
6839 %}
6840 
6841 instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{
6842   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6843   match(Set dst (MulVS src (LoadVector mem)));
6844   format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
6845   ins_encode %{
6846     int vector_len = 0;
6847     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6848   %}
6849   ins_pipe( pipe_slow );
6850 %}
6851 
6852 instruct vmul8S(vecX dst, vecX src) %{
6853   predicate(n->as_Vector()->length() == 8);
6854   match(Set dst (MulVS dst src));
6855   format %{ "pmullw  $dst,$src\t! mul packed8S" %}
6856   ins_encode %{
6857     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
6858   %}
6859   ins_pipe( pipe_slow );
6860 %}
6861 
6862 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
6863   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6864   match(Set dst (MulVS src1 src2));
6865   format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
6866   ins_encode %{
6867     int vector_len = 0;
6868     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6869   %}
6870   ins_pipe( pipe_slow );
6871 %}
6872 
6873 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
6874   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6875   match(Set dst (MulVS src (LoadVector mem)));
6876   format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
6877   ins_encode %{
6878     int vector_len = 0;
6879     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6880   %}
6881   ins_pipe( pipe_slow );
6882 %}
6883 
6884 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
6885   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
6886   match(Set dst (MulVS src1 src2));
6887   format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
6888   ins_encode %{
6889     int vector_len = 1;
6890     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6891   %}
6892   ins_pipe( pipe_slow );
6893 %}
6894 
6895 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
6896   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
6897   match(Set dst (MulVS src (LoadVector mem)));
6898   format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
6899   ins_encode %{
6900     int vector_len = 1;
6901     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6902   %}
6903   ins_pipe( pipe_slow );
6904 %}
6905 
6906 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
6907   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
6908   match(Set dst (MulVS src1 src2));
6909   format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
6910   ins_encode %{
6911     int vector_len = 2;
6912     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6913   %}
6914   ins_pipe( pipe_slow );
6915 %}
6916 
6917 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
6918   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
6919   match(Set dst (MulVS src (LoadVector mem)));
6920   format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
6921   ins_encode %{
6922     int vector_len = 2;
6923     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6924   %}
6925   ins_pipe( pipe_slow );
6926 %}
6927 
6928 // Integers vector mul (sse4_1)
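// pmulld (packed 32-bit low multiply) was introduced with SSE4.1, hence
// the UseSSE > 3 guard on the non-AVX forms below.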
6929 instruct vmul2I(vecD dst, vecD src) %{
6930   predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
6931   match(Set dst (MulVI dst src));
6932   format %{ "pmulld  $dst,$src\t! mul packed2I" %}
6933   ins_encode %{
6934     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
6935   %}
6936   ins_pipe( pipe_slow );
6937 %}
6938 
6939 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
6940   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6941   match(Set dst (MulVI src1 src2));
6942   format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
6943   ins_encode %{
6944     int vector_len = 0;
6945     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6946   %}
6947   ins_pipe( pipe_slow );
6948 %}
6949 
6950 instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{
6951   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6952   match(Set dst (MulVI src (LoadVector mem)));
6953   format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %}
6954   ins_encode %{
6955     int vector_len = 0;
6956     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6957   %}
6958   ins_pipe( pipe_slow );
6959 %}
6960 
6961 instruct vmul4I(vecX dst, vecX src) %{
6962   predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
6963   match(Set dst (MulVI dst src));
6964   format %{ "pmulld  $dst,$src\t! mul packed4I" %}
6965   ins_encode %{
6966     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
6967   %}
6968   ins_pipe( pipe_slow );
6969 %}
6970 
6971 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
6972   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6973   match(Set dst (MulVI src1 src2));
6974   format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
6975   ins_encode %{
6976     int vector_len = 0;
6977     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6978   %}
6979   ins_pipe( pipe_slow );
6980 %}
6981 
6982 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
6983   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6984   match(Set dst (MulVI src (LoadVector mem)));
6985   format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
6986   ins_encode %{
6987     int vector_len = 0;
6988     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6989   %}
6990   ins_pipe( pipe_slow );
6991 %}
6992 
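// Longs vector mul
// vpmullq (packed 64-bit low multiply) is an AVX-512DQ instruction, hence
// the VM_Version::supports_avx512dq() guards; no SSE or AVX2 rules are
// provided for 64-bit element multiply.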
6993 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
6994   predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
6995   match(Set dst (MulVL src1 src2));
6996   format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
6997   ins_encode %{
6998     int vector_len = 0;
6999     __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7000   %}
7001   ins_pipe( pipe_slow );
7002 %}
7003 
7004 instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{
7005   predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
7006   match(Set dst (MulVL src (LoadVector mem)));
7007   format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %}
7008   ins_encode %{
7009     int vector_len = 0;
7010     __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7011   %}
7012   ins_pipe( pipe_slow );
7013 %}
7014 
7015 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
7016   predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
7017   match(Set dst (MulVL src1 src2));
7018   format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
7019   ins_encode %{
7020     int vector_len = 1;
7021     __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7022   %}
7023   ins_pipe( pipe_slow );
7024 %}
7025 
7026 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
7027   predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
7028   match(Set dst (MulVL src (LoadVector mem)));
7029   format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
7030   ins_encode %{
7031     int vector_len = 1;
7032     __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7033   %}
7034   ins_pipe( pipe_slow );
7035 %}
7036 
7037 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
7038   predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
7039   match(Set dst (MulVL src1 src2));
7040   format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
7041   ins_encode %{
7042     int vector_len = 2;
7043     __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7044   %}
7045   ins_pipe( pipe_slow );
7046 %}
7047 
7048 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
7049   predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
7050   match(Set dst (MulVL src (LoadVector mem)));
7051   format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
7052   ins_encode %{
7053     int vector_len = 2;
7054     __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7055   %}
7056   ins_pipe( pipe_slow );
7057 %}
7058 
7059 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
7060   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
7061   match(Set dst (MulVI src1 src2));
7062   format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
7063   ins_encode %{
7064     int vector_len = 1;
7065     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7066   %}
7067   ins_pipe( pipe_slow );
7068 %}
7069 
7070 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
7071   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
7072   match(Set dst (MulVI src (LoadVector mem)));
7073   format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
7074   ins_encode %{
7075     int vector_len = 1;
7076     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7077   %}
7078   ins_pipe( pipe_slow );
7079 %}
7080 
7081 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
7082   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7083   match(Set dst (MulVI src1 src2));
7084   format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
7085   ins_encode %{
7086     int vector_len = 2;
7087     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7088   %}
7089   ins_pipe( pipe_slow );
7090 %}
7091 
7092 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
7093   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7094   match(Set dst (MulVI src (LoadVector mem)));
7095   format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
7096   ins_encode %{
7097     int vector_len = 2;
7098     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7099   %}
7100   ins_pipe( pipe_slow );
7101 %}
7102 
7103 // Floats vector mul
7104 instruct vmul2F(vecD dst, vecD src) %{
7105   predicate(n->as_Vector()->length() == 2);
7106   match(Set dst (MulVF dst src));
7107   format %{ "mulps   $dst,$src\t! mul packed2F" %}
7108   ins_encode %{
7109     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
7110   %}
7111   ins_pipe( pipe_slow );
7112 %}
7113 
7114 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
7115   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7116   match(Set dst (MulVF src1 src2));
7117   format %{ "vmulps  $dst,$src1,$src2\t! mul packed2F" %}
7118   ins_encode %{
7119     int vector_len = 0;
7120     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7121   %}
7122   ins_pipe( pipe_slow );
7123 %}
7124 
7125 instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{
7126   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7127   match(Set dst (MulVF src (LoadVector mem)));
7128   format %{ "vmulps  $dst,$src,$mem\t! mul packed2F" %}
7129   ins_encode %{
7130     int vector_len = 0;
7131     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7132   %}
7133   ins_pipe( pipe_slow );
7134 %}
7135 
7136 instruct vmul4F(vecX dst, vecX src) %{
7137   predicate(n->as_Vector()->length() == 4);
7138   match(Set dst (MulVF dst src));
7139   format %{ "mulps   $dst,$src\t! mul packed4F" %}
7140   ins_encode %{
7141     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
7142   %}
7143   ins_pipe( pipe_slow );
7144 %}
7145 
7146 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
7147   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7148   match(Set dst (MulVF src1 src2));
7149   format %{ "vmulps  $dst,$src1,$src2\t! mul packed4F" %}
7150   ins_encode %{
7151     int vector_len = 0;
7152     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7153   %}
7154   ins_pipe( pipe_slow );
7155 %}
7156 
7157 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
7158   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7159   match(Set dst (MulVF src (LoadVector mem)));
7160   format %{ "vmulps  $dst,$src,$mem\t! mul packed4F" %}
7161   ins_encode %{
7162     int vector_len = 0;
7163     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7164   %}
7165   ins_pipe( pipe_slow );
7166 %}
7167 
7168 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
7169   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7170   match(Set dst (MulVF src1 src2));
7171   format %{ "vmulps  $dst,$src1,$src2\t! mul packed8F" %}
7172   ins_encode %{
7173     int vector_len = 1;
7174     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7175   %}
7176   ins_pipe( pipe_slow );
7177 %}
7178 
7179 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
7180   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7181   match(Set dst (MulVF src (LoadVector mem)));
7182   format %{ "vmulps  $dst,$src,$mem\t! mul packed8F" %}
7183   ins_encode %{
7184     int vector_len = 1;
7185     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7186   %}
7187   ins_pipe( pipe_slow );
7188 %}
7189 
7190 instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
7191   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7192   match(Set dst (MulVF src1 src2));
7193   format %{ "vmulps  $dst,$src1,$src2\t! mul packed16F" %}
7194   ins_encode %{
7195     int vector_len = 2;
7196     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7197   %}
7198   ins_pipe( pipe_slow );
7199 %}
7200 
7201 instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
7202   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7203   match(Set dst (MulVF src (LoadVector mem)));
7204   format %{ "vmulps  $dst,$src,$mem\t! mul packed16F" %}
7205   ins_encode %{
7206     int vector_len = 2;
7207     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7208   %}
7209   ins_pipe( pipe_slow );
7210 %}
7211 
7212 // Doubles vector mul
7213 instruct vmul2D(vecX dst, vecX src) %{
7214   predicate(n->as_Vector()->length() == 2);
7215   match(Set dst (MulVD dst src));
7216   format %{ "mulpd   $dst,$src\t! mul packed2D" %}
7217   ins_encode %{
7218     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
7219   %}
7220   ins_pipe( pipe_slow );
7221 %}
7222 
7223 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
7224   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7225   match(Set dst (MulVD src1 src2));
7226   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed2D" %}
7227   ins_encode %{
7228     int vector_len = 0;
7229     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7230   %}
7231   ins_pipe( pipe_slow );
7232 %}
7233 
7234 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
7235   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7236   match(Set dst (MulVD src (LoadVector mem)));
7237   format %{ "vmulpd  $dst,$src,$mem\t! mul packed2D" %}
7238   ins_encode %{
7239     int vector_len = 0;
7240     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7241   %}
7242   ins_pipe( pipe_slow );
7243 %}
7244 
7245 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
7246   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7247   match(Set dst (MulVD src1 src2));
7248   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed4D" %}
7249   ins_encode %{
7250     int vector_len = 1;
7251     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7252   %}
7253   ins_pipe( pipe_slow );
7254 %}
7255 
7256 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
7257   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7258   match(Set dst (MulVD src (LoadVector mem)));
7259   format %{ "vmulpd  $dst,$src,$mem\t! mul packed4D" %}
7260   ins_encode %{
7261     int vector_len = 1;
7262     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7263   %}
7264   ins_pipe( pipe_slow );
7265 %}
7266 
7267 instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
7268   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7269   match(Set dst (MulVD src1 src2));
7270   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed8D" %}
7271   ins_encode %{
7272     int vector_len = 2;
7273     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7274   %}
7275   ins_pipe( pipe_slow );
7276 %}
7277 
7278 instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
7279   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7280   match(Set dst (MulVD src (LoadVector mem)));
7281   format %{ "vmulpd  $dst,$src,$mem\t! mul packed8D" %}
7282   ins_encode %{
7283     int vector_len = 2;
7284     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7285   %}
7286   ins_pipe( pipe_slow );
7287 %}
7288 
7289 // --------------------------------- DIV --------------------------------------
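// Note: only floating point division is vectorized; SSE/AVX have no
// packed integer divide instruction.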
7290 
7291 // Floats vector div
7292 instruct vdiv2F(vecD dst, vecD src) %{
7293   predicate(n->as_Vector()->length() == 2);
7294   match(Set dst (DivVF dst src));
7295   format %{ "divps   $dst,$src\t! div packed2F" %}
7296   ins_encode %{
7297     __ divps($dst$$XMMRegister, $src$$XMMRegister);
7298   %}
7299   ins_pipe( pipe_slow );
7300 %}
7301 
7302 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
7303   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7304   match(Set dst (DivVF src1 src2));
7305   format %{ "vdivps  $dst,$src1,$src2\t! div packed2F" %}
7306   ins_encode %{
7307     int vector_len = 0;
7308     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7309   %}
7310   ins_pipe( pipe_slow );
7311 %}
7312 
7313 instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
7314   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7315   match(Set dst (DivVF src (LoadVector mem)));
7316   format %{ "vdivps  $dst,$src,$mem\t! div packed2F" %}
7317   ins_encode %{
7318     int vector_len = 0;
7319     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7320   %}
7321   ins_pipe( pipe_slow );
7322 %}
7323 
7324 instruct vdiv4F(vecX dst, vecX src) %{
7325   predicate(n->as_Vector()->length() == 4);
7326   match(Set dst (DivVF dst src));
7327   format %{ "divps   $dst,$src\t! div packed4F" %}
7328   ins_encode %{
7329     __ divps($dst$$XMMRegister, $src$$XMMRegister);
7330   %}
7331   ins_pipe( pipe_slow );
7332 %}
7333 
7334 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
7335   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7336   match(Set dst (DivVF src1 src2));
7337   format %{ "vdivps  $dst,$src1,$src2\t! div packed4F" %}
7338   ins_encode %{
7339     int vector_len = 0;
7340     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7341   %}
7342   ins_pipe( pipe_slow );
7343 %}
7344 
7345 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
7346   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7347   match(Set dst (DivVF src (LoadVector mem)));
7348   format %{ "vdivps  $dst,$src,$mem\t! div packed4F" %}
7349   ins_encode %{
7350     int vector_len = 0;
7351     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7352   %}
7353   ins_pipe( pipe_slow );
7354 %}
7355 
7356 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
7357   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7358   match(Set dst (DivVF src1 src2));
7359   format %{ "vdivps  $dst,$src1,$src2\t! div packed8F" %}
7360   ins_encode %{
7361     int vector_len = 1;
7362     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7363   %}
7364   ins_pipe( pipe_slow );
7365 %}
7366 
7367 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
7368   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7369   match(Set dst (DivVF src (LoadVector mem)));
7370   format %{ "vdivps  $dst,$src,$mem\t! div packed8F" %}
7371   ins_encode %{
7372     int vector_len = 1;
7373     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7374   %}
7375   ins_pipe( pipe_slow );
7376 %}
7377 
7378 instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
7379   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7380   match(Set dst (DivVF src1 src2));
7381   format %{ "vdivps  $dst,$src1,$src2\t! div packed16F" %}
7382   ins_encode %{
7383     int vector_len = 2;
7384     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7385   %}
7386   ins_pipe( pipe_slow );
7387 %}
7388 
7389 instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
7390   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7391   match(Set dst (DivVF src (LoadVector mem)));
7392   format %{ "vdivps  $dst,$src,$mem\t! div packed16F" %}
7393   ins_encode %{
7394     int vector_len = 2;
7395     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7396   %}
7397   ins_pipe( pipe_slow );
7398 %}
7399 
7400 // Doubles vector div
7401 instruct vdiv2D(vecX dst, vecX src) %{
7402   predicate(n->as_Vector()->length() == 2);
7403   match(Set dst (DivVD dst src));
7404   format %{ "divpd   $dst,$src\t! div packed2D" %}
7405   ins_encode %{
7406     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
7407   %}
7408   ins_pipe( pipe_slow );
7409 %}
7410 
7411 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
7412   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7413   match(Set dst (DivVD src1 src2));
7414   format %{ "vdivpd  $dst,$src1,$src2\t! div packed2D" %}
7415   ins_encode %{
7416     int vector_len = 0;
7417     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7418   %}
7419   ins_pipe( pipe_slow );
7420 %}
7421 
7422 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
7423   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7424   match(Set dst (DivVD src (LoadVector mem)));
7425   format %{ "vdivpd  $dst,$src,$mem\t! div packed2D" %}
7426   ins_encode %{
7427     int vector_len = 0;
7428     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7429   %}
7430   ins_pipe( pipe_slow );
7431 %}
7432 
7433 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
7434   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7435   match(Set dst (DivVD src1 src2));
7436   format %{ "vdivpd  $dst,$src1,$src2\t! div packed4D" %}
7437   ins_encode %{
7438     int vector_len = 1;
7439     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7440   %}
7441   ins_pipe( pipe_slow );
7442 %}
7443 
7444 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
7445   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7446   match(Set dst (DivVD src (LoadVector mem)));
7447   format %{ "vdivpd  $dst,$src,$mem\t! div packed4D" %}
7448   ins_encode %{
7449     int vector_len = 1;
7450     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7451   %}
7452   ins_pipe( pipe_slow );
7453 %}
7454 
7455 instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
7456   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7457   match(Set dst (DivVD src1 src2));
7458   format %{ "vdivpd  $dst,$src1,$src2\t! div packed8D" %}
7459   ins_encode %{
7460     int vector_len = 2;
7461     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7462   %}
7463   ins_pipe( pipe_slow );
7464 %}
7465 
7466 instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
7467   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7468   match(Set dst (DivVD src (LoadVector mem)));
7469   format %{ "vdivpd  $dst,$src,$mem\t! div packed8D" %}
7470   ins_encode %{
7471     int vector_len = 2;
7472     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7473   %}
7474   ins_pipe( pipe_slow );
7475 %}
7476 
7477 // ------------------------------ Shift ---------------------------------------
7478 
7479 // Left and right shift count vectors are the same on x86
7480 // (only lowest bits of xmm reg are used for count).
7481 instruct vshiftcnt(vecS dst, rRegI cnt) %{
7482   match(Set dst (LShiftCntV cnt));
7483   match(Set dst (RShiftCntV cnt));
7484   format %{ "movd    $dst,$cnt\t! load shift count" %}
7485   ins_encode %{
7486     __ movdl($dst$$XMMRegister, $cnt$$Register);
7487   %}
7488   ins_pipe( pipe_slow );
7489 %}
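// vshiftcnt moves the scalar count into the low dword of an XMM register
// (movdl zero-extends to 128 bits); the variable-count shift rules below
// read the count from the low bits of that register, as noted above.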
7490 
7491 // --------------------------------- Sqrt --------------------------------------
7492 
7493 // Floating point vector sqrt - double precision only
7494 instruct vsqrt2D_reg(vecX dst, vecX src) %{
7495   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7496   match(Set dst (SqrtVD src));
7497   format %{ "vsqrtpd  $dst,$src\t! sqrt packed2D" %}
7498   ins_encode %{
7499     int vector_len = 0;
7500     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
7501   %}
7502   ins_pipe( pipe_slow );
7503 %}
7504 
7505 instruct vsqrt2D_mem(vecX dst, memory mem) %{
7506   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7507   match(Set dst (SqrtVD (LoadVector mem)));
7508   format %{ "vsqrtpd  $dst,$mem\t! sqrt packed2D" %}
7509   ins_encode %{
7510     int vector_len = 0;
7511     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
7512   %}
7513   ins_pipe( pipe_slow );
7514 %}
7515 
7516 instruct vsqrt4D_reg(vecY dst, vecY src) %{
7517   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7518   match(Set dst (SqrtVD src));
7519   format %{ "vsqrtpd  $dst,$src\t! sqrt packed4D" %}
7520   ins_encode %{
7521     int vector_len = 1;
7522     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
7523   %}
7524   ins_pipe( pipe_slow );
7525 %}
7526 
7527 instruct vsqrt4D_mem(vecY dst, memory mem) %{
7528   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7529   match(Set dst (SqrtVD (LoadVector mem)));
7530   format %{ "vsqrtpd  $dst,$mem\t! sqrt packed4D" %}
7531   ins_encode %{
7532     int vector_len = 1;
7533     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
7534   %}
7535   ins_pipe( pipe_slow );
7536 %}
7537 
7538 instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
7539   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7540   match(Set dst (SqrtVD src));
7541   format %{ "vsqrtpd  $dst,$src\t! sqrt packed8D" %}
7542   ins_encode %{
7543     int vector_len = 2;
7544     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
7545   %}
7546   ins_pipe( pipe_slow );
7547 %}
7548 
7549 instruct vsqrt8D_mem(vecZ dst, memory mem) %{
7550   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7551   match(Set dst (SqrtVD (LoadVector mem)));
7552   format %{ "vsqrtpd  $dst,$mem\t! sqrt packed8D" %}
7553   ins_encode %{
7554     int vector_len = 2;
7555     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
7556   %}
7557   ins_pipe( pipe_slow );
7558 %}
7559 
7560 // ------------------------------ LeftShift -----------------------------------
7561 
7562 // Shorts/Chars vector left shift
7563 instruct vsll2S(vecS dst, vecS shift) %{
7564   predicate(n->as_Vector()->length() == 2);
7565   match(Set dst (LShiftVS dst shift));
7566   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
7567   ins_encode %{
7568     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
7569   %}
7570   ins_pipe( pipe_slow );
7571 %}
7572 
7573 instruct vsll2S_imm(vecS dst, immI8 shift) %{
7574   predicate(n->as_Vector()->length() == 2);
7575   match(Set dst (LShiftVS dst shift));
7576   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
7577   ins_encode %{
7578     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
7579   %}
7580   ins_pipe( pipe_slow );
7581 %}
7582 
7583 instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
7584   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7585   match(Set dst (LShiftVS src shift));
7586   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
7587   ins_encode %{
7588     int vector_len = 0;
7589     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7590   %}
7591   ins_pipe( pipe_slow );
7592 %}
7593 
7594 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
7595   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7596   match(Set dst (LShiftVS src shift));
7597   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
7598   ins_encode %{
7599     int vector_len = 0;
7600     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7601   %}
7602   ins_pipe( pipe_slow );
7603 %}
7604 
7605 instruct vsll4S(vecD dst, vecS shift) %{
7606   predicate(n->as_Vector()->length() == 4);
7607   match(Set dst (LShiftVS dst shift));
7608   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
7609   ins_encode %{
7610     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
7611   %}
7612   ins_pipe( pipe_slow );
7613 %}
7614 
7615 instruct vsll4S_imm(vecD dst, immI8 shift) %{
7616   predicate(n->as_Vector()->length() == 4);
7617   match(Set dst (LShiftVS dst shift));
7618   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
7619   ins_encode %{
7620     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
7621   %}
7622   ins_pipe( pipe_slow );
7623 %}
7624 
7625 instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
7626   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7627   match(Set dst (LShiftVS src shift));
7628   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
7629   ins_encode %{
7630     int vector_len = 0;
7631     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7632   %}
7633   ins_pipe( pipe_slow );
7634 %}
7635 
7636 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
7637   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7638   match(Set dst (LShiftVS src shift));
7639   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
7640   ins_encode %{
7641     int vector_len = 0;
7642     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7643   %}
7644   ins_pipe( pipe_slow );
7645 %}
7646 
7647 instruct vsll8S(vecX dst, vecS shift) %{
7648   predicate(n->as_Vector()->length() == 8);
7649   match(Set dst (LShiftVS dst shift));
7650   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
7651   ins_encode %{
7652     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
7653   %}
7654   ins_pipe( pipe_slow );
7655 %}
7656 
7657 instruct vsll8S_imm(vecX dst, immI8 shift) %{
7658   predicate(n->as_Vector()->length() == 8);
7659   match(Set dst (LShiftVS dst shift));
7660   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
7661   ins_encode %{
7662     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
7663   %}
7664   ins_pipe( pipe_slow );
7665 %}
7666 
7667 instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
7668   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7669   match(Set dst (LShiftVS src shift));
7670   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
7671   ins_encode %{
7672     int vector_len = 0;
7673     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7674   %}
7675   ins_pipe( pipe_slow );
7676 %}
7677 
7678 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
7679   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7680   match(Set dst (LShiftVS src shift));
7681   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
7682   ins_encode %{
7683     int vector_len = 0;
7684     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7685   %}
7686   ins_pipe( pipe_slow );
7687 %}
7688 
7689 instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
7690   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
7691   match(Set dst (LShiftVS src shift));
7692   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
7693   ins_encode %{
7694     int vector_len = 1;
7695     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7696   %}
7697   ins_pipe( pipe_slow );
7698 %}
7699 
7700 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
7701   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
7702   match(Set dst (LShiftVS src shift));
7703   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
7704   ins_encode %{
7705     int vector_len = 1;
7706     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7707   %}
7708   ins_pipe( pipe_slow );
7709 %}
7710 
7711 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
7712   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
7713   match(Set dst (LShiftVS src shift));
7714   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed32S" %}
7715   ins_encode %{
7716     int vector_len = 2;
7717     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7718   %}
7719   ins_pipe( pipe_slow );
7720 %}
7721 
7722 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
7723   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
7724   match(Set dst (LShiftVS src shift));
7725   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed32S" %}
7726   ins_encode %{
7727     int vector_len = 2;
7728     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7729   %}
7730   ins_pipe( pipe_slow );
7731 %}
7732 
7733 // Integers vector left shift
7734 instruct vsll2I(vecD dst, vecS shift) %{
7735   predicate(n->as_Vector()->length() == 2);
7736   match(Set dst (LShiftVI dst shift));
7737   format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
7738   ins_encode %{
7739     __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
7740   %}
7741   ins_pipe( pipe_slow );
7742 %}
7743 
7744 instruct vsll2I_imm(vecD dst, immI8 shift) %{
7745   predicate(n->as_Vector()->length() == 2);
7746   match(Set dst (LShiftVI dst shift));
7747   format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
7748   ins_encode %{
7749     __ pslld($dst$$XMMRegister, (int)$shift$$constant);
7750   %}
7751   ins_pipe( pipe_slow );
7752 %}
7753 
7754 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
7755   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7756   match(Set dst (LShiftVI src shift));
7757   format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
7758   ins_encode %{
7759     int vector_len = 0;
7760     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7761   %}
7762   ins_pipe( pipe_slow );
7763 %}
7764 
7765 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
7766   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7767   match(Set dst (LShiftVI src shift));
7768   format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
7769   ins_encode %{
7770     int vector_len = 0;
7771     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7772   %}
7773   ins_pipe( pipe_slow );
7774 %}
7775 
7776 instruct vsll4I(vecX dst, vecS shift) %{
7777   predicate(n->as_Vector()->length() == 4);
7778   match(Set dst (LShiftVI dst shift));
7779   format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
7780   ins_encode %{
7781     __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
7782   %}
7783   ins_pipe( pipe_slow );
7784 %}
7785 
7786 instruct vsll4I_imm(vecX dst, immI8 shift) %{
7787   predicate(n->as_Vector()->length() == 4);
7788   match(Set dst (LShiftVI dst shift));
7789   format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
7790   ins_encode %{
7791     __ pslld($dst$$XMMRegister, (int)$shift$$constant);
7792   %}
7793   ins_pipe( pipe_slow );
7794 %}
7795 
7796 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
7797   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7798   match(Set dst (LShiftVI src shift));
7799   format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
7800   ins_encode %{
7801     int vector_len = 0;
7802     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7803   %}
7804   ins_pipe( pipe_slow );
7805 %}
7806 
7807 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
7808   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7809   match(Set dst (LShiftVI src shift));
7810   format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
7811   ins_encode %{
7812     int vector_len = 0;
7813     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7814   %}
7815   ins_pipe( pipe_slow );
7816 %}
7817 
7818 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
7819   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
7820   match(Set dst (LShiftVI src shift));
7821   format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
7822   ins_encode %{
7823     int vector_len = 1;
7824     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7825   %}
7826   ins_pipe( pipe_slow );
7827 %}
7828 
7829 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
7830   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
7831   match(Set dst (LShiftVI src shift));
7832   format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
7833   ins_encode %{
7834     int vector_len = 1;
7835     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7836   %}
7837   ins_pipe( pipe_slow );
7838 %}
7839 
7840 instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
7841   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7842   match(Set dst (LShiftVI src shift));
7843   format %{ "vpslld  $dst,$src,$shift\t! left shift packed16I" %}
7844   ins_encode %{
7845     int vector_len = 2;
7846     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7847   %}
7848   ins_pipe( pipe_slow );
7849 %}
7850 
7851 instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
7852   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7853   match(Set dst (LShiftVI src shift));
7854   format %{ "vpslld  $dst,$src,$shift\t! left shift packed16I" %}
7855   ins_encode %{
7856     int vector_len = 2;
7857     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7858   %}
7859   ins_pipe( pipe_slow );
7860 %}
7861 
7862 // Longs vector left shift
7863 instruct vsll2L(vecX dst, vecS shift) %{
7864   predicate(n->as_Vector()->length() == 2);
7865   match(Set dst (LShiftVL dst shift));
7866   format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
7867   ins_encode %{
7868     __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
7869   %}
7870   ins_pipe( pipe_slow );
7871 %}
7872 
7873 instruct vsll2L_imm(vecX dst, immI8 shift) %{
7874   predicate(n->as_Vector()->length() == 2);
7875   match(Set dst (LShiftVL dst shift));
7876   format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
7877   ins_encode %{
7878     __ psllq($dst$$XMMRegister, (int)$shift$$constant);
7879   %}
7880   ins_pipe( pipe_slow );
7881 %}
7882 
7883 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
7884   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7885   match(Set dst (LShiftVL src shift));
7886   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
7887   ins_encode %{
7888     int vector_len = 0;
7889     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7890   %}
7891   ins_pipe( pipe_slow );
7892 %}
7893 
7894 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
7895   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7896   match(Set dst (LShiftVL src shift));
7897   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
7898   ins_encode %{
7899     int vector_len = 0;
7900     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7901   %}
7902   ins_pipe( pipe_slow );
7903 %}
7904 
7905 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
7906   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
7907   match(Set dst (LShiftVL src shift));
7908   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
7909   ins_encode %{
7910     int vector_len = 1;
7911     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7912   %}
7913   ins_pipe( pipe_slow );
7914 %}
7915 
7916 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
7917   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
7918   match(Set dst (LShiftVL src shift));
7919   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
7920   ins_encode %{
7921     int vector_len = 1;
7922     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7923   %}
7924   ins_pipe( pipe_slow );
7925 %}
7926 
7927 instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
7928   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7929   match(Set dst (LShiftVL src shift));
7930   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed8L" %}
7931   ins_encode %{
7932     int vector_len = 2;
7933     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7934   %}
7935   ins_pipe( pipe_slow );
7936 %}
7937 
7938 instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
7939   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7940   match(Set dst (LShiftVL src shift));
7941   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed8L" %}
7942   ins_encode %{
7943     int vector_len = 2;
7944     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7945   %}
7946   ins_pipe( pipe_slow );
7947 %}
7948 
7949 // ----------------------- LogicalRightShift -----------------------------------
7950 
7951 // Shorts vector logical right shift produces an incorrect Java result
7952 // for negative data, because Java code converts a short value into an int
7953 // with sign extension before the shift. Char vectors are fine, since chars
7954 // are unsigned values.
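// For example, with short s = -1 (0xFFFF) and a shift count of 1, Java
// evaluates (s >>> 1) on the sign-extended int 0xFFFFFFFF, giving 0x7FFFFFFF,
// which narrows back to the short 0xFFFF (-1); a packed 16-bit psrlw would
// instead produce 0x7FFF. For char data the int is zero-extended (0x0000FFFF),
// so the scalar and packed results agree and chars can use these instructions.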
7955 
7956 instruct vsrl2S(vecS dst, vecS shift) %{
7957   predicate(n->as_Vector()->length() == 2);
7958   match(Set dst (URShiftVS dst shift));
7959   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
7960   ins_encode %{
7961     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
7962   %}
7963   ins_pipe( pipe_slow );
7964 %}
7965 
7966 instruct vsrl2S_imm(vecS dst, immI8 shift) %{
7967   predicate(n->as_Vector()->length() == 2);
7968   match(Set dst (URShiftVS dst shift));
7969   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
7970   ins_encode %{
7971     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
7972   %}
7973   ins_pipe( pipe_slow );
7974 %}
7975 
7976 instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
7977   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7978   match(Set dst (URShiftVS src shift));
7979   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
7980   ins_encode %{
7981     int vector_len = 0;
7982     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7983   %}
7984   ins_pipe( pipe_slow );
7985 %}
7986 
7987 instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
7988   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7989   match(Set dst (URShiftVS src shift));
7990   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
7991   ins_encode %{
7992     int vector_len = 0;
7993     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7994   %}
7995   ins_pipe( pipe_slow );
7996 %}
7997 
7998 instruct vsrl4S(vecD dst, vecS shift) %{
7999   predicate(n->as_Vector()->length() == 4);
8000   match(Set dst (URShiftVS dst shift));
8001   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
8002   ins_encode %{
8003     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
8004   %}
8005   ins_pipe( pipe_slow );
8006 %}
8007 
8008 instruct vsrl4S_imm(vecD dst, immI8 shift) %{
8009   predicate(n->as_Vector()->length() == 4);
8010   match(Set dst (URShiftVS dst shift));
8011   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
8012   ins_encode %{
8013     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
8014   %}
8015   ins_pipe( pipe_slow );
8016 %}
8017 
8018 instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
8019   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8020   match(Set dst (URShiftVS src shift));
8021   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
8022   ins_encode %{
8023     int vector_len = 0;
8024     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8025   %}
8026   ins_pipe( pipe_slow );
8027 %}
8028 
8029 instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
8030   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8031   match(Set dst (URShiftVS src shift));
8032   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
8033   ins_encode %{
8034     int vector_len = 0;
8035     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8036   %}
8037   ins_pipe( pipe_slow );
8038 %}
8039 
8040 instruct vsrl8S(vecX dst, vecS shift) %{
8041   predicate(n->as_Vector()->length() == 8);
8042   match(Set dst (URShiftVS dst shift));
8043   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
8044   ins_encode %{
8045     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
8046   %}
8047   ins_pipe( pipe_slow );
8048 %}
8049 
8050 instruct vsrl8S_imm(vecX dst, immI8 shift) %{
8051   predicate(n->as_Vector()->length() == 8);
8052   match(Set dst (URShiftVS dst shift));
8053   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
8054   ins_encode %{
8055     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
8056   %}
8057   ins_pipe( pipe_slow );
8058 %}
8059 
8060 instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
8061   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
8062   match(Set dst (URShiftVS src shift));
8063   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
8064   ins_encode %{
8065     int vector_len = 0;
8066     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8067   %}
8068   ins_pipe( pipe_slow );
8069 %}
8070 
8071 instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
8072   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
8073   match(Set dst (URShiftVS src shift));
8074   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
8075   ins_encode %{
8076     int vector_len = 0;
8077     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8078   %}
8079   ins_pipe( pipe_slow );
8080 %}
8081 
8082 instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
8083   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
8084   match(Set dst (URShiftVS src shift));
8085   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
8086   ins_encode %{
8087     int vector_len = 1;
8088     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8089   %}
8090   ins_pipe( pipe_slow );
8091 %}
8092 
8093 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
8094   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
8095   match(Set dst (URShiftVS src shift));
8096   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
8097   ins_encode %{
8098     int vector_len = 1;
8099     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8100   %}
8101   ins_pipe( pipe_slow );
8102 %}
8103 
8104 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
8105   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
8106   match(Set dst (URShiftVS src shift));
8107   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed32S" %}
8108   ins_encode %{
8109     int vector_len = 2;
8110     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8111   %}
8112   ins_pipe( pipe_slow );
8113 %}
8114 
8115 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
8116   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
8117   match(Set dst (URShiftVS src shift));
8118   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed32S" %}
8119   ins_encode %{
8120     int vector_len = 2;
8121     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8122   %}
8123   ins_pipe( pipe_slow );
8124 %}
8125 
8126 // Integers vector logical right shift
8127 instruct vsrl2I(vecD dst, vecS shift) %{
8128   predicate(n->as_Vector()->length() == 2);
8129   match(Set dst (URShiftVI dst shift));
8130   format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
8131   ins_encode %{
8132     __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
8133   %}
8134   ins_pipe( pipe_slow );
8135 %}
8136 
8137 instruct vsrl2I_imm(vecD dst, immI8 shift) %{
8138   predicate(n->as_Vector()->length() == 2);
8139   match(Set dst (URShiftVI dst shift));
8140   format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
8141   ins_encode %{
8142     __ psrld($dst$$XMMRegister, (int)$shift$$constant);
8143   %}
8144   ins_pipe( pipe_slow );
8145 %}
8146 
8147 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
8148   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8149   match(Set dst (URShiftVI src shift));
8150   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
8151   ins_encode %{
8152     int vector_len = 0;
8153     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8154   %}
8155   ins_pipe( pipe_slow );
8156 %}
8157 
8158 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
8159   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8160   match(Set dst (URShiftVI src shift));
8161   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
8162   ins_encode %{
8163     int vector_len = 0;
8164     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8165   %}
8166   ins_pipe( pipe_slow );
8167 %}
8168 
8169 instruct vsrl4I(vecX dst, vecS shift) %{
8170   predicate(n->as_Vector()->length() == 4);
8171   match(Set dst (URShiftVI dst shift));
8172   format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
8173   ins_encode %{
8174     __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
8175   %}
8176   ins_pipe( pipe_slow );
8177 %}
8178 
8179 instruct vsrl4I_imm(vecX dst, immI8 shift) %{
8180   predicate(n->as_Vector()->length() == 4);
8181   match(Set dst (URShiftVI dst shift));
8182   format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
8183   ins_encode %{
8184     __ psrld($dst$$XMMRegister, (int)$shift$$constant);
8185   %}
8186   ins_pipe( pipe_slow );
8187 %}
8188 
8189 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
8190   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8191   match(Set dst (URShiftVI src shift));
8192   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
8193   ins_encode %{
8194     int vector_len = 0;
8195     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8196   %}
8197   ins_pipe( pipe_slow );
8198 %}
8199 
8200 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
8201   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8202   match(Set dst (URShiftVI src shift));
8203   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
8204   ins_encode %{
8205     int vector_len = 0;
8206     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8207   %}
8208   ins_pipe( pipe_slow );
8209 %}
8210 
8211 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
8212   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
8213   match(Set dst (URShiftVI src shift));
8214   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
8215   ins_encode %{
8216     int vector_len = 1;
8217     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8218   %}
8219   ins_pipe( pipe_slow );
8220 %}
8221 
8222 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
8223   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
8224   match(Set dst (URShiftVI src shift));
8225   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
8226   ins_encode %{
8227     int vector_len = 1;
8228     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8229   %}
8230   ins_pipe( pipe_slow );
8231 %}
8232 
8233 instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
8234   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8235   match(Set dst (URShiftVI src shift));
8236   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed16I" %}
8237   ins_encode %{
8238     int vector_len = 2;
8239     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8240   %}
8241   ins_pipe( pipe_slow );
8242 %}
8243 
8244 instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
8245   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8246   match(Set dst (URShiftVI src shift));
8247   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed16I" %}
8248   ins_encode %{
8249     int vector_len = 2;
8250     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8251   %}
8252   ins_pipe( pipe_slow );
8253 %}
8254 
8255 // Longs vector logical right shift
8256 instruct vsrl2L(vecX dst, vecS shift) %{
8257   predicate(n->as_Vector()->length() == 2);
8258   match(Set dst (URShiftVL dst shift));
8259   format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
8260   ins_encode %{
8261     __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
8262   %}
8263   ins_pipe( pipe_slow );
8264 %}
8265 
8266 instruct vsrl2L_imm(vecX dst, immI8 shift) %{
8267   predicate(n->as_Vector()->length() == 2);
8268   match(Set dst (URShiftVL dst shift));
8269   format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
8270   ins_encode %{
8271     __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
8272   %}
8273   ins_pipe( pipe_slow );
8274 %}
8275 
8276 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
8277   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8278   match(Set dst (URShiftVL src shift));
8279   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
8280   ins_encode %{
8281     int vector_len = 0;
8282     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8283   %}
8284   ins_pipe( pipe_slow );
8285 %}
8286 
8287 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
8288   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8289   match(Set dst (URShiftVL src shift));
8290   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
8291   ins_encode %{
8292     int vector_len = 0;
8293     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8294   %}
8295   ins_pipe( pipe_slow );
8296 %}
8297 
8298 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
8299   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
8300   match(Set dst (URShiftVL src shift));
8301   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
8302   ins_encode %{
8303     int vector_len = 1;
8304     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8305   %}
8306   ins_pipe( pipe_slow );
8307 %}
8308 
8309 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
8310   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
8311   match(Set dst (URShiftVL src shift));
8312   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
8313   ins_encode %{
8314     int vector_len = 1;
8315     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8316   %}
8317   ins_pipe( pipe_slow );
8318 %}
8319 
8320 instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
8321   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
8322   match(Set dst (URShiftVL src shift));
8323   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed8L" %}
8324   ins_encode %{
8325     int vector_len = 2;
8326     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8327   %}
8328   ins_pipe( pipe_slow );
8329 %}
8330 
8331 instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
8332   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
8333   match(Set dst (URShiftVL src shift));
8334   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed8L" %}
8335   ins_encode %{
8336     int vector_len = 2;
8337     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8338   %}
8339   ins_pipe( pipe_slow );
8340 %}
8341 
8342 // ------------------- ArithmeticRightShift -----------------------------------
8343 
8344 // Shorts/Chars vector arithmetic right shift
8345 instruct vsra2S(vecS dst, vecS shift) %{
8346   predicate(n->as_Vector()->length() == 2);
8347   match(Set dst (RShiftVS dst shift));
8348   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
8349   ins_encode %{
8350     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
8351   %}
8352   ins_pipe( pipe_slow );
8353 %}
8354 
8355 instruct vsra2S_imm(vecS dst, immI8 shift) %{
8356   predicate(n->as_Vector()->length() == 2);
8357   match(Set dst (RShiftVS dst shift));
8358   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
8359   ins_encode %{
8360     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
8361   %}
8362   ins_pipe( pipe_slow );
8363 %}
8364 
8365 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
8366   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8367   match(Set dst (RShiftVS src shift));
8368   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
8369   ins_encode %{
8370     int vector_len = 0;
8371     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8372   %}
8373   ins_pipe( pipe_slow );
8374 %}
8375 
8376 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
8377   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8378   match(Set dst (RShiftVS src shift));
8379   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
8380   ins_encode %{
8381     int vector_len = 0;
8382     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8383   %}
8384   ins_pipe( pipe_slow );
8385 %}
8386 
8387 instruct vsra4S(vecD dst, vecS shift) %{
8388   predicate(n->as_Vector()->length() == 4);
8389   match(Set dst (RShiftVS dst shift));
8390   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
8391   ins_encode %{
8392     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
8393   %}
8394   ins_pipe( pipe_slow );
8395 %}
8396 
8397 instruct vsra4S_imm(vecD dst, immI8 shift) %{
8398   predicate(n->as_Vector()->length() == 4);
8399   match(Set dst (RShiftVS dst shift));
8400   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
8401   ins_encode %{
8402     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
8403   %}
8404   ins_pipe( pipe_slow );
8405 %}
8406 
8407 instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
8408   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8409   match(Set dst (RShiftVS src shift));
8410   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
8411   ins_encode %{
8412     int vector_len = 0;
8413     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8414   %}
8415   ins_pipe( pipe_slow );
8416 %}
8417 
8418 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
8419   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8420   match(Set dst (RShiftVS src shift));
8421   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
8422   ins_encode %{
8423     int vector_len = 0;
8424     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8425   %}
8426   ins_pipe( pipe_slow );
8427 %}
8428 
8429 instruct vsra8S(vecX dst, vecS shift) %{
8430   predicate(n->as_Vector()->length() == 8);
8431   match(Set dst (RShiftVS dst shift));
8432   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
8433   ins_encode %{
8434     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
8435   %}
8436   ins_pipe( pipe_slow );
8437 %}
8438 
8439 instruct vsra8S_imm(vecX dst, immI8 shift) %{
8440   predicate(n->as_Vector()->length() == 8);
8441   match(Set dst (RShiftVS dst shift));
8442   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
8443   ins_encode %{
8444     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
8445   %}
8446   ins_pipe( pipe_slow );
8447 %}
8448 
8449 instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
8450   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
8451   match(Set dst (RShiftVS src shift));
8452   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
8453   ins_encode %{
8454     int vector_len = 0;
8455     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8456   %}
8457   ins_pipe( pipe_slow );
8458 %}
8459 
8460 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
8461   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
8462   match(Set dst (RShiftVS src shift));
8463   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
8464   ins_encode %{
8465     int vector_len = 0;
8466     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8467   %}
8468   ins_pipe( pipe_slow );
8469 %}
8470 
8471 instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
8472   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
8473   match(Set dst (RShiftVS src shift));
8474   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
8475   ins_encode %{
8476     int vector_len = 1;
8477     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8478   %}
8479   ins_pipe( pipe_slow );
8480 %}
8481 
8482 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
8483   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
8484   match(Set dst (RShiftVS src shift));
8485   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
8486   ins_encode %{
8487     int vector_len = 1;
8488     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8489   %}
8490   ins_pipe( pipe_slow );
8491 %}
8492 
8493 instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
8494   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
8495   match(Set dst (RShiftVS src shift));
8496   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed32S" %}
8497   ins_encode %{
8498     int vector_len = 2;
8499     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8500   %}
8501   ins_pipe( pipe_slow );
8502 %}
8503 
8504 instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
8505   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
8506   match(Set dst (RShiftVS src shift));
8507   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed32S" %}
8508   ins_encode %{
8509     int vector_len = 2;
8510     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8511   %}
8512   ins_pipe( pipe_slow );
8513 %}
8514 
8515 // Integers vector arithmetic right shift
8516 instruct vsra2I(vecD dst, vecS shift) %{
8517   predicate(n->as_Vector()->length() == 2);
8518   match(Set dst (RShiftVI dst shift));
8519   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
8520   ins_encode %{
8521     __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
8522   %}
8523   ins_pipe( pipe_slow );
8524 %}
8525 
8526 instruct vsra2I_imm(vecD dst, immI8 shift) %{
8527   predicate(n->as_Vector()->length() == 2);
8528   match(Set dst (RShiftVI dst shift));
8529   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
8530   ins_encode %{
8531     __ psrad($dst$$XMMRegister, (int)$shift$$constant);
8532   %}
8533   ins_pipe( pipe_slow );
8534 %}
8535 
8536 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
8537   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8538   match(Set dst (RShiftVI src shift));
8539   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
8540   ins_encode %{
8541     int vector_len = 0;
8542     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8543   %}
8544   ins_pipe( pipe_slow );
8545 %}
8546 
8547 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
8548   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8549   match(Set dst (RShiftVI src shift));
8550   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
8551   ins_encode %{
8552     int vector_len = 0;
8553     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8554   %}
8555   ins_pipe( pipe_slow );
8556 %}
8557 
8558 instruct vsra4I(vecX dst, vecS shift) %{
8559   predicate(n->as_Vector()->length() == 4);
8560   match(Set dst (RShiftVI dst shift));
8561   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
8562   ins_encode %{
8563     __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
8564   %}
8565   ins_pipe( pipe_slow );
8566 %}
8567 
8568 instruct vsra4I_imm(vecX dst, immI8 shift) %{
8569   predicate(n->as_Vector()->length() == 4);
8570   match(Set dst (RShiftVI dst shift));
8571   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
8572   ins_encode %{
8573     __ psrad($dst$$XMMRegister, (int)$shift$$constant);
8574   %}
8575   ins_pipe( pipe_slow );
8576 %}
8577 
8578 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
8579   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8580   match(Set dst (RShiftVI src shift));
8581   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
8582   ins_encode %{
8583     int vector_len = 0;
8584     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8585   %}
8586   ins_pipe( pipe_slow );
8587 %}
8588 
8589 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
8590   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8591   match(Set dst (RShiftVI src shift));
8592   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
8593   ins_encode %{
8594     int vector_len = 0;
8595     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8596   %}
8597   ins_pipe( pipe_slow );
8598 %}
8599 
8600 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
8601   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
8602   match(Set dst (RShiftVI src shift));
8603   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
8604   ins_encode %{
8605     int vector_len = 1;
8606     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8607   %}
8608   ins_pipe( pipe_slow );
8609 %}
8610 
8611 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
8612   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
8613   match(Set dst (RShiftVI src shift));
8614   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
8615   ins_encode %{
8616     int vector_len = 1;
8617     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8618   %}
8619   ins_pipe( pipe_slow );
8620 %}
8621 
8622 instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
8623   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8624   match(Set dst (RShiftVI src shift));
8625   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed16I" %}
8626   ins_encode %{
8627     int vector_len = 2;
8628     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8629   %}
8630   ins_pipe( pipe_slow );
8631 %}
8632 
8633 instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
8634   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8635   match(Set dst (RShiftVI src shift));
8636   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed16I" %}
8637   ins_encode %{
8638     int vector_len = 2;
8639     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8640   %}
8641   ins_pipe( pipe_slow );
8642 %}
8643 
8644 // There are no vector arithmetic right shift instructions for longs.
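// (If one were needed, a 64-bit arithmetic shift can be synthesized from the
// logical form: for 0 < n < 64, (x >> n) == ((x >>> n) ^ m) - m with
// m = 1L << (63 - n). This identity is illustrative only and is not
// necessarily how C2 compensates for the missing instruction.)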
8645 
8646 
8647 // --------------------------------- AND --------------------------------------
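// The bitwise logical forms below are element-size agnostic, so they key off
// the vector's total size (length_in_bytes) rather than its element count.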
8648 
8649 instruct vand4B(vecS dst, vecS src) %{
8650   predicate(n->as_Vector()->length_in_bytes() == 4);
8651   match(Set dst (AndV dst src));
8652   format %{ "pand    $dst,$src\t! and vectors (4 bytes)" %}
8653   ins_encode %{
8654     __ pand($dst$$XMMRegister, $src$$XMMRegister);
8655   %}
8656   ins_pipe( pipe_slow );
8657 %}
8658 
8659 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
8660   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
8661   match(Set dst (AndV src1 src2));
8662   format %{ "vpand   $dst,$src1,$src2\t! and vectors (4 bytes)" %}
8663   ins_encode %{
8664     int vector_len = 0;
8665     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8666   %}
8667   ins_pipe( pipe_slow );
8668 %}
8669 
8670 instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
8671   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
8672   match(Set dst (AndV src (LoadVector mem)));
8673   format %{ "vpand   $dst,$src,$mem\t! and vectors (4 bytes)" %}
8674   ins_encode %{
8675     int vector_len = 0;
8676     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8677   %}
8678   ins_pipe( pipe_slow );
8679 %}
8680 
8681 instruct vand8B(vecD dst, vecD src) %{
8682   predicate(n->as_Vector()->length_in_bytes() == 8);
8683   match(Set dst (AndV dst src));
8684   format %{ "pand    $dst,$src\t! and vectors (8 bytes)" %}
8685   ins_encode %{
8686     __ pand($dst$$XMMRegister, $src$$XMMRegister);
8687   %}
8688   ins_pipe( pipe_slow );
8689 %}
8690 
8691 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
8692   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
8693   match(Set dst (AndV src1 src2));
8694   format %{ "vpand   $dst,$src1,$src2\t! and vectors (8 bytes)" %}
8695   ins_encode %{
8696     int vector_len = 0;
8697     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8698   %}
8699   ins_pipe( pipe_slow );
8700 %}
8701 
8702 instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
8703   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
8704   match(Set dst (AndV src (LoadVector mem)));
8705   format %{ "vpand   $dst,$src,$mem\t! and vectors (8 bytes)" %}
8706   ins_encode %{
8707     int vector_len = 0;
8708     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8709   %}
8710   ins_pipe( pipe_slow );
8711 %}
8712 
8713 instruct vand16B(vecX dst, vecX src) %{
8714   predicate(n->as_Vector()->length_in_bytes() == 16);
8715   match(Set dst (AndV dst src));
8716   format %{ "pand    $dst,$src\t! and vectors (16 bytes)" %}
8717   ins_encode %{
8718     __ pand($dst$$XMMRegister, $src$$XMMRegister);
8719   %}
8720   ins_pipe( pipe_slow );
8721 %}
8722 
8723 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
8724   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
8725   match(Set dst (AndV src1 src2));
8726   format %{ "vpand   $dst,$src1,$src2\t! and vectors (16 bytes)" %}
8727   ins_encode %{
8728     int vector_len = 0;
8729     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8730   %}
8731   ins_pipe( pipe_slow );
8732 %}
8733 
8734 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
8735   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
8736   match(Set dst (AndV src (LoadVector mem)));
8737   format %{ "vpand   $dst,$src,$mem\t! and vectors (16 bytes)" %}
8738   ins_encode %{
8739     int vector_len = 0;
8740     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8741   %}
8742   ins_pipe( pipe_slow );
8743 %}
8744 
8745 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
8746   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
8747   match(Set dst (AndV src1 src2));
8748   format %{ "vpand   $dst,$src1,$src2\t! and vectors (32 bytes)" %}
8749   ins_encode %{
8750     int vector_len = 1;
8751     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8752   %}
8753   ins_pipe( pipe_slow );
8754 %}
8755 
8756 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
8757   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
8758   match(Set dst (AndV src (LoadVector mem)));
8759   format %{ "vpand   $dst,$src,$mem\t! and vectors (32 bytes)" %}
8760   ins_encode %{
8761     int vector_len = 1;
8762     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8763   %}
8764   ins_pipe( pipe_slow );
8765 %}
8766 
8767 instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
8768   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
8769   match(Set dst (AndV src1 src2));
8770   format %{ "vpand   $dst,$src1,$src2\t! and vectors (64 bytes)" %}
8771   ins_encode %{
8772     int vector_len = 2;
8773     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8774   %}
8775   ins_pipe( pipe_slow );
8776 %}
8777 
8778 instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
8779   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
8780   match(Set dst (AndV src (LoadVector mem)));
8781   format %{ "vpand   $dst,$src,$mem\t! and vectors (64 bytes)" %}
8782   ins_encode %{
8783     int vector_len = 2;
8784     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8785   %}
8786   ins_pipe( pipe_slow );
8787 %}
8788 
8789 // --------------------------------- OR ---------------------------------------
8790 
8791 instruct vor4B(vecS dst, vecS src) %{
8792   predicate(n->as_Vector()->length_in_bytes() == 4);
8793   match(Set dst (OrV dst src));
8794   format %{ "por     $dst,$src\t! or vectors (4 bytes)" %}
8795   ins_encode %{
8796     __ por($dst$$XMMRegister, $src$$XMMRegister);
8797   %}
8798   ins_pipe( pipe_slow );
8799 %}
8800 
8801 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
8802   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
8803   match(Set dst (OrV src1 src2));
8804   format %{ "vpor    $dst,$src1,$src2\t! or vectors (4 bytes)" %}
8805   ins_encode %{
8806     int vector_len = 0;
8807     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8808   %}
8809   ins_pipe( pipe_slow );
8810 %}
8811 
8812 instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
8813   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
8814   match(Set dst (OrV src (LoadVector mem)));
8815   format %{ "vpor    $dst,$src,$mem\t! or vectors (4 bytes)" %}
8816   ins_encode %{
8817     int vector_len = 0;
8818     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8819   %}
8820   ins_pipe( pipe_slow );
8821 %}
8822 
8823 instruct vor8B(vecD dst, vecD src) %{
8824   predicate(n->as_Vector()->length_in_bytes() == 8);
8825   match(Set dst (OrV dst src));
8826   format %{ "por     $dst,$src\t! or vectors (8 bytes)" %}
8827   ins_encode %{
8828     __ por($dst$$XMMRegister, $src$$XMMRegister);
8829   %}
8830   ins_pipe( pipe_slow );
8831 %}
8832 
8833 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
8834   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
8835   match(Set dst (OrV src1 src2));
8836   format %{ "vpor    $dst,$src1,$src2\t! or vectors (8 bytes)" %}
8837   ins_encode %{
8838     int vector_len = 0;
8839     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8840   %}
8841   ins_pipe( pipe_slow );
8842 %}
8843 
8844 instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
8845   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
8846   match(Set dst (OrV src (LoadVector mem)));
8847   format %{ "vpor    $dst,$src,$mem\t! or vectors (8 bytes)" %}
8848   ins_encode %{
8849     int vector_len = 0;
8850     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8851   %}
8852   ins_pipe( pipe_slow );
8853 %}
8854 
8855 instruct vor16B(vecX dst, vecX src) %{
8856   predicate(n->as_Vector()->length_in_bytes() == 16);
8857   match(Set dst (OrV dst src));
8858   format %{ "por     $dst,$src\t! or vectors (16 bytes)" %}
8859   ins_encode %{
8860     __ por($dst$$XMMRegister, $src$$XMMRegister);
8861   %}
8862   ins_pipe( pipe_slow );
8863 %}
8864 
8865 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
8866   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
8867   match(Set dst (OrV src1 src2));
8868   format %{ "vpor    $dst,$src1,$src2\t! or vectors (16 bytes)" %}
8869   ins_encode %{
8870     int vector_len = 0;
8871     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8872   %}
8873   ins_pipe( pipe_slow );
8874 %}
8875 
8876 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
8877   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
8878   match(Set dst (OrV src (LoadVector mem)));
8879   format %{ "vpor    $dst,$src,$mem\t! or vectors (16 bytes)" %}
8880   ins_encode %{
8881     int vector_len = 0;
8882     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8883   %}
8884   ins_pipe( pipe_slow );
8885 %}
8886 
8887 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
8888   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
8889   match(Set dst (OrV src1 src2));
8890   format %{ "vpor    $dst,$src1,$src2\t! or vectors (32 bytes)" %}
8891   ins_encode %{
8892     int vector_len = 1;
8893     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8894   %}
8895   ins_pipe( pipe_slow );
8896 %}
8897 
8898 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
8899   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
8900   match(Set dst (OrV src (LoadVector mem)));
8901   format %{ "vpor    $dst,$src,$mem\t! or vectors (32 bytes)" %}
8902   ins_encode %{
8903     int vector_len = 1;
8904     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8905   %}
8906   ins_pipe( pipe_slow );
8907 %}
8908 
8909 instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
8910   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
8911   match(Set dst (OrV src1 src2));
8912   format %{ "vpor    $dst,$src1,$src2\t! or vectors (64 bytes)" %}
8913   ins_encode %{
8914     int vector_len = 2;
8915     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8916   %}
8917   ins_pipe( pipe_slow );
8918 %}
8919 
8920 instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
8921   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
8922   match(Set dst (OrV src (LoadVector mem)));
8923   format %{ "vpor    $dst,$src,$mem\t! or vectors (64 bytes)" %}
8924   ins_encode %{
8925     int vector_len = 2;
8926     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8927   %}
8928   ins_pipe( pipe_slow );
8929 %}
8930 
8931 // --------------------------------- XOR --------------------------------------
8932 
8933 instruct vxor4B(vecS dst, vecS src) %{
8934   predicate(n->as_Vector()->length_in_bytes() == 4);
8935   match(Set dst (XorV dst src));
8936   format %{ "pxor    $dst,$src\t! xor vectors (4 bytes)" %}
8937   ins_encode %{
8938     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
8939   %}
8940   ins_pipe( pipe_slow );
8941 %}
8942 
8943 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
8944   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
8945   match(Set dst (XorV src1 src2));
8946   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
8947   ins_encode %{
8948     int vector_len = 0;
8949     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8950   %}
8951   ins_pipe( pipe_slow );
8952 %}
8953 
8954 instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
8955   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
8956   match(Set dst (XorV src (LoadVector mem)));
8957   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (4 bytes)" %}
8958   ins_encode %{
8959     int vector_len = 0;
8960     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8961   %}
8962   ins_pipe( pipe_slow );
8963 %}
8964 
8965 instruct vxor8B(vecD dst, vecD src) %{
8966   predicate(n->as_Vector()->length_in_bytes() == 8);
8967   match(Set dst (XorV dst src));
8968   format %{ "pxor    $dst,$src\t! xor vectors (8 bytes)" %}
8969   ins_encode %{
8970     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
8971   %}
8972   ins_pipe( pipe_slow );
8973 %}
8974 
8975 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
8976   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
8977   match(Set dst (XorV src1 src2));
8978   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
8979   ins_encode %{
8980     int vector_len = 0;
8981     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8982   %}
8983   ins_pipe( pipe_slow );
8984 %}
8985 
8986 instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
8987   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
8988   match(Set dst (XorV src (LoadVector mem)));
8989   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (8 bytes)" %}
8990   ins_encode %{
8991     int vector_len = 0;
8992     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8993   %}
8994   ins_pipe( pipe_slow );
8995 %}
8996 
8997 instruct vxor16B(vecX dst, vecX src) %{
8998   predicate(n->as_Vector()->length_in_bytes() == 16);
8999   match(Set dst (XorV dst src));
9000   format %{ "pxor    $dst,$src\t! xor vectors (16 bytes)" %}
9001   ins_encode %{
9002     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
9003   %}
9004   ins_pipe( pipe_slow );
9005 %}
9006 
9007 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
9008   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
9009   match(Set dst (XorV src1 src2));
9010   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
9011   ins_encode %{
9012     int vector_len = 0;
9013     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
9014   %}
9015   ins_pipe( pipe_slow );
9016 %}
9017 
9018 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
9019   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
9020   match(Set dst (XorV src (LoadVector mem)));
9021   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (16 bytes)" %}
9022   ins_encode %{
9023     int vector_len = 0;
9024     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
9025   %}
9026   ins_pipe( pipe_slow );
9027 %}
9028 
9029 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
9030   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
9031   match(Set dst (XorV src1 src2));
9032   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
9033   ins_encode %{
9034     int vector_len = 1;
9035     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
9036   %}
9037   ins_pipe( pipe_slow );
9038 %}
9039 
9040 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
9041   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
9042   match(Set dst (XorV src (LoadVector mem)));
9043   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (32 bytes)" %}
9044   ins_encode %{
9045     int vector_len = 1;
9046     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
9047   %}
9048   ins_pipe( pipe_slow );
9049 %}
9050 
9051 instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
9052   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
9053   match(Set dst (XorV src1 src2));
9054   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
9055   ins_encode %{
9056     int vector_len = 2;
9057     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
9058   %}
9059   ins_pipe( pipe_slow );
9060 %}
9061 
9062 instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
9063   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
9064   match(Set dst (XorV src (LoadVector mem)));
9065   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (64 bytes)" %}
9066   ins_encode %{
9067     int vector_len = 2;
9068     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
9069   %}
9070   ins_pipe( pipe_slow );
9071 %}
9072